-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetValEmbeddings.py
39 lines (32 loc) · 1.28 KB
/
getValEmbeddings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import numpy as np
import pandas as pd
import os, os.path
import skipthoughts
# Embed every sentence of the validation CSV with skip-thoughts and save the
# resulting table (embeddings plus the untouched last column) as a .npy file.

# Build the sentence encoder from the loaded skip-thoughts model.
model = skipthoughts.load_model()
encoder = skipthoughts.Encoder(model)

# Input and output paths are resolved against the current working directory,
# so the script has to be launched from the directory containing "Data/".
curr_dir = os.getcwd()
val_dir = os.path.join(curr_dir, "Data/val.csv")
val_data = pd.read_csv(val_dir)

# Drop the story identifier and give the remaining columns shorter names.
val_data = val_data.drop(columns="InputStoryid")
val_data.index.name = 'id'
val_data.rename(columns={'InputSentence1': 'Sentence1',
                         'InputSentence2': 'Sentence2',
                         'InputSentence3': 'Sentence3',
                         'InputSentence4': 'Sentence4',
                         'RandomFifthSentenceQuiz1': 'Ending1',
                         'RandomFifthSentenceQuiz2': 'Ending2',
                         'AnswerRightEnding': 'RightEnding'},
                inplace=True)

n_samples = val_data.shape[0]
# Every column except the last one is embedded.
# NOTE(review): assumes the CSV column order puts the answer label
# ('RightEnding') last -- confirm against Data/val.csv.
n_sentences = val_data.shape[1] - 1

# Flatten the text cells row by row (row-major), so the cell at (i, j) lands
# at position i * n_sentences + j -- the same order the write-back loop below
# relies on.
sentencesToEmbed = val_data.iloc[:, :n_sentences].values.ravel().tolist()

# Encode everything in a single call; valEmbeddings is indexed in the same
# order as sentencesToEmbed.
valEmbeddings = encoder.encode(sentencesToEmbed)

# Replace each text cell with its embedding, converted via .tolist().
for i in range(n_samples):
    for j in range(n_sentences):
        val_data.iat[i, j] = valEmbeddings[i * n_sentences + j].tolist()

embeddedValDir = os.path.join(curr_dir, "Embeddings/embeddedVal.npy")
# Create the output directory if it is missing, so the save cannot fail on a
# missing "Embeddings/" folder after all the encoding work has been done.
embeddedValParent = os.path.dirname(embeddedValDir)
if not os.path.isdir(embeddedValParent):
    os.makedirs(embeddedValParent)

# to_dict() yields {column name: {row index: cell value}}.
embeddedValDict = val_data.to_dict()
# A dict is not an ndarray, so np.save stores it as a pickled object.
# NOTE(review): readers presumably need
# np.load(path, allow_pickle=True).item() to get the dict back -- confirm
# against the consumer of this file.
np.save(embeddedValDir, embeddedValDict)