-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetTestReportEmbeddings.py
39 lines (32 loc) · 1.31 KB
/
getTestReportEmbeddings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import numpy as np
import pandas as pd
import os, os.path
import skipthoughts
# Encode every sentence cell of Data/testReport.csv with the skip-thoughts
# encoder and store the resulting table as a dict inside an .npy file.

# Load the skip-thoughts model once and wrap it in an encoder.
model = skipthoughts.load_model()
encoder = skipthoughts.Encoder(model)

# Input and output paths are resolved against the current working directory,
# so the script must be launched from the directory that contains Data/.
curr_dir = os.getcwd()
test_dir = os.path.join(curr_dir, "Data/testReport.csv")
embeddedTestDir = os.path.join(curr_dir, "Embeddings/embeddedTestReport.npy")

# Read the report, discard the story id and switch to the short column names.
test_data = pd.read_csv(test_dir)
test_data = test_data.drop(columns="InputStoryid")
test_data.index.name = 'id'
test_data.rename(columns={'InputSentence1': 'Sentence1',
                          'InputSentence2': 'Sentence2',
                          'InputSentence3': 'Sentence3',
                          'InputSentence4': 'Sentence4',
                          'RandomFifthSentenceQuiz1': 'Ending1',
                          'RandomFifthSentenceQuiz2': 'Ending2',
                          'AnswerRightEnding': 'RightEnding'},
                 inplace=True)

n_samples = test_data.shape[0]
# Every column except the last one is embedded.
# NOTE(review): presumably the last column is 'RightEnding' (the label) --
# this depends on the column order of the CSV; confirm.
n_sentences = test_data.shape[1] - 1

# Flatten the text cells row by row (C order), so the cell at (i, j) ends up
# at flat position i * n_sentences + j. This replaces a per-cell .iloc loop
# and yields the same list in the same order.
sentencesToEmbed = test_data.iloc[:, :n_sentences].values.ravel().tolist()

# np.save does not create missing parent directories; make sure the output
# folder exists *before* the expensive encoding step rather than failing after.
_embedded_out_dir = os.path.dirname(embeddedTestDir)
if not os.path.isdir(_embedded_out_dir):
    os.makedirs(_embedded_out_dir)

# Assumes encoder.encode returns one vector per input sentence, in input
# order -- the index arithmetic below relies on it. TODO confirm.
testEmbeddings = encoder.encode(sentencesToEmbed)

# Overwrite each text cell with its embedding, stored as a plain Python list.
for i in range(n_samples):
    for j in range(n_sentences):
        test_data.iat[i, j] = testEmbeddings[i * n_sentences + j].tolist()

# to_dict() gives {column: {row id: value}}. np.save stores a dict by pickling
# it, so readers need np.load(path, allow_pickle=True).item() to get it back.
embeddedTestDict = test_data.to_dict()
np.save(embeddedTestDir, embeddedTestDict)