-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathqa.py
208 lines (170 loc) · 6.32 KB
/
qa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
'''
NLP Story Q/A bot
by csmaher and vmelkote
qa.py: program entry point
python3 qa.py > output
'''
import utils
from utils import (nltk, stopwords, get_sentences, get_bow,
generate_collocations, generate_wn_list)
# import operator, re, nltk, utils
from answer_sentences import (baseline, choose_sentence)
from answer_phrases import find_answer
from qa_engine.base import QABase
from qa_engine.score_answers import main as score_answers
def get_answer(question, story):
    """
    Answer one question about one story.

    :param question: dict with keys:
        dep -- dependency graph for the question sentence.
        par -- constituency parse for the question sentence.
        text -- the raw text of the question.
        sid -- the story id.
        difficulty -- easy, medium, or hard.
        type -- whether to use the 'sch' or 'story' version.
        qid -- the id of the question.
    :param story: dict with keys:
        story_dep -- list of dependency graphs, one per story sentence.
        sch_dep -- list of dependency graphs, one per sch sentence.
        sch_par -- list of constituency parses, one per sch sentence.
        story_par -- list of constituency parses, one per story sentence.
        sch -- the raw text for the sch version (may be absent/non-str).
        text -- the raw text for the story version.
        sid -- the story id.
    :return: str -- the answer text.

    Strategy: build a bag-of-words baseline answer first, then try
    choose_sentence() to pick the best sentence(s); if that succeeds,
    narrow each chosen sentence to an answer phrase with find_answer().
    Diagnostic print() calls are intentional -- the program is run as
    ``python3 qa.py > output`` and the prints form a debug trace.
    """
    # Build resources needed throughout the program first.
    generate_collocations()
    generate_wn_list(story)

    # Prefer the Scheherazade ("sch") version of the story when present.
    if isinstance(story["sch"], str):
        sentences = get_sentences(story["sch"])
    else:
        sentences = get_sentences(story["text"])

    print(question['qid'])
    print('difficulty: ' + question['difficulty'])

    # Bag-of-words baseline: overlap between question words and each sentence.
    qbow = get_bow(get_sentences(question["text"])[0], stopwords)
    print("qbow:" + str(qbow))
    b = baseline(qbow, sentences, stopwords)
    print("Baseline output: " + str(b))
    # baseline() returns (word, tag) pairs; join the words as the fallback answer.
    answer = " ".join(pair[0] for pair in b)

    # choose_sentence arbitrates which strategy to use for finding the
    # best sentence; it may return None, one sentence, or a list of them.
    sentence = choose_sentence(question, story)

    # Re-tokenize raw text so indices line up with the per-sentence parses.
    if isinstance(story["sch"], str):
        sentences = nltk.sent_tokenize(story["sch"])
        s_dep = story['sch_dep']
        s_con = story['sch_par']
    else:
        sentences = nltk.sent_tokenize(story["text"])
        s_dep = story['story_dep']
        s_con = story['story_par']

    if sentence is not None:
        answer = sentence
        if isinstance(sentence, list):
            # Several sentences chosen: extract a phrase from each and
            # concatenate the pieces.
            full_answer = ''
            for chosen_sent in sentence:
                for i, sent in enumerate(sentences):
                    if sent == chosen_sent:
                        full_answer += find_answer(question, s_dep[i], s_con[i])
                        full_answer += ' '
            answer = full_answer
        else:
            # Single sentence chosen: find its index so we can hand the
            # matching dependency/constituency parses to find_answer().
            for i, sent in enumerate(sentences):
                print("sentence iterating over: " + sent)
                print("sentence from choose_sentence: " + sentence)
                if sent == sentence:
                    answer = find_answer(question, s_dep[i], s_con[i])

    print("answer:")
    print(answer)
    print()
    return answer
#############################################################
### Dont change the code below here
#############################################################
class QAEngine(QABase):
    """Thin adapter: QABase drives the QA loop and this class delegates
    each question to the module-level get_answer()."""
    @staticmethod
    def answer_question(question, story):
        # Called by the QABase framework for each (question, story) pair.
        answer = get_answer(question, story)
        return answer
def run_qa():
    """Run the full QA pipeline and persist the generated answers."""
    QA = QAEngine()
    QA.run()
    QA.save_answers()
def main():
    """Program entry point: answer all questions, then score them."""
    run_qa()
    # You can uncomment this next line to evaluate your
    # answers, or you can run score_answers.py
    # NOTE(review): the comment above says "uncomment", but the call below
    # is already live -- scoring always runs here. Confirm this is intended.
    score_answers()
# Standard script guard: only run the pipeline when executed directly.
if __name__ == "__main__":
    main()