-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathextract.py
34 lines (25 loc) · 1.19 KB
/
extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import os
# Extract the subset of a dev split whose context contains a keyword.
#
# Four input files are read line by line and indexed by the same line number:
# entry i of the answer, question and span files is kept whenever line i of
# the context file contains `keyword`. The filtered lines are written to
# files of the same names directly under "data/".
# NOTE(review): this assumes the four files are line-aligned and equally
# long; a shorter answer/question/span file raises IndexError below.
answer_path = "data/00/dev.answer"
question_path = "data/00/dev.question"
context_path = "data/00/dev.context"
span_path = "data/00/dev.span"

# Substring searched for in each context line (case-sensitive `in` test).
keyword = "microsoft"


def _read_lines(path):
    """Return the lines of the UTF-8 text file at *path*, line endings removed."""
    # The context manager closes the handle; the bare open().read() did not.
    with open(path, mode='r', encoding='utf8') as file:
        return file.read().splitlines()


def _write_lines(path, lines):
    """Write *lines* to *path* as UTF-8, newline-separated, no trailing newline."""
    with open(path, mode='w', encoding='utf8') as file:
        file.write("\n".join(lines))


# Fail early with a clear error instead of an `assert`, which is removed
# when Python runs with -O and previously covered only the answer file.
missing_paths = [
    path
    for path in (answer_path, question_path, context_path, span_path)
    if not os.path.exists(path)
]
if missing_paths:
    raise FileNotFoundError("missing input file(s): " + ", ".join(missing_paths))

contexts = _read_lines(context_path)
answers = _read_lines(answer_path)
questions = _read_lines(question_path)
spans = _read_lines(span_path)

# Line numbers of every context that mentions the keyword.
indices = [i for i, context in enumerate(contexts) if keyword in context]
print(len(indices), " contexts")

# Select the same line numbers from each file.
new_context = [contexts[i] for i in indices]
new_answer = [answers[i] for i in indices]
new_question = [questions[i] for i in indices]
new_span = [spans[i] for i in indices]

_write_lines("data/dev.answer", new_answer)
_write_lines("data/dev.question", new_question)
_write_lines("data/dev.context", new_context)
_write_lines("data/dev.span", new_span)