-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathres_file_reader.py
35 lines (30 loc) · 1.23 KB
/
res_file_reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
class ResFileReader(object):
    """Parse single lines of a search-engine result file.

    The class can be extended to handle result files coming from different
    IR engines, e.g. Terrier (answerbag) and Watson (jeopardy): add a
    ``read_line_<format>`` method and pass ``format="<format>"`` to the
    constructor.

    Every parser extracts the question id, the candidate answer id, the
    relevance label and the score the IR engine assigned to the answer,
    and returns them as the tuple ``(qid, aid, relevant, ir_score)``.

    TODO: pretty up this class later on.
    """

    def __init__(self, format="trec"):
        """Bind ``self.read_line`` to the parser for ``format``.

        Args:
            format: suffix of the ``read_line_<format>`` method to use;
                "trec" and "answerbag" are defined on this class.

        Raises:
            AttributeError: if no ``read_line_<format>`` method exists.
        """
        # Resolve the parser once so callers can simply use read_line(line).
        self.read_line = getattr(self, "read_line_%s" % format)

    def read_line_answerbag(self, line):
        """Parse one tab-separated line of an answerbag result file.

        The first four fields are read as: qid, aid, IR value, relevance.

        Returns:
            Tuple ``(qid, aid, relevant, ir_score)``; every element is a
            string.

        Raises:
            IndexError: if the line has fewer than four tab-separated fields.
        """
        tokens = line.strip().split("\t")
        qid = tokens[0]
        aid = tokens[1]
        # NOTE: according to the original author this field is the rank of
        # the candidate rather than the search-engine score. It is returned
        # unchanged as a string (no inversion or numeric conversion happens
        # here), unlike read_line_trec which returns a float.
        ir_score = tokens[2]
        relevant = tokens[3]  # relevance label, e.g. true or false
        return qid, aid, relevant, ir_score

    def read_line_trec(self, line):
        """Parse one line of a resultset in the TREC resultset format.

        Each line is whitespace-separated and formatted as follows:
            qid aid rank score relevance text

        The rank (third field) and any trailing text are ignored.

        Returns:
            Tuple ``(qid, aid, relevant, ir_score)`` where ``ir_score`` is
            the fourth field converted to float and the rest are strings.

        Raises:
            IndexError: if the line has fewer than five fields.
            ValueError: if the score field is not a valid float.
        """
        tokens = line.strip().split()
        qid = tokens[0]
        aid = tokens[1]
        ir_score = float(tokens[3])  # engine score, used as-is
        relevant = tokens[4]  # relevance label, e.g. true or false
        return qid, aid, relevant, ir_score