-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbm25_search.py
executable file
·43 lines (37 loc) · 1.13 KB
/
bm25_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import argparse
import json
from elasticsearch import Elasticsearch
# Command-line interface: two required positional arguments, in this order.
parser = argparse.ArgumentParser()
for _arg_name, _arg_help in (
    ("queries", "Input file with queries"),
    ("indexname", "Elasticsearch index name"),
):
    parser.add_argument(_arg_name, type=str, help=_arg_help)
args = parser.parse_args()
def res(results, query="1", tag="tag"):
    """Print search hits as tab-separated lines in trec_eval run format.

    One line is printed per hit, with the columns
    ``query  Q0  docid  rank  score  tag``. The rank is the 1-based
    position of the hit within *results*.

    Args:
        results: Iterable of hit dicts; each must provide
            ``hit["_source"]["DOCID"]`` and ``hit["_score"]``.
        query: Query identifier written in the first column.
        tag: Run tag written in the last column.

    Raises:
        KeyError: If a hit lacks ``_source``/``DOCID`` or ``_score``.
    """
    # enumerate(start=1) yields the 1-based rank without a manual counter.
    for rank, hit in enumerate(results, start=1):
        docid = hit["_source"]["DOCID"]
        score = hit["_score"]
        print(query, "Q0", docid, rank, score, tag, sep="\t")
# Connect to the Elasticsearch server on localhost.
es = Elasticsearch("http://localhost:9200")

# Load the queries file: a JSON object whose "QUERIES" entry is a list of
# records, each carrying a "QUERYID" and a "QUERY" text.
# JSON is UTF-8 by specification, so do not depend on the locale default.
with open(args.queries, "r", encoding="utf-8") as infile:
    queries = json.load(infile)
queries_list = queries["QUERIES"]

# Run each query as a match on the TEXT field and print its ranked hits.
for query in queries_list:
    num = query["QUERYID"]
    text = query["QUERY"]
    query_dict = {
        "bool": {
            "should": [
                {"match": {"TEXT": text}},
            ]
        }
    }
    response = es.search(
        index=args.indexname,
        query=query_dict,
        size=1000,  # request at most 1000 hits per query
    )
    res(response["hits"]["hits"], num)