-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbuild_api_data.py
92 lines (57 loc) · 1.92 KB
/
build_api_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import json, glob, csv
import requests
import os.path
def chunks(l, n):
    """Yield successive n-sized chunks from l.

    Args:
        l: A sequence supporting ``len()`` and slicing (list, str, tuple...).
        n: Maximum number of items per chunk; must be at least 1.

    Yields:
        Consecutive slices of ``l``, each of length ``n`` except possibly
        the last, which holds whatever items remain.

    Raises:
        ValueError: If ``n`` is less than 1. Because this is a generator,
            the error is raised when iteration starts, not at call time.
    """
    # A zero step makes range() fail with an opaque message and a negative
    # step silently produces no chunks at all, so reject both up front.
    if n < 1:
        raise ValueError("chunk size n must be at least 1")
    for start in range(0, len(l), n):
        yield l[start:start + n]
def _merge_entities(data, entities):
    """Fold the entities of one server response into the accumulator ``data``.

    The first time an entity key is seen its record is stored as-is, except
    that a scalar ``typeMode`` value is wrapped in a list. On later sightings
    only previously unseen ``typeMode`` values are appended to that list.

    Args:
        data: Dict of entity key -> record collected so far; mutated in place.
        entities: Dict of entity key -> record taken from one response
            (presumably the server's ``results`` object -- verify against
            the API if the schema matters).
    """
    for key, entity in entities.items():
        if key not in data:
            if 'typeMode' in entity:
                entity['typeMode'] = [entity['typeMode']]
            data[key] = entity
        elif 'typeMode' in entity:
            # The first record may have had no typeMode at all; create the
            # list on demand instead of failing with a KeyError.
            seen_modes = data[key].setdefault('typeMode', [])
            if entity['typeMode'] not in seen_modes:
                seen_modes.append(entity['typeMode'])


tools = ["spotlight","stanford","nltk","spacy","opener","parsey"]

for directory in glob.glob('data/*'):

    # Only this one dataset is processed; every other directory is skipped.
    if directory != 'data/whitney_finding_aid':
        continue

    print('Working ', directory)

    data_name = directory.replace('data/','')

    with open(directory+'/text.txt', encoding="utf-8") as text_file:
        text = text_file.read()

    # Send the text to the server ten lines at a time.
    text_data_blocks = ["\n".join(block) for block in chunks(text.split('\n'), 10)]

    for t in tools:

        data = {}
        print("\t",t)

        out_file_name = directory+'/results_'+t +'_'+data_name+'.json'

        # An existing results file means this tool was already run.
        if os.path.isfile(out_file_name):
            continue

        for text_block in text_data_blocks:

            if text_block.strip() == '':
                continue

            print('----')
            print(text_block)
            print('----')

            # NOTE(review): no timeout is passed, so a hung server blocks the
            # script indefinitely -- consider adding one.
            try:
                r = requests.post('https://nerserver.semlab.io/compiled', json={"text": text_block, "tool":[t]})
            except requests.RequestException as error:
                # There is no response object when the request itself fails,
                # so report the exception rather than a response body.
                print('-->',error,'<--')
                print("error on this one!!!!!")
                continue

            try:
                results = r.json()
            except ValueError:
                # Body is not JSON: skip this block instead of re-merging the
                # previous block's results.
                print('-->',r.text,'<--')
                print("error on this one!!!!!")
                continue

            print(results)

            entities = results.get('results') if isinstance(results, dict) else None
            if not isinstance(entities, dict):
                # Response carried no usable 'results' object; nothing to merge.
                print("error on this one!!!!!")
                continue

            _merge_entities(data, entities)

        # Written once per tool, after every block has been attempted.
        with open(out_file_name,'w') as out_file:
            json.dump(data,out_file,indent=2)
# console.log(data)
# for json_file in glob.glob(directory+'/*.json'):
# json_data = json.load(open(json_file))
# print(json_data)