-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcsv_generator.py
45 lines (39 loc) · 1.8 KB
/
csv_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import json
def _as_list(value):
    """Return *value* as a list: ``None`` -> ``[]``, a list as-is, else ``[value]``."""
    if value is None:
        return []
    if isinstance(value, list):
        return value
    return [value]


def create_csv(input_file, output_file):
    """Write one multi-hot label row per annotated sentence of a review file.

    ``input_file`` is a JSON document shaped like
    ``{"Reviews": {"Review": [{"sentences": {"sentence": [...]}}]}}``.
    A sentence is exported when it has no ``"@OutOfScope"`` key, has a
    ``"text"`` key, and carries at least one opinion under
    ``["Opinions"]["Opinion"]``. ``Review``, ``sentence`` and ``Opinion`` may
    each be a list or a single object; a single object is treated as a
    one-element list.

    ``output_file`` receives a header line followed by one line per exported
    sentence: the sentence text, then a ``'1'``/``'0'`` flag for every entry
    of ``label_kinds`` (in that order), all joined with ``'_'``.

    Both files are handled as UTF-8 so the result does not depend on the
    platform's default encoding and no row is dropped because of characters
    the locale encoding cannot represent.

    Raises:
        OSError: if either file cannot be opened.
        json.JSONDecodeError: if ``input_file`` is not valid JSON.
        KeyError: if the expected ``Reviews``/``Review``/``sentences``/
            ``sentence``/``@category`` keys are missing.
    """
    label_kinds = ["FOOD#QUALITY", "RESTAURANT#GENERAL", "SERVICE#GENERAL", "AMBIENCE#GENERAL",
                   "RESTAURANT#MISCELLANEOUS", "FOOD#PRICES", "RESTAURANT#PRICES", "DRINKS#QUALITY",
                   "LOCATION#GENERAL", "DRINKS#PRICES"]

    with open(input_file, encoding="utf-8") as source:
        reviews = json.load(source)['Reviews']['Review']

    rows = []
    for review in _as_list(reviews):
        for sentence in _as_list(review["sentences"]['sentence']):
            if ("@OutOfScope" in sentence
                    or "Opinions" not in sentence
                    or "text" not in sentence):
                continue
            # "Opinions" may be null/empty when a sentence has no annotations.
            opinions = sentence["Opinions"] or {}
            categories = {
                opinion["@category"]
                for opinion in _as_list(opinions.get("Opinion"))
            }
            if not categories:
                continue
            # A sentence whose categories are all outside label_kinds is still
            # exported, as an all-zero row.
            flags = ['1' if kind in categories else '0' for kind in label_kinds]
            rows.append(sentence['text'] + '_' + "_".join(flags) + "\n")

    # NOTE: the field separator is '_' (not ','); it is kept as-is for
    # compatibility with existing readers, so text that itself contains '_'
    # yields extra fields on that line.
    with open(output_file, 'w', encoding="utf-8") as target:
        target.write('text_' + "_".join(label_kinds) + "\n")
        target.writelines(rows)
# Generate the train and test label files from their JSON sources.
for source_path, target_path in (
    ("Data/ABSA-15_Restaurants_Train.json", 'Data/aspects_train.csv'),
    ("Data/ABSA15_Restaurants_Test.json", 'Data/aspects_test.csv'),
):
    create_csv(source_path, target_path)