-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcreateBgrfListAsTsv.py
66 lines (58 loc) · 2.2 KB
/
createBgrfListAsTsv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
'''
Create a TSV list of all BGRF entries that are to be imported.
Columns: BGRF_ID, Title, Author, Author1, Author2, ...
'''
import os
import csv
# Directory that contains this script; every data path below is built relative to it.
dirname = os.path.dirname(__file__)
# Tab-separated input tables; both are read with getDict() and merged further down.
TSV_BGRF100 = os.path.join(dirname, 'data/BGRF_100.tsv')
TSV_BGRF2000_m100 = os.path.join(dirname, 'data/BGRF_2000-100.tsv')
# Column names of the output table. getDict() appends 'Author1', 'Author2', ...
# to this list whenever it meets a filled additional-author column.
header = ['BGRF_ID', 'Title', 'Author']
def getDict(filename):
    '''
    Read one tab-separated BGRF table and return its entries keyed by BGRF ID.

    The file is parsed with csv.DictReader, so its first line supplies the
    column names. The first two data rows after that line are always skipped.
    A later row is kept when its 'P13' cell (used as the dictionary key) is
    non-empty and its 'check' cell is the empty string, or the row has no
    'check' key at all.

    Each kept row becomes a dict with
      'Title'    - the 'P4' cell,
      'Author'   - the 'P5' cell, only when it is non-empty,
      'Author1', 'Author2', ... - the '1P5', '2P5', ... cells; scanning stops
                   at the first column that is missing or empty.

    Cells that DictReader reports as None (the row is shorter than the header
    line) count as empty for 'P13' and for all author columns, so a short row
    can neither produce a None key nor a None author.

    Side effect: every 'AuthorN' key seen for the first time is appended to
    the module-level list `header`.

    If two kept rows share the same 'P13' value, the later row wins.

    :param filename: path of the TSV file to read
    :return: dict mapping BGRF ID -> dict of title/author fields
    :raises KeyError: if a kept row has no 'P4' column
    '''
    BGRF = {}
    # newline='' lets the csv module handle line endings inside quoted cells.
    # NOTE(review): no explicit encoding is passed, so the platform default is
    # used - confirm the encoding of the input tables.
    with open(filename, 'r', newline='') as tsv_data:
        reader = csv.DictReader(tsv_data, delimiter='\t')
        for rowCounter, row in enumerate(reader):
            # The first two data rows are never imported.
            if rowCounter <= 1:
                continue
            bgrfId = row.get('P13')
            if not bgrfId:
                continue
            # Any non-empty 'check' cell excludes the row; a missing column does not.
            if row.get('check', '') != '':
                continue

            tmpDict = {'Title': row['P4']}
            if row.get('P5'):
                tmpDict['Author'] = row['P5']

            # Additional authors live in the columns '1P5', '2P5', ...
            authorCount = 1
            while row.get(str(authorCount) + 'P5'):
                authorKey = 'Author' + str(authorCount)
                tmpDict[authorKey] = row[str(authorCount) + 'P5']
                if authorKey not in header:
                    header.append(authorKey)
                authorCount += 1

            BGRF[bgrfId] = tmpDict
    return BGRF
# Read both input tables. Reading also extends `header` with any 'AuthorN'
# columns, so it has to happen before the header row is written.
Dict_BGRF100 = getDict(TSV_BGRF100)
Dict_BGRF2000_m100 = getDict(TSV_BGRF2000_m100)
# Merge the tables; for an ID present in both, the second table's entry wins.
DICT_BGRF = {**Dict_BGRF100, **Dict_BGRF2000_m100}

TSV_BGRF = os.path.join(dirname, 'data/importedBGRFList.tsv')
# newline='' is required by the csv module; without it the writer's '\r\n'
# row terminator is translated again on Windows and becomes '\r\r\n'.
with open(TSV_BGRF, 'w', newline='') as out_file:
    tsv_writer = csv.writer(out_file, delimiter='\t')
    tsv_writer.writerow(header)
    for key in sorted(DICT_BGRF):
        entry = DICT_BGRF[key]
        tmpList = [key, entry['Title']]

        # Collect 'Author1', 'Author2', ... up to the first missing number.
        numberedAuthors = []
        authorCount = 1
        while 'Author' + str(authorCount) in entry:
            numberedAuthors.append(entry['Author' + str(authorCount)])
            authorCount += 1

        if 'Author' in entry:
            tmpList.append(entry['Author'])
        elif numberedAuthors:
            # Keep the numbered authors under their own columns even when
            # the primary author is missing.
            tmpList.append('')
        tmpList.extend(numberedAuthors)

        tsv_writer.writerow(tmpList)