-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathCreateMLDatasetPatientNum.py
57 lines (54 loc) · 2.05 KB
/
CreateMLDatasetPatientNum.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Created on Apr 14, 2016
@author: Nikola
Created at the University of Manchester, School of Computer Science
Licence GNU/GPL 3.0
'''
from QueryDBClass import QueryDBCalss
from Data.Table import Table
import re
# Name of the CSV file produced by this script.
OUTPUT_PATH = "developmentCellDataset.csv"

# Column names, in the same order format_dataset_row() emits the values.
CSV_HEADER = (u"ArticleId,PMCid,TableName,SpecPragmatics,CellContent,Header,"
              u"Stub,SuperRow,rowN,columnN,function,class\n")

# Matches every digit so numeric values can be masked with 'x'.
_DIGIT_RE = re.compile(r'\d')

# The text type of the running interpreter (unicode on Python 2, str on 3).
_TEXT_TYPE = type(u"")


def to_text(value):
    '''
    Return value as a text (unicode) string.

    Text is returned unchanged. Byte strings are decoded as UTF-8, falling
    back to Latin-1 when they are not valid UTF-8. Anything else, including
    None, is converted with the text constructor, so None becomes "None"
    just as str(None) did in the previous version of this script.
    '''
    if isinstance(value, _TEXT_TYPE):
        return value
    if isinstance(value, bytes):
        # NOTE(review): the database connection charset is not visible from
        # this script; UTF-8 with a Latin-1 fallback is an assumption.
        try:
            return value.decode("utf-8")
        except UnicodeDecodeError:
            return value.decode("latin-1")
    return _TEXT_TYPE(value)


def clean_cell_text(value):
    '''
    Normalise one textual cell attribute for the dataset.

    Newlines are replaced by spaces so that a record stays on one line, and
    every digit is replaced by 'x'.
    '''
    text = to_text(value).replace(u"\n", u" ")
    return _DIGIT_RE.sub(u"x", text)


def quote_csv_field(text):
    '''
    Wrap text in double quotes, doubling any embedded double quote.

    Without the doubling a cell containing '"' would end the quoted field
    early and corrupt the rest of the row.
    '''
    return u'"' + text.replace(u'"', u'""') + u'"'


def format_dataset_row(row, cell_role):
    '''
    Build one CSV line (terminated by a newline) for a single table cell.

    row is a sequence indexed like the records returned by
    getDataForDevDataset(): 0 article id, 1 PMC id, 3 table order,
    4 pragmatics, 7 row number, 8 column number, 9 content, 10 header,
    11 stub, 12 super-row. Indexes 2, 5 and 6 are not written.
    cell_role is the already concatenated role string of the cell.
    The last column is always '?' (the class is unknown at this point).
    '''
    fields = [to_text(row[0]), to_text(row[1]), to_text(row[3]),
              to_text(row[4])]
    # Only the four free-text columns are cleaned and quoted; this keeps the
    # column layout identical to what the script produced before.
    fields.extend(quote_csv_field(clean_cell_text(row[i]))
                  for i in (9, 10, 11, 12))
    fields.extend([to_text(row[7]), to_text(row[8]), to_text(cell_role),
                   u"?"])
    return u",".join(fields) + u"\n"


def main():
    '''
    Export the development cell dataset from the database to OUTPUT_PATH.

    One line is written per record returned by getDataForDevDataset(); the
    roles returned by getCellRole() for the cell are concatenated into the
    "function" column.
    '''
    # Local import: io.open accepts an encoding on both Python 2 and 3.
    import io

    queryclass = QueryDBCalss("localhost", "root", "", "table_db")
    results = queryclass.getDataForDevDataset()
    # The context manager closes the file even if a query or write fails.
    with io.open(OUTPUT_PATH, "w", encoding="utf-8") as target:
        target.write(CSV_HEADER)
        for res in results:
            cell_id = res[5]
            cell_role = u"".join(to_text(role[0])
                                 for role in queryclass.getCellRole(cell_id))
            try:
                target.write(format_dataset_row(res, cell_role))
            except UnicodeError as err:
                # Best effort as before: skip a row that cannot be encoded,
                # but say which one and why instead of hiding every error.
                print("skipping cell %s: %r" % (cell_id, err))
    print("Done")


if __name__ == "__main__":
    main()