-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathicd9_xml_to_json.py
70 lines (49 loc) · 1.56 KB
/
icd9_xml_to_json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import json
import xmltodict
import re
import argparse
import os
# Module-level accumulator of parsed ICD9 items: parse_node() appends one
# {"code": ..., "name": ...} dict per leaf node, and the script section
# below dumps the whole list to JSON.
icd_list = []
def parse_node(node):
    """
    Store a leaf node in icd_list, or descend into a non-leaf node.

    A node is considered a leaf when it has no "nodes" key. Leaves are
    recorded as a dict holding the node's '@code' attribute and its
    'name'. Any other node is handed to parse_item so that its children
    get processed in turn.
    """
    has_children = "nodes" in node.keys()
    if has_children:
        parse_item(node)
        return

    leaf_entry = {"code": node['@code'], "name": node['name']}
    icd_list.append(leaf_entry)
def parse_item(item):
    """
    Run parse_node over every child node of the given item.

    Items without a "nodes" key are ignored. xmltodict represents
    several sibling nodes as a list but a lone node as the bare item,
    so the single-item case is wrapped in a list to let one loop
    handle both shapes.
    """
    if 'nodes' in item.keys():
        children = item['nodes']['node']
        # A lone child arrives as the item itself rather than a list.
        if not isinstance(children, list):
            children = [children]
        for child in children:
            parse_node(child)
# Command-line interface: a required input path, an optional output path
# and a flag that prints how many items were parsed.
parser = argparse.ArgumentParser()
parser.add_argument("path", help="Input file path")
parser.add_argument("-o", "--output", help="Output file path")
parser.add_argument("-n", "--number", action="store_true", help="Display number of parsed items")
args = parser.parse_args()

# Decode the input explicitly as UTF-8 so the result does not depend on the
# platform's default locale encoding. The with-block closes the file on
# exit, so no explicit close() call is needed.
with open(args.path, "r", encoding="utf-8") as ICD9_xml:
    data_dict = xmltodict.parse(ICD9_xml.read())

# Walk the tree from the root 'hcd' element; leaves end up in icd_list.
parse_item(data_dict['hcd'])

# Without -o/--output, write next to the input file with a .json extension.
if args.output:
    out_path = args.output
else:
    out_path = os.path.splitext(args.path)[0] + ".json"

# ensure_ascii=False writes non-ASCII characters verbatim, so the output
# file must be opened as UTF-8; otherwise the write can raise
# UnicodeEncodeError (or produce non-UTF-8 JSON) on non-UTF-8 locales.
with open(out_path, "w", encoding="utf-8") as ICD9_json:
    json.dump(icd_list, ICD9_json, ensure_ascii=False)

if args.number:
    print("Total number of parsed items:", len(icd_list))