-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmappings_to_codelist_rules.py
114 lines (102 loc) · 4.69 KB
/
mappings_to_codelist_rules.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import os
import json
from lxml import etree as ET
def find_equivalent_mapping_element(mapping, rule_mappings):
    """Return the rule mapping equivalent to ``mapping``, or ``None``.

    Two ``mapping`` elements are considered equivalent when their ``path``
    text, their ``condition`` text (a missing ``condition`` child counts as
    the empty string) and the ``ref`` attribute of their ``codelist`` child
    are all equal.

    Args:
        mapping: A ``mapping`` element with ``path`` and ``codelist``
            children and an optional ``condition`` child.
        rule_mappings: A parsed tree whose root supports
            ``xpath('//mapping')``.

    Returns:
        The first element yielded by the ``//mapping`` query that matches,
        or ``None`` when nothing matches.
    """

    def identity(element):
        # Read the three identifying fields in a fixed order: path,
        # condition, codelist reference.
        path_text = element.find('path').text
        condition_node = element.find('condition')
        condition_text = condition_node.text if condition_node is not None else ""
        codelist_ref = element.find('codelist').attrib['ref']
        return path_text, condition_text, codelist_ref

    wanted = identity(mapping)
    candidates = rule_mappings.getroot().xpath('//mapping')
    return next(
        (candidate for candidate in candidates if identity(candidate) == wanted),
        None,
    )
def _split_mapping_path(xpath_text):
    """Split a mapping XPath into ``(direct element path, attribute)``."""
    pieces = xpath_text.split('/@')
    # handle edge case of path:
    # '//iati-activity/crs-add/channel-code/text()'
    if len(pieces) != 2:
        head, _, tail = xpath_text.rpartition('/')
        pieces = [head, tail]
    element_path, attribute = pieces
    # change to direct reference paths; the plural (root) forms are rewritten
    # first so that only the single leading slash remains for them.
    replacements = (
        ('//iati-activities', '/iati-activities'),
        ('//iati-activity', '/iati-activities/iati-activity'),
        # Fixed: this used to map to '/organisations', which disagrees with
        # the '/iati-organisations/...' prefix produced on the next line.
        ('//iati-organisations', '/iati-organisations'),
        ('//iati-organisation', '/iati-organisations/iati-organisation'),
    )
    for shorthand, direct in replacements:
        element_path = element_path.replace(shorthand, direct)
    return element_path, attribute


def _load_allowed_codes(codelist_name):
    """Return the text of every ``code`` element of the named codelist file."""
    codelist = ET.parse(
        os.path.join('IATI-Codelists', 'combined-xml', codelist_name + '.xml')
    )
    return [code.text for code in codelist.getroot().xpath('//code')]


def _parse_condition(condition):
    """Parse a condition into ``(linked attribute, value, default value)``.

    Only the part before the first ``' or '`` is inspected and it is split on
    ``' = '``; a leading ``@`` is stripped from the left side and surrounding
    single quotes from the right side. When an ``' or '`` clause is present
    the value is also returned as the default; otherwise the default is ''.
    NOTE(review): a condition without ``' = '`` raises IndexError, exactly as
    before -- confirm that mapping.xml never contains such a condition.
    """
    parts = condition.split(' or ')
    left_right = parts[0].split(' = ')
    link = left_right[0].lstrip('@')
    link_value = left_right[1].strip("'")
    default_link = link_value if len(parts) > 1 else ''
    return link, link_value, default_link


def mapping_to_codelist_rules(mappings, rule_mappings):
    """Build the codelist rules dictionary from the codelist mappings.

    For every ``mapping`` element in ``mappings`` the result gets an entry
    ``data[path][attribute]``:

    * without a ``condition`` child the entry holds ``"codelist"`` and
      ``"allowedCodes"``;
    * with a ``condition`` child the entry holds ``"conditions"`` with
      ``"mapping"`` (keyed by the condition value, each holding
      ``"codelist"`` and ``"allowedCodes"``), ``"linkedAttribute"`` and,
      when the condition has an ``' or '`` clause, ``"defaultLink"``.
      Conditions already collected for the same path/attribute are extended.

    Child elements of each rule found under ``validation-rules`` of the
    equivalent element in ``rule_mappings`` are copied (tag -> text) next to
    ``"codelist"``/``"allowedCodes"``.

    Args:
        mappings: Parsed mapping tree whose root supports ``xpath``.
        rule_mappings: Parsed rule mapping tree, searched with
            ``find_equivalent_mapping_element``.

    Returns:
        dict: the nested rules dictionary described above.
    """
    data = dict()
    for mapping in mappings.getroot().xpath('//mapping'):
        path, attribute = _split_mapping_path(mapping.find('path').text)
        name = mapping.find('codelist').attrib['ref']
        # get allowed codes into a list
        allowed_codes = _load_allowed_codes(name)

        condition_el = mapping.find('condition')
        entry = {}
        if condition_el is not None:
            link, link_value, default_link = _parse_condition(condition_el.text)
            # Reuse the conditions collected by earlier mappings for the same
            # path/attribute so that all condition values end up together.
            previous = data.get(path, {}).get(attribute)
            conditions = None if previous is None else previous.get("conditions")
            if conditions is None:
                conditions = {"mapping": {}, "linkedAttribute": link}
            entry["conditions"] = conditions
            if default_link:
                conditions["defaultLink"] = default_link
            rule_target = {"codelist": name, "allowedCodes": allowed_codes}
            conditions["mapping"][link_value] = rule_target
        else:
            entry["codelist"] = name
            entry["allowedCodes"] = allowed_codes
            rule_target = entry

        # add validation rules
        rule_mapping = find_equivalent_mapping_element(mapping, rule_mappings)
        if rule_mapping is not None:
            validation_rules = rule_mapping.find('validation-rules')
            if validation_rules is not None:
                for validation_rule in validation_rules:
                    for child in validation_rule:
                        rule_target[child.tag] = child.text

        # The new entry replaces whatever was stored for this attribute.
        data.setdefault(path, {})[attribute] = entry
    return data
# Script body: read the rule mapping and the IATI codelist mapping, build the
# codelist rules and write them to codelist_rules.json.
rule_mappings = ET.parse('rule_mapping.xml')
mappings = ET.parse('IATI-Codelists/mapping.xml')
# Generate the rules before opening the output file: mode 'w' truncates the
# file as soon as it is opened, so a failure during generation would otherwise
# leave an empty codelist_rules.json behind.
data = mapping_to_codelist_rules(mappings, rule_mappings)
with open('codelist_rules.json', 'w') as fp:
    json.dump(data, fp, indent=2)