Skip to content

Commit

Permalink
Mapping stage picks up on lists in schema
Browse files Browse the repository at this point in the history
  • Loading branch information
pipliggins committed Feb 5, 2025
1 parent ed090ae commit a0dcc3b
Show file tree
Hide file tree
Showing 4 changed files with 169 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/adtl/autoparser/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ def _value_options(f):
return ["True", "False", "None"]
elif "string" in self.target_types[f]:
return self.schema_properties[f].get("enum", np.nan)
elif "array" in self.target_types[f]:
return (
self.schema_properties[f].get("items", {}).get("enum", np.nan)
)
else:
return np.nan

Expand Down
110 changes: 110 additions & 0 deletions tests/test_autoparser/schemas/IB-sample.schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"Case ID": {
"type": "integer",
"description": "Unique identifier for each case",
"PrimaryKey": true
},
"Age": {
"type": "integer",
"description": "Age of the patient",
"minimum": 0
},
"Gender": {
"type": "string",
"description": "Gender of the patient",
"enum": [
"Male",
"Female",
"Other"
]
},
"Location": {
"type": "string",
"description": "City or region where the case was reported"
},
"Date of Onset": {
"type": "string",
"format": "date",
"description": "Date when symptoms first appeared"
},
"Symptoms": {
"type": [
"array",
"null"
],
"description": "List of symptoms exhibited by the patient",
"items": {
"type": "string",
"enum": [
"fever",
"cough",
"dyspnea",
"fatigue",
"myalgia",
"headache",
"anosmia"
]
}
},
"Outcome": {
"type": [
"string",
"null"
],
"description": "Final outcome for the patient (recovered or deceased)",
"enum": [
"Recovered",
"Deceased",
null
]
},
"Vaccination Status": {
"type": [
"string",
"null"
],
"description": "Vaccination status of the patient",
"enum": [
"Yes",
"No",
"Partial",
"Unknown"
]
},
"Days to Recovery": {
"type": [
"integer",
"null"
],
"description": "Number of days to recover, null for deceased cases"
},
"Underlying Conditions": {
"type": [
"string",
"null"
],
"description": "Pre-existing health conditions of the patient",
"enum": [
"None",
"Asthma",
"Diabetes",
"Hypertension",
"Heart Disease",
"Chronic Lung Disease",
"Chronic Kidney Disease",
null
]
}
},
"required": [
"Case ID",
"Age",
"Gender",
"Location",
"Date of Onset"
],
"additionalProperties": false
}
11 changes: 11 additions & 0 deletions tests/test_autoparser/sources/IB-mapping.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
target_field,source_description,source_field,common_values,target_values,value_mapping
Case ID,Case Number,CaseNumber,,,
Age,Person Age,PersonAge,,,
Gender,Sex,Sex,"Male, F, Female, M","Male, Female, Other","female=Female, f=Female, male=Male, m=Male"
Location,City,City,"Chicago, Houston, Philly",,
Date of Onset,Onset Date,OnsetDate,,,
Symptoms,Reported Symptoms,ReportedSymptoms,"fever, fatigue, cough, short breath, headache, muscle pain, coughing","fever, cough, dyspnea, fatigue, myalgia, headache, anosmia","fever=fever, coughing=cough, cough=cough, fatigue=fatigue, short breath=dyspnea, muscle pain=myalgia, headache=headache"
Outcome,Health Outcome,HealthOutcome,"recovered, Recov, Dead, Recovered, Died","Recovered, Deceased, None","recov=Recovered, died=Deceased, recovered=Recovered, dead=Deceased"
Vaccination Status,Vaccination Status,VaxStatus,"yes, no, Partial, No, Yes, none","Yes, No, Partial, Unknown","partial=Partial, none=Unknown, no=No, yes=Yes"
Days to Recovery,Recovery Days,RecoveryDays,,,
Underlying Conditions,Pre-existing Conditions,PreexistingConditions,,"None, Asthma, Diabetes, Hypertension, Heart Disease, Chronic Lung Disease, Chronic Kidney Disease, None",
44 changes: 44 additions & 0 deletions tests/test_autoparser/test_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,50 @@ def test_target_values():
pd.testing.assert_series_equal(mapper.target_values, target_vals)


def test_target_fields_with_enum_lists():
    """Check ``target_values`` against the IB sample schema.

    The expected value for each target field is either ``np.nan`` or the
    list of allowed options for that field. The "Symptoms" entry is the
    case of interest here: its expected options are a list of symptom
    names rather than a missing value.
    """
    mapper = MapperTest(
        "tests/test_autoparser/sources/IB_sample_dd.csv",
        Path("tests/test_autoparser/schemas/IB-sample.schema.json"),
        "fr",
    )

    symptom_options = [
        "fever",
        "cough",
        "dyspnea",
        "fatigue",
        "myalgia",
        "headache",
        "anosmia",
    ]
    condition_options = [
        "None",
        "Asthma",
        "Diabetes",
        "Hypertension",
        "Heart Disease",
        "Chronic Lung Disease",
        "Chronic Kidney Disease",
        None,
    ]

    # Insertion order of this mapping fixes the order of the expected
    # Series' index, so keep the fields in the order listed here.
    expected_by_field = {
        "Case ID": np.nan,
        "Age": np.nan,
        "Gender": ["Male", "Female", "Other"],
        "Location": np.nan,
        "Date of Onset": np.nan,
        "Symptoms": symptom_options,
        "Outcome": ["Recovered", "Deceased", None],
        "Vaccination Status": ["Yes", "No", "Partial", "Unknown"],
        "Days to Recovery": np.nan,
        "Underlying Conditions": condition_options,
    }

    target_vals = pd.Series(
        data=list(expected_by_field.values()),
        index=list(expected_by_field),
    )
    pd.testing.assert_series_equal(mapper.target_values, target_vals)


def test_common_values():
mapper = ANIMAL_MAPPER

Expand Down

0 comments on commit a0dcc3b

Please sign in to comment.