Skip to content

Commit

Permalink
add test_convert_nan_entries_to_empty_strings and stubs for additiona…
Browse files Browse the repository at this point in the history
…l tests
  • Loading branch information
mialy-defelice committed May 6, 2024
1 parent 5056de1 commit a898893
Showing 1 changed file with 82 additions and 0 deletions.
82 changes: 82 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
from pandas.testing import assert_frame_equal
from synapseclient.core.exceptions import SynapseHTTPError

from schematic.models.validate_manifest import ValidateManifest
from schematic.models.metadata import MetadataModel

from schematic.schemas.data_model_parser import DataModelParser
from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer
from schematic.schemas.data_model_jsonld import (
Expand All @@ -43,6 +46,7 @@
)

from schematic.utils import cli_utils, df_utils, general, io_utils, validate_utils
from schematic.utils.df_utils import load_df
from schematic.utils.general import (
calculate_datetime,
check_synapse_cache_size,
Expand Down Expand Up @@ -189,6 +193,14 @@
(1073741825, 1073741824, 1181116006.4),
]

def get_metadataModel(helpers, model_name: str):
    """Build a ``MetadataModel`` for one of the test data models.

    Args:
        helpers: Test helper object; only its ``get_data_path`` method is
            used here, to resolve ``model_name`` to a path.
        model_name: Name of the data model file, passed to
            ``helpers.get_data_path``.

    Returns:
        A ``MetadataModel`` created with location type ``"local"`` and
        ``data_model_labels="class_label"``.
    """
    model_location = helpers.get_data_path(model_name)
    return MetadataModel(
        inputMModelLocation=model_location,
        inputMModelLocationType="local",
        data_model_labels="class_label",
    )


# create temporary files of various sizes based on the request
@pytest.fixture()
Expand Down Expand Up @@ -1060,6 +1072,76 @@ def test_validate_property_schema(self, helpers):

assert error is None

@pytest.mark.parametrize(
    ("manifest", "model", "root_node"),
    [
        (
            "mock_manifests/Patient_test_no_entry_for_cond_required_column.manifest.csv",
            "example.model.csv",
            "Patient",
        ),
        (
            "mock_manifests/Valid_Test_Manifest_with_nones.csv",
            "example_test_nones.model.csv",
            "MockComponent",
        ),
    ],
)
def test_convert_nan_entries_to_empty_strings(
    self, helpers, manifest, model, root_node
):
    """Check the NA-to-empty-string conversion on rule-validated manifests.

    The manifest is loaded, run through ``validate_manifest_rules`` and then
    passed to ``validate_utils.convert_nan_entries_to_empty_strings``. The
    assertions compare selected cells before and after the conversion:
    ``["<NA>"]`` list entries are expected to become ``[""]`` while empty
    lists stay empty.

    Args:
        helpers: Test helper fixture used to resolve data paths and to build
            the data model graph explorer.
        manifest: Manifest file name, resolved via ``helpers.get_data_path``.
        model: Data model file name.
        root_node: Component whose validation schema is generated; also
            selects which set of assertions is run.
    """
    # Resolve the manifest and data model paths.
    manifest_path = helpers.get_data_path(manifest)
    model_path = helpers.get_data_path(model)

    # Gather the parameters needed to run validate_manifest_rules.
    errors = []
    dmge = helpers.get_data_model_graph_explorer(path=model)

    self.data_model_js = DataModelJSONSchema(
        jsonld_path=model_path, graph=dmge.graph
    )
    json_schema = self.data_model_js.get_json_validation_schema(
        root_node, root_node + "_validation"
    )

    # Use a separate name for the DataFrame so the ``manifest`` parameter
    # (a file name) is not rebound to a different type.
    loaded_manifest = load_df(
        manifest_path,
        preserve_raw_input=False,
        allow_na_values=True,
        dtype="string",
    )

    # NOTE(review): the returned model is not used by anything below; the
    # call is kept so that a failure while building it still fails the test.
    # Confirm whether it can be removed.
    get_metadataModel(helpers, model)

    # Instantiate ValidateManifest and run rule validation. The manifest is
    # modified during rule validation, so the returned (updated) manifest is
    # the one fed into the conversion. Errors and warnings are not inspected.
    vm = ValidateManifest(errors, loaded_manifest, manifest_path, dmge, json_schema)
    validated_manifest, _, _ = vm.validate_manifest_rules(
        loaded_manifest,
        dmge,
        restrict_rules=False,
        project_scope=["syn54126707"],
    )

    # Run the conversion under test.
    output = validate_utils.convert_nan_entries_to_empty_strings(
        manifest=validated_manifest
    )

    # Compare the post-validation manifest with the converted output,
    # looking for the expected NA -> empty string conversion.
    if root_node == "Patient":
        assert validated_manifest["Family History"][0] == ["<NA>"]
        assert output["Family History"][0] == [""]
    elif root_node == "MockComponent":
        assert validated_manifest["Check List"][2] == ["<NA>"]
        assert validated_manifest["Check List Like Enum"][2] == []
        assert isinstance(validated_manifest["Check NA"][2], type(pd.NA))

        assert output["Check List"][2] == [""]
        assert output["Check List Like Enum"][2] == []


def test_get_list_robustness(self, helpers):
return

def parse_str_series_to_list(self, helpers):
    """Placeholder: no body has been written yet, so nothing is checked.

    NOTE(review): the name lacks the ``test_`` prefix, so pytest's default
    discovery rules will presumably not collect it -- confirm whether a
    rename to ``test_parse_str_series_to_list`` is intended.
    """
    return None

@pytest.mark.parametrize(
"rule",
[
Expand Down

0 comments on commit a898893

Please sign in to comment.