diff --git a/tests/test_utils.py b/tests/test_utils.py index 525778185..8f6ef20e5 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -19,6 +19,9 @@ from pandas.testing import assert_frame_equal from synapseclient.core.exceptions import SynapseHTTPError +from schematic.models.validate_manifest import ValidateManifest +from schematic.models.metadata import MetadataModel + from schematic.schemas.data_model_parser import DataModelParser from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer from schematic.schemas.data_model_jsonld import ( @@ -43,6 +46,7 @@ ) from schematic.utils import cli_utils, df_utils, general, io_utils, validate_utils +from schematic.utils.df_utils import load_df from schematic.utils.general import ( calculate_datetime, check_synapse_cache_size, @@ -189,6 +193,14 @@ (1073741825, 1073741824, 1181116006.4), ] +def get_metadataModel(helpers, model_name:str): + metadataModel = MetadataModel( + inputMModelLocation=helpers.get_data_path(model_name), + inputMModelLocationType="local", + data_model_labels="class_label", + ) + return metadataModel + # create temporary files with various size based on request @pytest.fixture() @@ -1060,6 +1072,76 @@ def test_validate_property_schema(self, helpers): assert error is None + @pytest.mark.parametrize( + ("manifest", "model", "root_node"), + [("mock_manifests/Patient_test_no_entry_for_cond_required_column.manifest.csv", + "example.model.csv", "Patient"), + ("mock_manifests/Valid_Test_Manifest_with_nones.csv", + "example_test_nones.model.csv", "MockComponent")] + ) + def test_convert_nan_entries_to_empty_strings( + self, helpers, manifest, model, root_node): + # Get manifest and data model path + manifest_path = helpers.get_data_path(manifest) + model_path = helpers.get_data_path(model) + + ## Gather parameters needed to run validate_manifest_rules + errors = [] + load_args = { + "dtype": "string", + } + + dmge = helpers.get_data_model_graph_explorer(path=model) + 
self.data_model_js = DataModelJSONSchema( + jsonld_path=model_path, graph=dmge.graph + ) + json_schema = self.data_model_js.get_json_validation_schema( + root_node, root_node + "_validation" + ) + + manifest = load_df( + manifest_path, + preserve_raw_input=False, + allow_na_values=True, + **load_args,) + + metadataModel = get_metadataModel(helpers, model) + + # Instantiate ValidateManifest, and run manifest validation + # In this step the manifest is modified while running rule + # validation so need to do this step to get the updated manifest. + vm = ValidateManifest( + errors, manifest, manifest_path, dmge, json_schema) + manifest, vmr_errors, vmr_warnings = vm.validate_manifest_rules( + manifest, dmge, restrict_rules=False, project_scope=["syn54126707"], + ) + + # Run convert nan function + output = validate_utils.convert_nan_entries_to_empty_strings( + manifest=manifest + ) + + # Compare post rule validation manifest with output manifest looking + # for expected nan to empty string conversion + if root_node == 'Patient': + assert manifest['Family History'][0] == [''] + assert output['Family History'][0] == [''] + elif root_node == 'MockComponent': + assert manifest['Check List'][2] == [''] + assert manifest['Check List Like Enum'][2] == [] + assert type(manifest['Check NA'][2]) == type(pd.NA) + + assert output['Check List'][2] == [''] + assert output['Check List Like Enum'][2] == [] + + + def test_get_list_robustness(self, helpers): + return + + def parse_str_series_to_list(self, helpers): + return + @pytest.mark.parametrize( "rule", [