From 17655526e5871f537bdd8ef93e48cf2cac39d3e6 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 25 Oct 2024 09:18:43 -0700 Subject: [PATCH 1/4] updated data model type rules to include error param --- tests/data/example.model.csv | 8 ++++---- tests/data/example.model.jsonld | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/data/example.model.csv b/tests/data/example.model.csv index a85cf8cbf..7438c7145 100644 --- a/tests/data/example.model.csv +++ b/tests/data/example.model.csv @@ -32,10 +32,10 @@ Check Regex List Like,,,,,TRUE,DataProperty,,,list like::regex match [a-f] Check Regex Single,,,,,TRUE,DataProperty,,,regex search [a-f] Check Regex Format,,,,,TRUE,DataProperty,,,regex match [a-f] Check Regex Integer,,,,,TRUE,DataProperty,,,regex search ^\d+$ -Check Num,,,,,TRUE,DataProperty,,,num -Check Float,,,,,TRUE,DataProperty,,,float -Check Int,,,,,TRUE,DataProperty,,,int -Check String,,,,,TRUE,DataProperty,,,str +Check Num,,,,,TRUE,DataProperty,,,num error +Check Float,,,,,TRUE,DataProperty,,,float error +Check Int,,,,,TRUE,DataProperty,,,int error +Check String,,,,,TRUE,DataProperty,,,str error Check URL,,,,,TRUE,DataProperty,,,url Check Match at Least,,,,,TRUE,DataProperty,,,matchAtLeastOne Patient.PatientID set Check Match Exactly,,,,,TRUE,DataProperty,,,matchExactlyOne MockComponent.checkMatchExactly set diff --git a/tests/data/example.model.jsonld b/tests/data/example.model.jsonld index 3f13b188e..c4279a605 100644 --- a/tests/data/example.model.jsonld +++ b/tests/data/example.model.jsonld @@ -1258,7 +1258,7 @@ "sms:displayName": "Check Num", "sms:required": "sms:true", "sms:validationRules": [ - "num" + "num error" ] }, { @@ -1277,7 +1277,7 @@ "sms:displayName": "Check Float", "sms:required": "sms:true", "sms:validationRules": [ - "float" + "float error" ] }, { @@ -1296,7 +1296,7 @@ "sms:displayName": "Check Int", "sms:required": "sms:true", "sms:validationRules": [ - "int" + "int error" ] }, { @@ -1315,7 +1315,7 @@ "sms:displayName": "Check String", "sms:required": "sms:true", "sms:validationRules": [ - "str" + "str error" ] }, { From beb8663efd7f93be7b0a1b6233778958fb7776a5 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 25 Oct 2024 09:19:13 -0700 Subject: [PATCH 2/4] fix validate type attribute to use msg level param --- schematic/models/validate_attribute.py | 16 ++-- tests/unit/test_validate_attribute.py | 114 +++++++++++++++++++++++-- 2 files changed, 118 insertions(+), 12 deletions(-) diff --git a/schematic/models/validate_attribute.py b/schematic/models/validate_attribute.py index 47405318f..227be45f7 100644 --- a/schematic/models/validate_attribute.py +++ b/schematic/models/validate_attribute.py @@ -1105,7 +1105,7 @@ def type_validation( ) -> tuple[list[list[str]], list[list[str]]]: """ Purpose: - Check if values for a given manifest attribue are the same type + Check if values for a given manifest attribute are the same type specified in val_rule. Input: - val_rule: str, Validation rule, specifying input type, either @@ -1121,6 +1121,10 @@ def type_validation( TODO: Convert all inputs to .lower() just to prevent any entry errors. """ + + val_rule_components = val_rule.split(" ") + val_rule_type = val_rule_components[0] + specified_type = { "num": (int, np.int64, float), "int": (int, np.int64), @@ -1132,7 +1136,7 @@ def type_validation( warnings: list[list[str]] = [] # num indicates either a float or int. - if val_rule == "num": + if val_rule_type == "num": for i, value in enumerate(manifest_col): entry_has_value = self.get_entry_has_value( entry=value, @@ -1140,7 +1144,7 @@ def type_validation( ) if ( bool(value) - and not isinstance(value, specified_type[val_rule]) + and not isinstance(value, specified_type[val_rule_type]) and entry_has_value ): vr_errors, vr_warnings = GenerateError.generate_type_error( @@ -1152,10 +1156,9 @@ def type_validation( ) if vr_errors: errors.append(vr_errors) - # It seems impossible to get warnings with type rules if vr_warnings: warnings.append(vr_warnings) - elif val_rule in ["int", "float", "str"]: + elif val_rule_type in ["int", "float", "str"]: for i, value in enumerate(manifest_col): entry_has_value = self.get_entry_has_value( entry=value, @@ -1163,7 +1166,7 @@ def type_validation( ) if ( bool(value) - and not isinstance(value, specified_type[val_rule]) + and not isinstance(value, specified_type[val_rule_type]) and entry_has_value ): vr_errors, vr_warnings = GenerateError.generate_type_error( @@ -1175,7 +1178,6 @@ def type_validation( ) if vr_errors: errors.append(vr_errors) - # It seems impossible to get warnings with type rules if vr_warnings: warnings.append(vr_warnings) return errors, warnings diff --git a/tests/unit/test_validate_attribute.py b/tests/unit/test_validate_attribute.py index fa1070428..3da5afbf7 100644 --- a/tests/unit/test_validate_attribute.py +++ b/tests/unit/test_validate_attribute.py @@ -280,6 +280,104 @@ def test_generate_filename_error_unsupported_error_type( error_type="unsupported error type", ) + @pytest.mark.parametrize( + "input_rule, input_num, input_name, input_entry, expected_error, expected_warning", + [ + ( + "x", + 0, + "Patient", + "y", + [], + [ + 0, + "Patient", + "On row 0 the attribute Patient does not contain the proper value type x.", + "y", + ], + ), + ( + "x warning", + 0, + "Patient", + "y", + [], + [ + 0, + "Patient", + "On row 0 the attribute Patient does not contain the proper value type x.", + "y", + ], + ), + ( + "x error", + 0, + "Patient", + "y", + [ + 0, + "Patient", + "On row 0 the attribute Patient does not contain the proper value type x.", + "y", + ], + [], + ), + ], + ) + def test_generate_type_error( + self, + dmge: DataModelGraphExplorer, + input_rule: str, + input_num: int, + input_name: str, + input_entry: str, + expected_error: list[str], + expected_warning: list[str], + ) -> None: + """Testing for GenerateError.generate_type_error""" + error, warning = GenerateError.generate_type_error( + val_rule=input_rule, + row_num=input_num, + attribute_name=input_name, + invalid_entry=input_entry, + dmge=dmge, + ) + import logging + + logging.warning(error) + logging.warning(warning) + assert error == expected_error + assert warning == expected_warning + + @pytest.mark.parametrize( + "input_rule, input_num, input_name, input_entry, exception", + [ + # Empty rule or entry causes a key error + ("", 0, "x", "x", KeyError), + ("x", 0, "x", "", KeyError), + # Empty attribute causes an index error + ("x", 0, "", "x", IndexError), + ], + ) + def test_generate_type_error_exceptions( + self, + dmge: DataModelGraphExplorer, + input_rule: str, + input_num: int, + input_name: str, + input_entry: str, + exception: Exception, + ) -> None: + """Testing for GenerateError.generate_type_error""" + with pytest.raises(exception): + GenerateError.generate_type_error( + val_rule=input_rule, + row_num=input_num, + attribute_name=input_name, + invalid_entry=input_entry, + dmge=dmge, + ) + class TestValidateAttributeObject: """Testing for ValidateAttribute class with all Synapse calls mocked""" @@ -1792,15 +1890,19 @@ def test_type_validation_passing( "input_column, rule", [ (Series([1], name="Check String"), "str"), + (Series([1], name="Check String"), "str error"), (Series(["a"], name="Check Num"), "num"), + (Series(["a"], name="Check Num"), "num error"), (Series(["20"], name="Check Num"), "num"), (Series([1.1], name="Check Int"), "int"), (Series(["a"], name="Check Int"), "int"), + (Series(["a"], name="Check Int"), "int error"), (Series([1], name="Check Float"), "float"), (Series(["a"], name="Check Float"), "float"), + (Series(["a"], name="Check Float"), "float error"), ], ) - def test_type_validation_failing( + def test_type_validation_errors( self, va_obj: ValidateAttribute, input_column: Series, rule: str ) -> None: """ @@ -1814,20 +1916,22 @@ def test_type_validation_failing( @pytest.mark.parametrize( "input_column, rule", [ - (Series([1], name="Check String"), "str error"), (Series([1], name="Check String"), "str warning"), + (Series(["a"], name="Check Num"), "num warning"), + (Series(["a"], name="Check Int"), "int warning"), + (Series(["a"], name="Check Float"), "float warning"), ], ) - def test_type_validation_does_not_work( + def test_type_validation_warnings( self, va_obj: ValidateAttribute, input_column: Series, rule: str ) -> None: """ This tests ValidateAttribute.type_validation - This test shows that the msg level parameter doesn't work + This test shows failing examples using the type rule """ errors, warnings = va_obj.type_validation(rule, input_column) assert len(errors) == 0 - assert len(warnings) == 0 + assert len(warnings) == 1 ################ # url_validation From 525e3da2150df0abe3b367ef576b108582184a2b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 25 Oct 2024 10:46:56 -0700 Subject: [PATCH 3/4] added error handling --- schematic/models/validate_attribute.py | 37 +++++++++++++++++--------- tests/unit/test_validate_attribute.py | 23 ++++++++++++++++ 2 files changed, 47 insertions(+), 13 deletions(-) diff --git a/schematic/models/validate_attribute.py b/schematic/models/validate_attribute.py index 227be45f7..bf0a64933 100644 --- a/schematic/models/validate_attribute.py +++ b/schematic/models/validate_attribute.py @@ -1104,26 +1104,37 @@ def type_validation( manifest_col: pd.Series, ) -> tuple[list[list[str]], list[list[str]]]: """ - Purpose: - Check if values for a given manifest attribute are the same type + Check if values for a given manifest attribute are the same type specified in val_rule. - Input: - - val_rule: str, Validation rule, specifying input type, either + + Args: + val_rule (str): Validation rule, specifying input type, either 'float', 'int', 'num', 'str' - - manifest_col: pd.Series, column for a given + manifest_col (pd.Series): column for a given attribute in the manifest + + Raises: + ValueError: If after splitting the validation rule by spaces, + there are no components left + ValueError: If after splitting the validation rule by spaces, + there are more than two components left + ValueError: If after splitting the validation rule by spaces, + the first component is not one of 'float', 'int', 'num', 'str' + Returns: - -This function will return errors when the user input value - does not match schema specifications. - logger.error or logger.warning. - Errors: list[str] Error details for further storage. - warnings: list[str] Warning details for further storage. - TODO: - Convert all inputs to .lower() just to prevent any entry errors. + tuple[list[list[str]], list[list[str]]]: _description_ """ - val_rule_components = val_rule.split(" ") + if len(val_rule_components) == 0: + raise ValueError("val_rule must contain at least one component.") + if len(val_rule_components) > 2: + raise ValueError("val_rule must contain no more than two components.") val_rule_type = val_rule_components[0] + if val_rule_type not in ['float', 'int', 'num', 'str']: + raise ValueError(( + f"val_rule first component: {val_rule_type} must be one of " + "['float', 'int', 'num', 'str']" + )) specified_type = { "num": (int, np.int64, float), diff --git a/tests/unit/test_validate_attribute.py b/tests/unit/test_validate_attribute.py index 3da5afbf7..1b494ba5a 100644 --- a/tests/unit/test_validate_attribute.py +++ b/tests/unit/test_validate_attribute.py @@ -1933,6 +1933,29 @@ def test_type_validation_warnings( assert len(errors) == 0 assert len(warnings) == 1 + @pytest.mark.parametrize( + "input_column, rule, exception, msg", + [ + (Series([1], name="Check String"), "", ValueError, "val_rule first component: must be one of"), + (Series([1], name="Check String"), "x", ValueError, "val_rule first component: x must be one of"), + (Series([1], name="Check String"), "x x x", ValueError, "val_rule must contain no more than two components."), + ], + ) + def test_type_validation_exceptions( + self, + va_obj: ValidateAttribute, + input_column: Series, + rule: str, + exception: Exception, + msg: str, + ) -> None: + """ + This tests ValidateAttribute.type_validation + This test shows failing examples using the type rule + """ + with pytest.raises(exception, match=msg): + va_obj.type_validation(rule, input_column) + ################ # url_validation ################ From 5fcaac3c0550765a8f11e701ea0e3bfb2e849350 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 25 Oct 2024 10:53:05 -0700 Subject: [PATCH 4/4] run black --- schematic/models/validate_attribute.py | 12 +++++++----- tests/unit/test_validate_attribute.py | 21 ++++++++++++++++++--- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/schematic/models/validate_attribute.py b/schematic/models/validate_attribute.py index bf0a64933..e8d83a444 100644 --- a/schematic/models/validate_attribute.py +++ b/schematic/models/validate_attribute.py @@ -1130,11 +1130,13 @@ def type_validation( if len(val_rule_components) > 2: raise ValueError("val_rule must contain no more than two components.") val_rule_type = val_rule_components[0] - if val_rule_type not in ['float', 'int', 'num', 'str']: - raise ValueError(( - f"val_rule first component: {val_rule_type} must be one of " - "['float', 'int', 'num', 'str']" - )) + if val_rule_type not in ["float", "int", "num", "str"]: + raise ValueError( + ( + f"val_rule first component: {val_rule_type} must be one of " + "['float', 'int', 'num', 'str']" + ) + ) specified_type = { "num": (int, np.int64, float), diff --git a/tests/unit/test_validate_attribute.py b/tests/unit/test_validate_attribute.py index 1b494ba5a..4e05ddc98 100644 --- a/tests/unit/test_validate_attribute.py +++ b/tests/unit/test_validate_attribute.py @@ -1936,9 +1936,24 @@ def test_type_validation_warnings( @pytest.mark.parametrize( "input_column, rule, exception, msg", [ - (Series([1], name="Check String"), "", ValueError, "val_rule first component: must be one of"), - (Series([1], name="Check String"), "x", ValueError, "val_rule first component: x must be one of"), - (Series([1], name="Check String"), "x x x", ValueError, "val_rule must contain no more than two components."), + ( + Series([1], name="Check String"), + "", + ValueError, + "val_rule first component: must be one of", + ), + ( + Series([1], name="Check String"), + "x", + ValueError, + "val_rule first component: x must be one of", + ), + ( + Series([1], name="Check String"), + "x x x", + ValueError, + "val_rule must contain no more than two components.", + ), ], ) def test_type_validation_exceptions(