diff --git a/schematic/models/validate_attribute.py b/schematic/models/validate_attribute.py index 47405318f..e8d83a444 100644 --- a/schematic/models/validate_attribute.py +++ b/schematic/models/validate_attribute.py @@ -1104,23 +1104,40 @@ def type_validation( manifest_col: pd.Series, ) -> tuple[list[list[str]], list[list[str]]]: """ - Purpose: - Check if values for a given manifest attribue are the same type + Check if values for a given manifest attribute are the same type specified in val_rule. - Input: - - val_rule: str, Validation rule, specifying input type, either + + Args: + val_rule (str): Validation rule, specifying input type, either 'float', 'int', 'num', 'str' - - manifest_col: pd.Series, column for a given + manifest_col (pd.Series): column for a given attribute in the manifest + + Raises: + ValueError: If after splitting the validation rule by spaces, + there are no components left + ValueError: If after splitting the validation rule by spaces, + there are more than two components left + ValueError: If after splitting the validation rule by spaces, + the first component is not one of 'float', 'int', 'num', 'str' + Returns: - -This function will return errors when the user input value - does not match schema specifications. - logger.error or logger.warning. - Errors: list[str] Error details for further storage. - warnings: list[str] Warning details for further storage. - TODO: - Convert all inputs to .lower() just to prevent any entry errors. + tuple[list[list[str]], list[list[str]]]: _description_ """ + val_rule_components = val_rule.split(" ") + if len(val_rule_components) == 0: + raise ValueError("val_rule must contain at least one component.") + if len(val_rule_components) > 2: + raise ValueError("val_rule must contain no more than two components.") + val_rule_type = val_rule_components[0] + if val_rule_type not in ["float", "int", "num", "str"]: + raise ValueError( + ( + f"val_rule first component: {val_rule_type} must be one of " + "['float', 'int', 'num', 'str']" + ) + ) + specified_type = { "num": (int, np.int64, float), "int": (int, np.int64), @@ -1132,7 +1149,7 @@ def type_validation( warnings: list[list[str]] = [] # num indicates either a float or int. - if val_rule == "num": + if val_rule_type == "num": for i, value in enumerate(manifest_col): entry_has_value = self.get_entry_has_value( entry=value, @@ -1140,7 +1157,7 @@ def type_validation( ) if ( bool(value) - and not isinstance(value, specified_type[val_rule]) + and not isinstance(value, specified_type[val_rule_type]) and entry_has_value ): vr_errors, vr_warnings = GenerateError.generate_type_error( @@ -1152,10 +1169,9 @@ def type_validation( ) if vr_errors: errors.append(vr_errors) - # It seems impossible to get warnings with type rules if vr_warnings: warnings.append(vr_warnings) - elif val_rule in ["int", "float", "str"]: + elif val_rule_type in ["int", "float", "str"]: for i, value in enumerate(manifest_col): entry_has_value = self.get_entry_has_value( entry=value, @@ -1163,7 +1179,7 @@ def type_validation( ) if ( bool(value) - and not isinstance(value, specified_type[val_rule]) + and not isinstance(value, specified_type[val_rule_type]) and entry_has_value ): vr_errors, vr_warnings = GenerateError.generate_type_error( @@ -1175,7 +1191,6 @@ def type_validation( ) if vr_errors: errors.append(vr_errors) - # It seems impossible to get warnings with type rules if vr_warnings: warnings.append(vr_warnings) return errors, warnings diff --git a/tests/data/example.model.csv b/tests/data/example.model.csv index a85cf8cbf..7438c7145 100644 --- a/tests/data/example.model.csv +++ b/tests/data/example.model.csv @@ -32,10 +32,10 @@ Check Regex List Like,,,,,TRUE,DataProperty,,,list like::regex match [a-f] Check Regex Single,,,,,TRUE,DataProperty,,,regex search [a-f] Check Regex Format,,,,,TRUE,DataProperty,,,regex match [a-f] Check Regex Integer,,,,,TRUE,DataProperty,,,regex search ^\d+$ -Check Num,,,,,TRUE,DataProperty,,,num -Check Float,,,,,TRUE,DataProperty,,,float -Check Int,,,,,TRUE,DataProperty,,,int -Check String,,,,,TRUE,DataProperty,,,str +Check Num,,,,,TRUE,DataProperty,,,num error +Check Float,,,,,TRUE,DataProperty,,,float error +Check Int,,,,,TRUE,DataProperty,,,int error +Check String,,,,,TRUE,DataProperty,,,str error Check URL,,,,,TRUE,DataProperty,,,url Check Match at Least,,,,,TRUE,DataProperty,,,matchAtLeastOne Patient.PatientID set Check Match Exactly,,,,,TRUE,DataProperty,,,matchExactlyOne MockComponent.checkMatchExactly set diff --git a/tests/data/example.model.jsonld b/tests/data/example.model.jsonld index 3f13b188e..c4279a605 100644 --- a/tests/data/example.model.jsonld +++ b/tests/data/example.model.jsonld @@ -1258,7 +1258,7 @@ "sms:displayName": "Check Num", "sms:required": "sms:true", "sms:validationRules": [ - "num" + "num error" ] }, { @@ -1277,7 +1277,7 @@ "sms:displayName": "Check Float", "sms:required": "sms:true", "sms:validationRules": [ - "float" + "float error" ] }, { @@ -1296,7 +1296,7 @@ "sms:displayName": "Check Int", "sms:required": "sms:true", "sms:validationRules": [ - "int" + "int error" ] }, { @@ -1315,7 +1315,7 @@ "sms:displayName": "Check String", "sms:required": "sms:true", "sms:validationRules": [ - "str" + "str error" ] }, { diff --git a/tests/unit/test_validate_attribute.py b/tests/unit/test_validate_attribute.py index fa1070428..4e05ddc98 100644 --- a/tests/unit/test_validate_attribute.py +++ b/tests/unit/test_validate_attribute.py @@ -280,6 +280,104 @@ def test_generate_filename_error_unsupported_error_type( error_type="unsupported error type", ) + @pytest.mark.parametrize( + "input_rule, input_num, input_name, input_entry, expected_error, expected_warning", + [ + ( + "x", + 0, + "Patient", + "y", + [], + [ + 0, + "Patient", + "On row 0 the attribute Patient does not contain the proper value type x.", + "y", + ], + ), + ( + "x warning", + 0, + "Patient", + "y", + [], + [ + 0, + "Patient", + "On row 0 the attribute Patient does not contain the proper value type x.", + "y", + ], + ), + ( + "x error", + 0, + "Patient", + "y", + [ + 0, + "Patient", + "On row 0 the attribute Patient does not contain the proper value type x.", + "y", + ], + [], + ), + ], + ) + def test_generate_type_error( + self, + dmge: DataModelGraphExplorer, + input_rule: str, + input_num: int, + input_name: str, + input_entry: str, + expected_error: list[str], + expected_warning: list[str], + ) -> None: + """Testing for GenerateError.generate_type_error""" + error, warning = GenerateError.generate_type_error( + val_rule=input_rule, + row_num=input_num, + attribute_name=input_name, + invalid_entry=input_entry, + dmge=dmge, + ) + import logging + + logging.warning(error) + logging.warning(warning) + assert error == expected_error + assert warning == expected_warning + + @pytest.mark.parametrize( + "input_rule, input_num, input_name, input_entry, exception", + [ + # Empty rule or entry causes a key error + ("", 0, "x", "x", KeyError), + ("x", 0, "x", "", KeyError), + # Empty attribute causes an index error + ("x", 0, "", "x", IndexError), + ], + ) + def test_generate_type_error_exceptions( + self, + dmge: DataModelGraphExplorer, + input_rule: str, + input_num: int, + input_name: str, + input_entry: str, + exception: Exception, + ) -> None: + """Testing for GenerateError.generate_type_error""" + with pytest.raises(exception): + GenerateError.generate_type_error( + val_rule=input_rule, + row_num=input_num, + attribute_name=input_name, + invalid_entry=input_entry, + dmge=dmge, + ) + class TestValidateAttributeObject: """Testing for ValidateAttribute class with all Synapse calls mocked""" @@ -1792,15 +1890,19 @@ def test_type_validation_passing( "input_column, rule", [ (Series([1], name="Check String"), "str"), + (Series([1], name="Check String"), "str error"), (Series(["a"], name="Check Num"), "num"), + (Series(["a"], name="Check Num"), "num error"), (Series(["20"], name="Check Num"), "num"), (Series([1.1], name="Check Int"), "int"), (Series(["a"], name="Check Int"), "int"), + (Series(["a"], name="Check Int"), "int error"), (Series([1], name="Check Float"), "float"), (Series(["a"], name="Check Float"), "float"), + (Series(["a"], name="Check Float"), "float error"), ], ) - def test_type_validation_failing( + def test_type_validation_errors( self, va_obj: ValidateAttribute, input_column: Series, rule: str ) -> None: """ @@ -1814,20 +1916,60 @@ def test_type_validation_failing( @pytest.mark.parametrize( "input_column, rule", [ - (Series([1], name="Check String"), "str error"), (Series([1], name="Check String"), "str warning"), + (Series(["a"], name="Check Num"), "num warning"), + (Series(["a"], name="Check Int"), "int warning"), + (Series(["a"], name="Check Float"), "float warning"), ], ) - def test_type_validation_does_not_work( + def test_type_validation_warnings( self, va_obj: ValidateAttribute, input_column: Series, rule: str ) -> None: """ This tests ValidateAttribute.type_validation - This test shows that the msg level parameter doesn't work + This test shows failing examples using the type rule """ errors, warnings = va_obj.type_validation(rule, input_column) assert len(errors) == 0 - assert len(warnings) == 0 + assert len(warnings) == 1 + + @pytest.mark.parametrize( + "input_column, rule, exception, msg", + [ + ( + Series([1], name="Check String"), + "", + ValueError, + "val_rule first component: must be one of", + ), + ( + Series([1], name="Check String"), + "x", + ValueError, + "val_rule first component: x must be one of", + ), + ( + Series([1], name="Check String"), + "x x x", + ValueError, + "val_rule must contain no more than two components.", + ), + ], + ) + def test_type_validation_exceptions( + self, + va_obj: ValidateAttribute, + input_column: Series, + rule: str, + exception: Exception, + msg: str, + ) -> None: + """ + This tests ValidateAttribute.type_validation + This test shows failing examples using the type rule + """ + with pytest.raises(exception, match=msg): + va_obj.type_validation(rule, input_column) ################ # url_validation