Skip to content

Commit

Permalink
Merge pull request #1387 from Sage-Bionetworks/develop-unique-cross-r…
Browse files Browse the repository at this point in the history
…ule-FDS-1766

Match None Manifest Validation Rule FDS-1766
  • Loading branch information
mialy-defelice authored Apr 1, 2024
2 parents 4bb4593 + d1deb0e commit 8458565
Show file tree
Hide file tree
Showing 9 changed files with 758 additions and 235 deletions.
1 change: 1 addition & 0 deletions schematic/models/GE_Helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ def build_expectation_suite(
# "url": "expect_column_values_to_be_valid_urls",
# "matchAtLeastOne": "expect_foreign_keys_in_column_a_to_exist_in_column_b",
# "matchExactlyOne": "expect_foreign_keys_in_column_a_to_exist_in_column_b",
# "matchNone": "expect_compound_columns_to_be_unique",
}

# create blank expectation suite
Expand Down
836 changes: 627 additions & 209 deletions schematic/models/validate_attribute.py

Large diffs are not rendered by default.

14 changes: 8 additions & 6 deletions schematic/models/validate_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ def validate_manifest_rules(
"regex.*",
"matchAtLeastOne.*",
"matchExactlyOne.*",
"matchNone.*",
]

in_house_rules = [
Expand All @@ -128,6 +129,7 @@ def validate_manifest_rules(
"list",
"matchAtLeastOne.*",
"matchExactlyOne.*",
"matchNone.*",
]

# initialize error and warning handling lists.
Expand Down Expand Up @@ -180,6 +182,10 @@ def validate_manifest_rules(

t_err = perf_counter()
regex_re = re.compile("regex.*")

# Instantiate Validate Attribute
validate_attribute = ValidateAttribute(dmge=dmge)

for col in manifest.columns:
# remove trailing/leading whitespaces from manifest
manifest.map(lambda x: x.strip() if isinstance(x, str) else x)
Expand Down Expand Up @@ -217,27 +223,23 @@ def validate_manifest_rules(
t_indiv_rule = perf_counter()
# Validate for each individual validation rule.
validation_method = getattr(
ValidateAttribute, validation_types[validation_type]["type"]
validate_attribute, validation_types[validation_type]["type"]
)

if validation_type == "list":
vr_errors, vr_warnings, manifest_col = validation_method(
self,
rule,
manifest[col],
dmge,
)
manifest[col] = manifest_col
elif validation_type.lower().startswith("match"):
vr_errors, vr_warnings = validation_method(
self, rule, manifest[col], project_scope, dmge, access_token
rule, manifest[col], project_scope, access_token
)
else:
vr_errors, vr_warnings = validation_method(
self,
rule,
manifest[col],
dmge,
)
# Check for validation rule errors and add them to other errors.
if vr_errors:
Expand Down
7 changes: 7 additions & 0 deletions schematic/utils/validate_rules_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,13 @@ def validation_rule_info() -> dict[str, Rule]:
"default_message_level": "warning",
"fixed_arg": None,
},
"matchNone": {
"arguments": (3, 2),
"type": "cross_validation",
"complementary_rules": None,
"default_message_level": "warning",
"fixed_arg": None,
},
"recommended": {
"arguments": (1, 0),
"type": "content_validation",
Expand Down
4 changes: 3 additions & 1 deletion tests/data/example.model.csv
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ CRAM,,,"Genome Build, Genome FASTA",,FALSE,ValidValue,,,
CSV/TSV,,,Genome Build,,FALSE,ValidValue,,,
Genome Build,,"GRCh37, GRCh38, GRCm38, GRCm39",,,TRUE,DataProperty,,,
Genome FASTA,,,,,TRUE,DataProperty,,,
MockComponent,,,"Component, Check List, Check Regex List, Check Regex Single, Check Regex Format, Check Regex Integer, Check Num, Check Float, Check Int, Check String, Check URL,Check Match at Least, Check Match at Least values, Check Match Exactly, Check Match Exactly values, Check Recommended, Check Ages, Check Unique, Check Range, Check Date, Check NA",,FALSE,DataType,,,
MockComponent,,,"Component, Check List, Check Regex List, Check Regex Single, Check Regex Format, Check Regex Integer, Check Num, Check Float, Check Int, Check String, Check URL,Check Match at Least, Check Match at Least values, Check Match Exactly, Check Match Exactly values, Check Match None, Check Match None values, Check Recommended, Check Ages, Check Unique, Check Range, Check Date, Check NA",,FALSE,DataType,,,
Check List,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list strict
Check Regex List,,,,,TRUE,DataProperty,,,list strict::regex match [a-f]
Check Regex Single,,,,,TRUE,DataProperty,,,regex search [a-f]
Expand All @@ -32,8 +32,10 @@ Check String,,,,,TRUE,DataProperty,,,str
Check URL,,,,,TRUE,DataProperty,,,url
Check Match at Least,,,,,TRUE,DataProperty,,,matchAtLeastOne Patient.PatientID set
Check Match Exactly,,,,,TRUE,DataProperty,,,matchExactlyOne MockComponent.checkMatchExactly set
Check Match None,,,,,TRUE,DataProperty,,,matchNone MockComponent.checkMatchNone set error
Check Match at Least values,,,,,TRUE,DataProperty,,,matchAtLeastOne MockComponent.checkMatchatLeastvalues value
Check Match Exactly values,,,,,TRUE,DataProperty,,,matchExactlyOne MockComponent.checkMatchExactlyvalues value
Check Match None values,,,,,TRUE,DataProperty,,,matchNone MockComponent.checkMatchNonevalues value error
Check Recommended,,,,,FALSE,DataProperty,,,recommended
Check Ages,,,,,TRUE,DataProperty,,,protectAges
Check Unique,,,,,TRUE,DataProperty,,,unique error
Expand Down
44 changes: 44 additions & 0 deletions tests/data/example.model.jsonld
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,12 @@
{
"@id": "bts:CheckMatchExactlyvalues"
},
{
"@id": "bts:CheckMatchNone"
},
{
"@id": "bts:CheckMatchNonevalues"
},
{
"@id": "bts:CheckRecommended"
},
Expand Down Expand Up @@ -1221,6 +1227,44 @@
"matchExactlyOne MockComponent.checkMatchExactlyvalues value"
]
},
{
"@id": "bts:CheckMatchNone",
"@type": "rdfs:Class",
"rdfs:comment": "TBD",
"rdfs:label": "CheckMatchNone",
"rdfs:subClassOf": [
{
"@id": "bts:DataProperty"
}
],
"schema:isPartOf": {
"@id": "http://schema.biothings.io"
},
"sms:displayName": "Check Match None",
"sms:required": "sms:true",
"sms:validationRules": [
"matchNone MockComponent.checkMatchNone set error"
]
},
{
"@id": "bts:CheckMatchNonevalues",
"@type": "rdfs:Class",
"rdfs:comment": "TBD",
"rdfs:label": "CheckMatchNonevalues",
"rdfs:subClassOf": [
{
"@id": "bts:DataProperty"
}
],
"schema:isPartOf": {
"@id": "http://schema.biothings.io"
},
"sms:displayName": "Check Match None values",
"sms:required": "sms:true",
"sms:validationRules": [
"matchNone MockComponent.checkMatchNonevalues value error"
]
},
{
"@id": "bts:CheckRecommended",
"@type": "rdfs:Class",
Expand Down
10 changes: 5 additions & 5 deletions tests/data/mock_manifests/Invalid_Test_Manifest.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Component,Check List,Check Regex List,Check Regex Single,Check Regex Format,Check Regex Integer,Check Num,Check Float,Check Int,Check String,Check URL,Check Match at Least,Check Match at Least values,Check Match Exactly,Check Match Exactly values,Check Recommended,Check Ages,Check Unique,Check Range,Check Date,Check NA
MockComponent,"ab,cd","ab,cd,ef",a,a,5.4,6,99.65,7,valid,https://www.google.com/,1738,1738,8085,98085,,6549,str1,70,32-984,7
MockComponent,invalid list values,ab cd ef,q,m,0,c,99,5.63,94,http://googlef.com/,7163,51100,9965,71738,,32851,str1,30,notADate,9.5
MockComponent,"ab,cd","ab,cd,ef",b,b,683902,6.5,62.3,2,valid,https://github.com/Sage-Bionetworks/schematic,8085,8085,1738,210065,,6550,str1,90,84-43-094,Not Applicable
,,,,,,,,,,,,,,,,,,,
Component,Check List,Check Regex List,Check Regex Single,Check Regex Format,Check Regex Integer,Check Num,Check Float,Check Int,Check String,Check URL,Check Match at Least,Check Match at Least values,Check Match Exactly,Check Match Exactly values,Check Match None,Check Match None values,Check Recommended,Check Ages,Check Unique,Check Range,Check Date,Check NA
MockComponent,"ab,cd","ab,cd,ef",a,a,5.4,6,99.65,7,valid,https://www.google.com/,1738,1738,8085,98085,1911,334,,6549,str1,70,32-984,7
MockComponent,invalid list values,ab cd ef,q,m,0,c,99,5.63,94,http://googlef.com/,7163,51100,9965,71738,123,717,,32851,str1,30,notADate,9.5
MockComponent,"ab,cd","ab,cd,ef",b,b,683902,6.5,62.3,2,valid,https://github.com/Sage-Bionetworks/schematic,8085,8085,1738,210065,1427,123,,6550,str1,90,84-43-094,Not Applicable
,,,,,,,,,,,,,,,,,,,,,
10 changes: 5 additions & 5 deletions tests/data/mock_manifests/Valid_Test_Manifest.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Component,Check List,Check Regex List,Check Regex Single,Check Regex Format,Check Regex Integer,Check Num,Check Float,Check Int,Check String,Check URL,Check Match at Least,Check Match at Least values,Check Match Exactly,Check Match Exactly values,Check Recommended,Check Ages,Check Unique,Check Range,Check Date,Check NA
MockComponent,"ab,cd","a,c,f",a,a,0,6,99.65,7,valid,https://www.google.com/,1738,1738,8085,8085,,6571,str1,75,10/21/2022,Not Applicable
MockComponent,"ab,cd","a,c,f",e,b,1234,71,58.4,3,valid,https://www.google.com/,9965,9965,9965,9965,,6571,str2,80,October 21 2022,8
MockComponent,"ab,cd","b,d,f",b,c,683902,6.5,62.3,2,valid,https://www.google.com/,8085,8085,1738,1738,present,32849,str3,95,10/21/2022,Not Applicable
MockComponent,"ab,cd","b,d,f",b,c,0,6.5,62.3,2,valid,https://www.google.com/,79,79,7,7,,32849,str4,55,21/10/2022,695
Component,Check List,Check Regex List,Check Regex Single,Check Regex Format,Check Regex Integer,Check Num,Check Float,Check Int,Check String,Check URL,Check Match at Least,Check Match at Least values,Check Match Exactly,Check Match Exactly values,Check Match None,Check Match None values,Check Recommended,Check Ages,Check Unique,Check Range,Check Date,Check NA
MockComponent,"ab,cd","a,c,f",a,a,0,6,99.65,7,valid,https://www.google.com/,1738,1738,8085,8085,1911,334,,6571,str1,75,10/21/2022,Not Applicable
MockComponent,"ab,cd","a,c,f",e,b,1234,71,58.4,3,valid,https://www.google.com/,9965,9965,9965,9965,1655,717,,6571,str2,80,October 21 2022,8
MockComponent,"ab,cd","b,d,f",b,c,683902,6.5,62.3,2,valid,https://www.google.com/,8085,8085,1738,1738,1427,206,present,32849,str3,95,10/21/2022,Not Applicable
MockComponent,"ab,cd","b,d,f",b,c,0,6.5,62.3,2,valid,https://www.google.com/,79,79,7,7,6635,608,,32849,str4,55,21/10/2022,695
67 changes: 58 additions & 9 deletions tests/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def test_valid_manifest(self, helpers, metadataModel):
errors, warnings = metadataModel.validateModelManifest(
manifestPath=manifestPath,
rootNode=rootNode,
project_scope=["syn23643250"],
project_scope=["syn54126707"],
)

assert errors == []
Expand All @@ -75,7 +75,7 @@ def test_invalid_manifest(self, helpers, dmge, metadataModel):
errors, warnings = metadataModel.validateModelManifest(
manifestPath=manifestPath,
rootNode=rootNode,
project_scope=["syn23643250"],
project_scope=["syn54126707"],
)

# Check errors
Expand Down Expand Up @@ -190,6 +190,30 @@ def test_invalid_manifest(self, helpers, dmge, metadataModel):
invalid_entry=["30"],
)[0] in errors

assert (
GenerateError.generate_cross_warning(
val_rule="matchNone error",
row_num=["3"],
attribute_name="Check Match None",
manifest_id=["syn54126950"],
invalid_entry=["123"],
dmge=dmge,
)[0]
in errors
)

assert (
GenerateError.generate_cross_warning(
val_rule="matchNone value error",
row_num=["4"],
attribute_name="Check Match None values",
invalid_entry=["123"],
dmge=dmge,
)[0]
in errors
)


# check warnings
assert GenerateError.generate_content_error(
val_rule="recommended",
Expand All @@ -210,7 +234,7 @@ def test_invalid_manifest(self, helpers, dmge, metadataModel):
row_num=["3"],
attribute_name="Check Match at Least",
invalid_entry=["7163"],
missing_manifest_ID=["syn27600110", "syn29381803"],
manifest_id=["syn54126997", "syn54127001"],
dmge=dmge,
)[1] in warnings

Expand All @@ -226,13 +250,13 @@ def test_invalid_manifest(self, helpers, dmge, metadataModel):
GenerateError.generate_cross_warning(
val_rule="matchExactlyOne",
attribute_name="Check Match Exactly",
matching_manifests=["syn29862078", "syn27648165"],
matching_manifests=["syn54126950", "syn54127008"],
dmge=dmge,
)[1] in warnings \
or GenerateError.generate_cross_warning(
val_rule="matchExactlyOne",
attribute_name="Check Match Exactly",
matching_manifests=["syn29862066", "syn27648165"],
matching_manifests=["syn54127702", "syn54127008"],
dmge=dmge,
)[1] in warnings

Expand All @@ -243,6 +267,7 @@ def test_invalid_manifest(self, helpers, dmge, metadataModel):
invalid_entry=["71738", "98085", "210065"],
dmge=dmge,
)[1]

warning_in_list = [cross_warning[1] in warning for warning in warnings]
assert any(warning_in_list)

Expand All @@ -255,7 +280,7 @@ def test_in_house_validation(self, helpers, dmge, metadataModel):
manifestPath=manifestPath,
rootNode=rootNode,
restrict_rules=True,
project_scope=["syn23643250"],
project_scope=["syn54126707"],
)

# Check errors
Expand Down Expand Up @@ -342,13 +367,36 @@ def test_in_house_validation(self, helpers, dmge, metadataModel):
dmge=dmge,
)[0] in errors

assert (
GenerateError.generate_cross_warning(
val_rule="matchNone error",
row_num=["3"],
attribute_name="Check Match None",
manifest_id=["syn54126950"],
invalid_entry=["123"],
dmge=dmge,
)[0]
in errors
)

assert (
GenerateError.generate_cross_warning(
val_rule="matchNone value error",
row_num=["4"],
attribute_name="Check Match None values",
invalid_entry=["123"],
dmge=dmge,
)[0]
in errors
)

# Check Warnings
assert GenerateError.generate_cross_warning(
val_rule="matchAtLeastOne",
row_num=["3"],
attribute_name="Check Match at Least",
invalid_entry=["7163"],
missing_manifest_ID=["syn27600110", "syn29381803"],
manifest_id=["syn54126997", "syn54127001"],
dmge=dmge,
)[1] in warnings

Expand All @@ -364,13 +412,13 @@ def test_in_house_validation(self, helpers, dmge, metadataModel):
GenerateError.generate_cross_warning(
val_rule="matchExactlyOne",
attribute_name="Check Match Exactly",
matching_manifests=["syn29862078", "syn27648165"],
matching_manifests=["syn54126950", "syn54127008"],
dmge=dmge,
)[1] in warnings \
or GenerateError.generate_cross_warning(
val_rule="matchExactlyOne",
attribute_name="Check Match Exactly",
matching_manifests=["syn29862066", "syn27648165"],
matching_manifests=["syn54127702", "syn54127008"],
dmge=dmge,
)[1] in warnings

Expand All @@ -382,6 +430,7 @@ def test_in_house_validation(self, helpers, dmge, metadataModel):
dmge=dmge,
)[1] in warnings


@pytest.mark.parametrize(
"manifest_path",
[
Expand Down

0 comments on commit 8458565

Please sign in to comment.