Skip to content

Commit

Permalink
[validate] allow local $refs
Browse files Browse the repository at this point in the history
See added comments for details
  • Loading branch information
jameshadfield committed Aug 20, 2023
1 parent d6246ca commit 37f63b4
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 41 deletions.
1 change: 1 addition & 0 deletions augur/data/schema-annotations.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"type" : "object",
"$schema": "http://json-schema.org/draft-06/schema#",
"$id": "https://nextstrain.org/schemas/augur/annotations",
"title": "Schema for the 'annotations' property (node-data JSON) or the 'genome_annotations' property (auspice JSON)",
"properties": {
"nuc": {
Expand Down
39 changes: 1 addition & 38 deletions augur/data/schema-export-v2.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,44 +51,7 @@
}
},
"genome_annotations": {
"description": "Genome annotations (e.g. genes), relative to the reference genome",
"$comment": "Required for the entropy panel",
"type": "object",
"required": ["nuc"],
"additionalProperties": false,
"properties": {
"nuc": {
"type": "object",
"properties": {
"seqid":{
"description": "Sequence on which the coordinates below are valid. Could be viral segment, bacterial contig, etc",
"$comment": "currently unused by Auspice",
"type": "string"
},
"type": {
"description": "Type of the feature. could be mRNA, CDS, or similar",
"$comment": "currently unused by Auspice",
"type": "string"
},
"start": {
"description": "Gene start position (one-based, following GFF format)",
"type": "number"
},
"end": {
"description": "Gene end position (one-based closed, last position of feature, following GFF format)",
"type": "number"
},
"strand": {
"description": "Positive or negative strand",
"type": "string",
"enum": ["-","+"]
}
}
}
},
"patternProperties": {
"^[a-zA-Z0-9*_-]+$": {"$ref": "#/properties/meta/properties/genome_annotations/properties/nuc"}
}
"$ref": "https://nextstrain.org/schemas/augur/annotations"
},
"filters": {
"description": "These appear as filters in the footer of Auspice (which populates the displayed values based upon the tree)",
Expand Down
35 changes: 32 additions & 3 deletions augur/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class ValidateError(Exception):
pass


def load_json_schema(path):
def load_json_schema(path, refs=None):
'''
Load a JSON schema from the augur included set of schemas
(located in augur/data)
Expand All @@ -40,7 +40,28 @@ def load_json_schema(path):
Validator.check_schema(schema)
except jsonschema.exceptions.SchemaError as err:
raise ValidateError(f"Schema {path} is not a valid JSON Schema ({Validator.META_SCHEMA['$schema']}). Error: {err}")
return Validator(schema)

if refs:
# Make the validator aware of additional schemas
schema_store = {k: json.loads(resource_string(__package__, os.path.join("data", v))) for k,v in refs.items()}
resolver = jsonschema.RefResolver.from_schema(schema,store=schema_store)
schema_validator = Validator(schema, resolver=resolver)
else:
schema_validator = Validator(schema)

# By default $ref URLs which we don't define in a schema_store are fetched
# by jsonschema. This often indicates a typo (the $ref doesn't match the key
# of the schema_store) or we forgot to add a local mapping for a new $ref.
# Either way, Augur should not be accessing the network.
def resolve_remote(url):
    '''
    Refuse to fetch a remote ``$ref`` URL by always raising.

    This replaces the validator resolver's ``resolve_remote`` so that a
    ``$ref`` which is not satisfied locally fails with an explanatory
    message instead of being fetched.

    Parameters
    ----------
    url : str
        The ``$ref`` URL the resolver attempted to fetch.

    Raises
    ------
    Exception
        Always raised, with a message naming the offending URL.
    '''
    # The exception type is not important as jsonschema will catch & re-raise as a RefResolutionError
    # NOTE: the URL is interpolated without `!r`; the message already wraps it
    # in single quotes, so `!r` would print it doubly quoted (''https://...'').
    raise Exception(
        f"The schema used for validation attempted to fetch the remote URL '{url}'. "
        "Augur should resolve schema references to local files, please check the schema used "
        "and update the appropriate schema_store as needed."
    )
schema_validator.resolver.resolve_remote = resolve_remote

return schema_validator


def load_json(path):
with open(path, 'rb') as fh:
Expand Down Expand Up @@ -163,7 +184,15 @@ def auspice_config_v2(config_json, **kwargs):
validate(config, schema, config_json)

def export_v2(main_json, **kwargs):
main_schema = load_json_schema("schema-export-v2.json")
# The main_schema uses references to other schemas, and the suggested use is
# to define these refs as valid URLs. Augur itself should not access schemas
# over the wire so we provide a mapping between URLs and filepaths here. The
# filepath is specified relative to ./augur/data (where all the schemas
# live).
refs = {
'https://nextstrain.org/schemas/augur/annotations': "schema-annotations.json"
}
main_schema = load_json_schema("schema-export-v2.json", refs)

if main_json.endswith("frequencies.json") or main_json.endswith("entropy.json") or main_json.endswith("sequences.json"):
raise ValidateError("This validation subfunction is for the main `augur export v2` JSON only.")
Expand Down

0 comments on commit 37f63b4

Please sign in to comment.