diff --git a/augur/data/schema-export-v2.json b/augur/data/schema-export-v2.json index edf5b7408..cff363e3c 100644 --- a/augur/data/schema-export-v2.json +++ b/augur/data/schema-export-v2.json @@ -51,44 +51,7 @@ } }, "genome_annotations": { - "description": "Genome annotations (e.g. genes), relative to the reference genome", - "$comment": "Required for the entropy panel", - "type": "object", - "required": ["nuc"], - "additionalProperties": false, - "properties": { - "nuc": { - "type": "object", - "properties": { - "seqid":{ - "description": "Sequence on which the coordinates below are valid. Could be viral segment, bacterial contig, etc", - "$comment": "currently unused by Auspice", - "type": "string" - }, - "type": { - "description": "Type of the feature. could be mRNA, CDS, or similar", - "$comment": "currently unused by Auspice", - "type": "string" - }, - "start": { - "description": "Gene start position (one-based, following GFF format)", - "type": "number" - }, - "end": { - "description": "Gene end position (one-based closed, last position of feature, following GFF format)", - "type": "number" - }, - "strand": { - "description": "Positive or negative strand", - "type": "string", - "enum": ["-","+"] - } - } - } - }, - "patternProperties": { - "^[a-zA-Z0-9*_-]+$": {"$ref": "#/properties/meta/properties/genome_annotations/properties/nuc"} - } + "$ref": "genome_annotations#" }, "filters": { "description": "These appear as filters in the footer of Auspice (which populates the displayed values based upon the tree)", diff --git a/augur/validate.py b/augur/validate.py index 3c42c7b8f..4df92bdc8 100644 --- a/augur/validate.py +++ b/augur/validate.py @@ -25,7 +25,7 @@ class ValidateError(Exception): pass -def load_json_schema(path): +def load_json_schema(path, refs=None): ''' Load a JSON schema from the augur included set of schemas (located in augur/data) @@ -40,6 +40,12 @@ def load_json_schema(path): Validator.check_schema(schema) except jsonschema.exceptions.SchemaError as err: raise ValidateError(f"Schema {path} is not a valid JSON Schema ({Validator.META_SCHEMA['$schema']}). Error: {err}") + + if refs: + # Make the validator aware of additional schemas + schema_store = {k: json.loads(resource_string(__package__, os.path.join("data", v))) for k,v in refs.items()} + resolver = jsonschema.RefResolver.from_schema(schema,store=schema_store) + return Validator(schema, resolver=resolver) return Validator(schema) def load_json(path): @@ -163,7 +169,21 @@ def auspice_config_v2(config_json, **kwargs): validate(config, schema, config_json) def export_v2(main_json, **kwargs): - main_schema = load_json_schema("schema-export-v2.json") + # The main_schema uses references to other (local) JSONs so we need to + # define the mapping to use here. Because the main_schema uses the $id + # "https://nextstrain.org/schemas/dataset/v2" this is combined with the + # value we use for $ref to create a URI, and it is this URI we need to + # specify here so that the local file is used. If we don't provide this + # mapping then we'll attempt to fetch this URI, and nextstrain.org will + # return 404 with a JSON response "{'error': 'Not Found'}"... and jsonschema + # will treat that as a valid schema and proceed. For development purposes I + # suggest you remove the $id from the main schema to avoid remote schema + # requests. Perhaps one day we'll parse this straight from the schema + # itself. james August 2023 + refs = { + 'https://nextstrain.org/schemas/dataset/genome_annotations': "schema-annotations.json" + } + main_schema = load_json_schema("schema-export-v2.json", refs) if main_json.endswith("frequencies.json") or main_json.endswith("entropy.json") or main_json.endswith("sequences.json"): raise ValidateError("This validation subfunction is for the main `augur export v2` JSON only.")