Skip to content

Commit

Permalink
Merge pull request #209 from kbase/dts-manifest-schema
Browse files Browse the repository at this point in the history
DTS manifest schema
  • Loading branch information
briehl authored Dec 13, 2024
2 parents afee534 + 05b619b commit 5a7c064
Show file tree
Hide file tree
Showing 11 changed files with 623 additions and 307 deletions.
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ defusedxml = "==0.7.1"
frozendict = "==2.3.8"
globus-sdk = "==1.6.1"
hypothesis = "==6.81.1"
jsonschema = "==4.23.0"
openpyxl = "==3.1.2"
pandas = "==2.2.3"
pytest = "==7.4.0"
Expand Down
676 changes: 390 additions & 286 deletions Pipfile.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion deployment/conf/deployment.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ META_DIR = /kb/deployment/lib/src/data/metadata/
DATA_DIR = /kb/deployment/lib/src/data/bulk/
AUTH_URL = https://ci.kbase.us/services/auth/api/V2/token
CONCIERGE_PATH = /kbaseconcierge
FILE_EXTENSION_MAPPINGS = /kb/deployment/conf/supported_apps_w_extensions.json
FILE_EXTENSION_MAPPINGS = /kb/deployment/conf/supported_apps_w_extensions.json
DTS_MANIFEST_SCHEMA = /kb/deployment/import_specifications/schema/dts_manifest_schema.json
3 changes: 2 additions & 1 deletion deployment/conf/testing.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ DATA_DIR = ./data/bulk/
AUTH_URL = https://ci.kbase.us/services/auth/api/V2/token
CONCIERGE_PATH = /kbaseconcierge
FILE_EXTENSION_MAPPINGS = ./deployment/conf/supported_apps_w_extensions.json
;FILE_EXTENSION_MAPPINGS_PYCHARM = ../deployment/conf/supported_apps_w_extensions.json
;FILE_EXTENSION_MAPPINGS_PYCHARM = ../deployment/conf/supported_apps_w_extensions.json
DTS_MANIFEST_SCHEMA = ./import_specifications/schema/dts_manifest_schema.json
39 changes: 39 additions & 0 deletions import_specifications/schema/dts_manifest_schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"type": "object",
"properties": {
"resources": {"type": "array"},
"instructions": {
"type": "object",
"properties": {
"protocol": {
"type": "string",
"const": "KBase narrative import"
},
"objects": {
"type": "array",
"items": {
"type": "object",
"properties": {
"data_type": {"type": "string"},
"parameters": {
"type": "object",
"additionalProperties": {
"oneOf": [
{"type": "string"},
{"type": "number"},
{"type": "boolean"},
{"type": "null"}
]
}
}
},
"required": ["data_type", "parameters"]
}
}
},
"required": ["protocol", "objects"]
}
},
"required": ["resources", "instructions"]
}
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ defusedxml==0.7.1
frozendict==2.3.8
globus-sdk==1.6.1
hypothesis==6.81.1
jsonschema==4.23.0
openpyxl==3.1.2
pandas==2.2.3
pytest-aiohttp==1.0.4
Expand Down
43 changes: 41 additions & 2 deletions staging_service/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import aiohttp_cors
from aiohttp import web
import jsonschema

from .app_error_formatter import format_import_spec_errors
from .auth2Client import KBaseAuth2
Expand Down Expand Up @@ -42,6 +43,7 @@
VERSION = "1.3.6"

_DATATYPE_MAPPINGS = None
_DTS_MANIFEST_VALIDATOR: jsonschema.Draft202012Validator | None = None

_APP_JSON = "application/json"

Expand Down Expand Up @@ -109,14 +111,17 @@ def _file_type_resolver(path: PathPy) -> FileTypeResolution:


def _make_dts_file_resolver() -> Callable[[Path], FileTypeResolution]:
    """Makes a DTS file resolver

    This injects the DTS schema validator into the FileTypeResolution's parser call.

    The returned resolver only accepts files whose extension (compared
    case-insensitively) equals JSON_EXTENSION; any other file is reported as an
    unsupported type.
    """

    def dts_file_resolver(path: PathPy) -> FileTypeResolution:
        # must be a ".json" file
        suffix = path.suffix[1:] if path.suffix else NO_EXTENSION
        if suffix.lower() != JSON_EXTENSION:
            return FileTypeResolution(unsupported_type=suffix)
        # _DTS_MANIFEST_VALIDATOR is looked up when the parser is invoked, not when this
        # resolver is created, so the validator assigned later by
        # inject_config_dependencies is the one handed to parse_dts_manifest.
        return FileTypeResolution(
            parser=lambda p: parse_dts_manifest(p, _DTS_MANIFEST_VALIDATOR)
        )

    return dts_file_resolver

Expand Down Expand Up @@ -603,6 +608,26 @@ async def authorize_request(request):
return username


def load_and_validate_schema(schema_path: PathPy) -> jsonschema.Draft202012Validator:
    """Loads and validates a JSON schema from a path.

    The file is expected to contain a JSON document that is itself a valid schema
    under the 2020-12 draft schema format: https://json-schema.org/draft/2020-12

    Returns a Draft202012Validator built from the loaded schema.

    Raises an Exception (chained to the underlying jsonschema SchemaError) if the
    document is not a valid 2020-12 schema. Errors raised while opening the file or
    decoding its JSON are not caught here and propagate unchanged.

    This is tested directly as a function in test_app.py, but the whole workflow when
    the app server is run is only tested manually.
    """
    # Read explicitly as UTF-8 so decoding does not depend on the platform's default
    # locale encoding.
    with open(schema_path, "r", encoding="utf-8") as schema_file:
        dts_schema = json.load(schema_file)
    try:
        # check_schema validates the schema document itself, before it is used to
        # validate any manifest.
        jsonschema.Draft202012Validator.check_schema(dts_schema)
    except jsonschema.exceptions.SchemaError as err:
        raise Exception(
            f"Schema file {schema_path} is not a valid JSON schema: {err.message}"
        ) from err
    return jsonschema.Draft202012Validator(dts_schema)


def inject_config_dependencies(config):
"""
# TODO this is pretty hacky dependency injection
Expand All @@ -615,6 +640,7 @@ def inject_config_dependencies(config):
META_DIR = config["staging_service"]["META_DIR"]
CONCIERGE_PATH = config["staging_service"]["CONCIERGE_PATH"]
FILE_EXTENSION_MAPPINGS = config["staging_service"]["FILE_EXTENSION_MAPPINGS"]
DTS_MANIFEST_SCHEMA_PATH = config["staging_service"]["DTS_MANIFEST_SCHEMA"]

if DATA_DIR.startswith("."):
DATA_DIR = os.path.normpath(os.path.join(os.getcwd(), DATA_DIR))
Expand All @@ -626,6 +652,10 @@ def inject_config_dependencies(config):
FILE_EXTENSION_MAPPINGS = os.path.normpath(
os.path.join(os.getcwd(), FILE_EXTENSION_MAPPINGS)
)
if DTS_MANIFEST_SCHEMA_PATH.startswith("."):
DTS_MANIFEST_SCHEMA_PATH = os.path.normpath(
os.path.join(os.getcwd(), DTS_MANIFEST_SCHEMA_PATH)
)

Path._DATA_DIR = DATA_DIR
Path._META_DIR = META_DIR
Expand All @@ -640,6 +670,15 @@ def inject_config_dependencies(config):
if Path._CONCIERGE_PATH is None:
raise Exception("Please provide CONCIERGE_PATH in the config file ")

if DTS_MANIFEST_SCHEMA_PATH is None:
raise Exception("Please provide DTS_MANIFEST_SCHEMA in the config file")

global _DTS_MANIFEST_VALIDATOR
# will raise an Exception if the schema is invalid
# TODO: write automated tests that exercise this code under different config
# conditions and error states.
_DTS_MANIFEST_VALIDATOR = load_and_validate_schema(DTS_MANIFEST_SCHEMA_PATH)

if FILE_EXTENSION_MAPPINGS is None:
raise Exception("Please provide FILE_EXTENSION_MAPPINGS in the config file ")
with open(FILE_EXTENSION_MAPPINGS, "r", encoding="utf-8") as file_extension_mappings_file:
Expand Down
14 changes: 10 additions & 4 deletions staging_service/import_specifications/individual_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from pathlib import Path
from typing import Any, Optional, Tuple, Union

from jsonschema import Draft202012Validator
import magic
import pandas
from frozendict import frozendict
Expand Down Expand Up @@ -319,7 +320,7 @@ def parse_excel(path: Path) -> ParseResults:
return _error(Error(ErrorType.PARSE_FAIL, "No non-header data in file", spcsrc))


def parse_dts_manifest(path: Path) -> ParseResults:
def parse_dts_manifest(path: Path, validator: Draft202012Validator) -> ParseResults:
"""
Parse the provided DTS manifest file. Expected to be JSON, and will fail otherwise.
The manifest should have this format, with expected keys included:
Expand All @@ -342,7 +343,7 @@ def parse_dts_manifest(path: Path) -> ParseResults:
and its value will be a Tuple of frozendicts of the parameters. Also, in keeping
with the xsv parsers, each parameter value is expected to be a PRIMITIVE_TYPE.
TODO: include further details here, and in separate documentation - ADR?
TODO: include further details in separate documentation
"""
spcsrc = SpecificationSource(path)
errors = []
Expand All @@ -352,8 +353,13 @@ def parse_dts_manifest(path: Path) -> ParseResults:
with open(path, "r") as manifest:
manifest_json = json.load(manifest)
if not isinstance(manifest_json, dict):
errors.append(Error(ErrorType.PARSE_FAIL, "Manifest is not a dictionary", spcsrc))

return _error(Error(ErrorType.PARSE_FAIL, "Manifest is not a dictionary", spcsrc))
for err in validator.iter_errors(manifest_json):
err_str = err.message
err_path = list(err.absolute_path)
if err_path:
err_str += f" at {err_path}"
errors.append(Error(ErrorType.PARSE_FAIL, err_str, spcsrc))
except json.JSONDecodeError:
return _error(Error(ErrorType.PARSE_FAIL, "File must be in JSON format", spcsrc))
except FileNotFoundError:
Expand Down
1 change: 1 addition & 0 deletions staging_service/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class Path(object):
_DATA_DIR = None # expects to be set by config
_CONCIERGE_PATH = None # expects to be set by config
_FILE_EXTENSION_MAPPINGS = None # expects to be set by config
_DTS_MANIFEST_SCHEMA_PATH = None # expects to be set by config

__slots__ = ["full_path", "metadata_path", "user_path", "name", "jgi_metadata"]

Expand Down
Loading

0 comments on commit 5a7c064

Please sign in to comment.