From 1ad7b186e274b4480174072de512bf82c2eaa2bc Mon Sep 17 00:00:00 2001 From: Zach Sailer Date: Thu, 4 Jan 2024 11:47:28 -0800 Subject: [PATCH 1/2] Add first set of schemas a simple Python package to put schemas on disk --- .gitignore | 160 ++++++++++++++++++ README.md | 18 ++ jupyter_schemas.py | 61 +++++++ .../events/contents_service/v1.json | 34 ++++ jupyter_server/events/gateway_client/v1.json | 33 ++++ jupyter_server/events/kernel_actions/v1.json | 63 +++++++ pyproject.toml | 60 +++++++ 7 files changed, 429 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 jupyter_schemas.py create mode 100644 jupyter_server/events/contents_service/v1.json create mode 100644 jupyter_server/events/gateway_client/v1.json create mode 100644 jupyter_server/events/kernel_actions/v1.json create mode 100644 pyproject.toml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..68bc17f --- /dev/null +++ b/.gitignore @@ -0,0 +1,160 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..804842f --- /dev/null +++ b/README.md @@ -0,0 +1,18 @@ +# [Schema](https://schema.jupyter.org) + +JSON schemas for Jupyter. + +## URL format + +``` +https://schema.jupyter.org/[subproject]/[schema-path]/[version].json +``` + +* `[subproject]` can be, e.g. `jupyter-lab`, `jupyter-server`, `jupyter-hub`, `jupyter-kernel`, `jupyter-notebook`, etc. +* `[version]` defines the version of the library for which you get the schema. Underspecified versions match the latest version of the unspecified part. For example `v1` matches the latest major release, `v1.1` matches the latest minor release, and `v1.1.1` matches an exact version. 
+
+## Install
+
+```
+pip install jupyter-schema
+```
diff --git a/jupyter_schemas.py b/jupyter_schemas.py
new file mode 100644
index 0000000..6e43ce0
--- /dev/null
+++ b/jupyter_schemas.py
@@ -0,0 +1,105 @@
+"""Locate and load the JSON schemas installed in Jupyter's data directory."""
+
+import json
+import logging
+import pathlib
+import typing
+import urllib.parse
+
+from jupyter_core.paths import jupyter_path
+
+__version__ = "0.1"
+
+logger = logging.getLogger(__name__)
+
+
+class JupyterSchemaNotFound(Exception):
+    """Raised when no schema file exists on disk for a given schema URI."""
+
+
+class JupyterSchemaSourceNotFound(Exception):
+    """Raised when the directory that should contain schemas does not exist."""
+
+
+# NOTE(review): only the first entry returned by ``jupyter_path()`` is used, so
+# schemas installed under any other Jupyter data directory are not searched.
+# Confirm this matches where the wheel's shared data actually gets installed.
+ROOT_SCHEMA_PATH: pathlib.Path = pathlib.Path(jupyter_path()[0]) / "schema"
+
+
+def schema_path(schema_uri: str) -> pathlib.Path:
+    """Return the on-disk location for the schema identified by ``schema_uri``.
+
+    Only the path component of the URI is used, resolved below
+    ``ROOT_SCHEMA_PATH``. The returned file is not required to exist.
+    """
+    # Strip leading slashes: joining an absolute path would discard the root.
+    relpath = urllib.parse.urlparse(schema_uri).path.lstrip("/")
+    path = ROOT_SCHEMA_PATH / relpath
+    if path.suffix != ".json":
+        # Append instead of using ``with_suffix``, which would truncate a
+        # dotted version such as ``v1.1`` to ``v1.json``.
+        path = path.with_name(path.name + ".json")
+    return path
+
+
+def get_jupyter_schema(schema_uri: str) -> str:
+    """Return the raw text of the schema identified by ``schema_uri``.
+
+    Raises:
+        JupyterSchemaNotFound: If no schema file exists for the URI.
+    """
+    path = schema_path(schema_uri)
+    if not path.is_file():
+        raise JupyterSchemaNotFound(f"Could not locate schema {schema_uri}")
+    # Read as UTF-8 explicitly rather than relying on the locale's encoding.
+    return path.read_text(encoding="utf-8")
+
+
+def list_schema_paths(project: typing.Optional[str] = None) -> typing.List[str]:
+    """Return the sorted file paths of all schemas found on disk.
+
+    Args:
+        project: Optional sub-directory of ``ROOT_SCHEMA_PATH`` (for example
+            ``"jupyter_server"``) to restrict the search to.
+
+    Raises:
+        JupyterSchemaSourceNotFound: If the directory to search does not exist.
+    """
+    project_path = ROOT_SCHEMA_PATH / project if project else ROOT_SCHEMA_PATH
+    if not project_path.exists():
+        raise JupyterSchemaSourceNotFound(
+            f"Could not locate schemas for {project or project_path}."
+        )
+    # Sort so the result does not depend on filesystem iteration order.
+    return sorted(str(f) for f in project_path.rglob("*") if f.is_file())
+
+
+def list_schemas(project: typing.Optional[str] = None) -> typing.List[str]:
+    """Return the ``$id`` URI of every readable schema found on disk.
+
+    Files that cannot be read or parsed as JSON, or that have no ``$id``, are
+    skipped with a warning instead of aborting the whole listing.
+
+    Args:
+        project: Optional sub-directory of ``ROOT_SCHEMA_PATH`` to restrict
+            the listing to; forwarded to ``list_schema_paths``.
+
+    Raises:
+        JupyterSchemaSourceNotFound: If the directory to search does not exist.
+    """
+    schema_uris = []
+    for fpath in list_schema_paths(project):
+        try:
+            with open(fpath, encoding="utf-8") as f:
+                schema = json.load(f)
+        except (OSError, ValueError) as err:
+            # JSONDecodeError and UnicodeDecodeError both subclass ValueError.
+            logger.warning("Could not read schema file %s: %s", fpath, err)
+            continue
+        try:
+            schema_uris.append(schema["$id"])
+        except (KeyError, TypeError):
+            # TypeError: the top-level JSON value is not an object.
+            logger.warning("Could not find an ID/URI in %s.", fpath)
+    return schema_uris
diff --git a/jupyter_server/events/contents_service/v1.json 
b/jupyter_server/events/contents_service/v1.json new file mode 100644 index 0000000..f9c9b53 --- /dev/null +++ b/jupyter_server/events/contents_service/v1.json @@ -0,0 +1,34 @@ +{ + "$id": "https://schema.jupyter.org/jupyter_server/events/contents_service/v1", + "version": 1, + "title": "Contents Manager activities", + "personal-data": true, + "description": "Record actions on files via the ContentsManager.\n\nThe notebook ContentsManager REST API is used by all frontends to retrieve,\nsave, list, delete and perform other actions on notebooks, directories,\nand other files through the UI. This is pluggable - the default acts on\nthe file system, but can be replaced with a different ContentsManager\nimplementation - to work on S3, Postgres, other object stores, etc.\nThe events get recorded regardless of the ContentsManager implementation\nbeing used.\n\nLimitations:\n\n1. This does not record all filesystem access, just the ones that happen\n explicitly via the notebook server's REST API. Users can (and often do)\n trivially access the filesystem in many other ways (such as `open()` calls\n in their code), so this is usually never a complete record.\n2. As with all events recorded by the notebook server, users most likely\n have the ability to modify the code of the notebook server. Unless other\n security measures are in place, these events should be treated as user\n controlled and not used in high security areas.\n3. Events are only recorded when an action succeeds.\n", + "type": "object", + "required": [ + "action", + "path" + ], + "properties": { + "action": { + "enum": [ + "get", + "create", + "save", + "upload", + "rename", + "copy", + "delete" + ], + "description": "Action performed by the ContentsManager API.\n\nThis is a required field.\n\nPossible values:\n\n1. get\n Get contents of a particular file, or list contents of a directory.\n\n2. save\n Save a file at path with contents from the client\n\n3. 
rename\n Rename a file or directory from value in source_path to\n value in path.\n\n4. copy\n Copy a file or directory from value in source_path to\n value in path.\n\n5. delete\n Delete a file or empty directory at given path\n" + }, + "path": { + "type": "string", + "description": "Logical path on which the operation was performed.\n\nThis is a required field.\n" + }, + "source_path": { + "type": "string", + "description": "Source path of an operation when action is 'copy' or 'rename'\n" + } + } +} diff --git a/jupyter_server/events/gateway_client/v1.json b/jupyter_server/events/gateway_client/v1.json new file mode 100644 index 0000000..4d141a6 --- /dev/null +++ b/jupyter_server/events/gateway_client/v1.json @@ -0,0 +1,33 @@ +{ + "$id": "https://schema.jupyter.org/jupyter_server/events/gateway_client/v1", + "version": 1, + "title": "Gateway Client activities.", + "personal-data": true, + "description": "Record events of a gateway client.\n", + "type": "object", + "required": [ + "status", + "msg" + ], + "properties": { + "status": { + "enum": [ + "error", + "success" + ], + "description": "Status received by Gateway client based on the rest api operation to gateway kernel.\n\nThis is a required field.\n\nPossible values:\n\n1. error\n Error response from a rest api operation to gateway kernel.\n\n2. 
success\n Success response from a rest api operation to gateway kernel.\n" + }, + "status_code": { + "type": "number", + "description": "Http response codes from a rest api operation to gateway kernel.\nExamples: 200, 400, 502, 503, 599 etc.\n" + }, + "msg": { + "type": "string", + "description": "Description of the event being emitted.\n" + }, + "gateway_url": { + "type": "string", + "description": "Gateway url where the remote server exist.\n" + } + } +} diff --git a/jupyter_server/events/kernel_actions/v1.json b/jupyter_server/events/kernel_actions/v1.json new file mode 100644 index 0000000..abee832 --- /dev/null +++ b/jupyter_server/events/kernel_actions/v1.json @@ -0,0 +1,63 @@ +{ + "$id": "https://schema.jupyter.org/jupyter_server/events/kernel_actions/v1", + "version": 1, + "title": "Kernel Manager activities", + "personal-data": true, + "description": "Record events of a kernel manager.\n", + "type": "object", + "required": [ + "action", + "msg" + ], + "properties": { + "action": { + "enum": [ + "start", + "interrupt", + "shutdown", + "restart" + ], + "description": "Action performed by the Kernel Manager.\n\nThis is a required field.\n\nPossible values:\n\n1. start\n A kernel has been started with the given kernel id.\n\n2. interrupt\n A kernel has been interrupted for the given kernel id.\n\n3. shutdown\n A kernel has been shut down for the given kernel id.\n\n4. restart\n A kernel has been restarted for the given kernel id.\n" + }, + "kernel_id": { + "type": "string", + "description": "Kernel id.\n\nThis is a required field for all actions and statuses except action start with status error.\n" + }, + "kernel_name": { + "type": "string", + "description": "Name of the kernel.\n" + }, + "status": { + "enum": [ + "error", + "success" + ], + "description": "Status received from a rest api operation to kernel server.\n\nThis is a required field.\n\nPossible values:\n\n1. error\n Error response from a rest api operation to kernel server.\n\n2. 
success\n Success response from a rest api operation to kernel server.\n" + }, + "status_code": { + "type": "number", + "description": "Http response codes from a rest api operation to kernel server.\nExamples: 200, 400, 502, 503, 599 etc\n" + }, + "msg": { + "type": "string", + "description": "Description of the event specified in action.\n" + } + }, + "if": { + "not": { + "properties": { + "status": { + "const": "error" + }, + "action": { + "const": "start" + } + } + } + }, + "then": { + "required": [ + "kernel_id" + ] + } +} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..d2d5b6f --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,60 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "jupyter-schemas" +dynamic = ["version"] +description = 'Jupyter schemas' +readme = "README.md" +requires-python = ">=3.8" +license = "MIT" +keywords = [] +authors = [{ name = "Zach Sailer", email = "zsailer@apple.com" }] +classifiers = [ + "Development Status :: 4 - Beta", + "Programming Language :: Python", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dependencies = ["jupyter_core"] + +[project.urls] +Documentation = "https://github.com/unknown/jupyter-schemas#readme" +Issues = "https://github.com/unknown/jupyter-schemas/issues" +Source = "https://github.com/unknown/jupyter-schemas" + +[tool.hatch.version] +path = "jupyter_schemas.py" + +[tool.hatch.build.targets.wheel.shared-data] +"jupyter_server" = "share/jupyter/schema/jupyter_server" + +[tool.hatch.envs.default] +dependencies = ["coverage[toml]>=6.5", "pytest"] +[tool.hatch.envs.default.scripts] +test = "pytest {args:tests}" +test-cov = "coverage run 
-m pytest {args:tests}" +cov-report = ["- coverage combine", "coverage report"] +cov = ["test-cov", "cov-report"] + +[[tool.hatch.envs.all.matrix]] +python = ["3.8", "3.9", "3.10", "3.11", "3.12"] + +[tool.hatch.envs.types] +dependencies = ["mypy>=1.0.0"] + + +[tool.coverage.run] +source_pkgs = ["jupyter_schemas", "tests"] +branch = true +parallel = true + + +[tool.coverage.report] +exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] From 0b48a3428bb707b951f3c857b6ea98776b75f974 Mon Sep 17 00:00:00 2001 From: Zach Sailer Date: Thu, 4 Jan 2024 12:07:54 -0800 Subject: [PATCH 2/2] Update Python example --- README.md | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 804842f..4403530 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,30 @@ https://schema.jupyter.org/[subproject]/[schema-path]/[version].json ## Install +The package included in this repo installs all of Jupyter's core schemas in +Jupyter's data directory, e.g. under `share/jupyter/schema/`. + +Install this package using: +``` +pip install jupyter-schemas +``` + +This package also includes a small Python module for fetching these schemas. + +For example, to get a list of all installed schemas, try: +```python +import jupyter_schemas + +print(jupyter_schemas.list_schemas()) ``` -pip install jupyter-schema + +You can fetch the contents of a schema from disk using: +```python +# Use the schema's URI to find it +uri = "https://schema.jupyter.org/jupyter_server/events/contents_service/v1" + +# Load the schema +print(jupyter_schemas.get_jupyter_schema(uri)) ``` + +