From 8266cf7fc707fa6fd6f1093a35549a8880ae438f Mon Sep 17 00:00:00 2001 From: Nokome Bentley Date: Mon, 22 Jul 2019 22:48:01 +1200 Subject: [PATCH] feat(Python and R bindings): Initial versions of bindings for Python and R --- .gitignore | 4 +- .prettierignore | 4 +- R/util.R | 59 ++++++++++++++ package-lock.json | 53 ++++++++++--- package.json | 9 ++- python.test.sh | 9 +++ python/__init__.py | 1 + r.test.sh | 5 ++ src/bindings.ts | 118 ++++++++++++++++++++++++++++ src/python.ts | 192 +++++++++++++++++++++++++++++++++++++++++++++ src/r.ts | 153 ++++++++++++++++++++++++++++++++++++ tests/article.R | 16 ++++ tests/article.py | 14 ++++ 13 files changed, 618 insertions(+), 19 deletions(-) create mode 100644 R/util.R create mode 100755 python.test.sh create mode 100644 python/__init__.py create mode 100755 r.test.sh create mode 100644 src/bindings.ts create mode 100644 src/python.ts create mode 100644 src/r.ts create mode 100644 tests/article.R create mode 100644 tests/article.py diff --git a/.gitignore b/.gitignore index ceea24a9..8aec24ba 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,5 @@ /dist /node_modules .DS_Store - -### Generated files ### -types.ts +/.mypy_cache *.out.* diff --git a/.prettierignore b/.prettierignore index 101ddb43..782aa044 100644 --- a/.prettierignore +++ b/.prettierignore @@ -2,6 +2,4 @@ built dist node_modules public - -### Generated files ### -types.ts +.mypy_cache diff --git a/R/util.R b/R/util.R new file mode 100644 index 00000000..ae77efd8 --- /dev/null +++ b/R/util.R @@ -0,0 +1,59 @@ +Any <- function () { + self <- list() + class(self) <- "Any" + self +} + +format.Any <- function (type) { + "Any()" +} + +Array <- function (items) { + self <- list(items=items) + class(self) <- "Array" + self +} + +format.Array <- function (type) { + paste0("Array(", paste(sapply(type$items, format), collapse=", "), ")") +} + +Union <- function (...) { + self <- list(types=as.character(c(...))) + class(self) <- "Union" + self +} + +format.Union <- function (type) { + paste0("Union(", paste(sapply(type$types, format), collapse=", "), ")") +} + +isType <- function (value, type) { + if(class(type) == "Any") { + TRUE + } else if (class(type) == "character") { + type_obj <- get(type) + if (class(type_obj) %in% c('Any', 'Array', 'Union')) isType(value, type_obj) + else inherits(value, type) + } else if (class(type) == "Array") { + if(class(value) != "list") return(FALSE) + for(item in value) { + if(!isType(item, type$items)) return(FALSE) + } + TRUE + } else if(class(type) == "Union") { + inherits(value, type$types) + } else { + FALSE + } +} + +assertType <- function (value, type) { + if(!isType(value, type)) stop(paste("value is type", class(value), "not expected type", format(type)), call. = FALSE) + value +} + +setProp <- function (node, name, type, value) { + if(!isType(value, type)) stop(paste("value for", name, "is type", class(value), "not expected type", format(type)), call. = FALSE) + node[[name]] <- value +} diff --git a/package-lock.json b/package-lock.json index 5d0011bf..df905349 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1318,6 +1318,12 @@ "integrity": "sha512-ffCdcrEE5h8DqVxinQjo+2d1q+FV5z7iNtPofw3JsrltSoSVlOGaW0rY8XxtO9XukdTn8TaCGWmk2VFGhI70mg==", "dev": true }, + "@types/toposort": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/@types/toposort/-/toposort-2.0.1.tgz", + "integrity": "sha512-u9mzB6WehAYUw6RJLGO2IbxNpg1ec/ooBfLxjYCAsanETh5M3GVG8dijvwyGzXEdgdWxcIBKnOkeaLnZg+VO8g==", + "dev": true + }, "@types/unist": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.3.tgz", @@ -5502,7 +5508,8 @@ "ansi-regex": { "version": "2.1.1", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "aproba": { "version": "1.2.0", @@ -5523,12 +5530,14 @@ "balanced-match": { "version": "1.0.0", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "brace-expansion": { "version": "1.1.11", "bundled": true, "dev": true, + "optional": true, "requires": { "balanced-match": "^1.0.0", "concat-map": "0.0.1" @@ -5543,17 +5552,20 @@ "code-point-at": { "version": "1.1.0", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "concat-map": { "version": "0.0.1", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "console-control-strings": { "version": "1.1.0", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "core-util-is": { "version": "1.0.2", @@ -5670,7 +5682,8 @@ "inherits": { "version": "2.0.3", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "ini": { "version": "1.3.5", @@ -5682,6 +5695,7 @@ "version": "1.0.0", "bundled": true, "dev": true, + "optional": true, "requires": { "number-is-nan": "^1.0.0" } @@ -5696,6 +5710,7 @@ "version": "3.0.4", "bundled": true, "dev": true, + "optional": true, "requires": { "brace-expansion": "^1.1.7" } @@ -5703,12 +5718,14 @@ "minimist": { "version": "0.0.8", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "minipass": { "version": "2.3.5", "bundled": true, "dev": true, + "optional": true, "requires": { "safe-buffer": "^5.1.2", "yallist": "^3.0.0" @@ -5727,6 +5744,7 @@ "version": "0.5.1", "bundled": true, "dev": true, + "optional": true, "requires": { "minimist": "0.0.8" } @@ -5807,7 +5825,8 @@ "number-is-nan": { "version": "1.0.1", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "object-assign": { "version": "4.1.1", @@ -5819,6 +5838,7 @@ "version": "1.4.0", "bundled": true, "dev": true, + "optional": true, "requires": { "wrappy": "1" } @@ -5904,7 +5924,8 @@ "safe-buffer": { "version": "5.1.2", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "safer-buffer": { "version": "2.1.2", @@ -5940,6 +5961,7 @@ "version": "1.0.2", "bundled": true, "dev": true, + "optional": true, "requires": { "code-point-at": "^1.0.0", "is-fullwidth-code-point": "^1.0.0", @@ -5959,6 +5981,7 @@ "version": "3.0.1", "bundled": true, "dev": true, + "optional": true, "requires": { "ansi-regex": "^2.0.0" } @@ -6002,12 +6025,14 @@ "wrappy": { "version": "1.0.2", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "yallist": { "version": "3.0.3", "bundled": true, - "dev": true + "dev": true, + "optional": true } } }, @@ -15751,6 +15776,12 @@ } } }, + "toposort": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/toposort/-/toposort-2.0.2.tgz", + "integrity": "sha1-riF2gXXRVZ1IvvNUILL0li8JwzA=", + "dev": true + }, "tough-cookie": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-3.0.1.tgz", diff --git a/package.json b/package.json index d76e22a8..e979f28e 100644 --- a/package.json +++ b/package.json @@ -10,10 +10,12 @@ "scripts": { "lint": "prettier --write './**/*.{js,json,md,ts,yaml}' && eslint './**/*.{js,ts}' --fix", "test": "gulp test", - "build": "npm run build:ts", + "build": "npm run build:jsonschema && npm run build:ts && npm run build:py", "build:jsonschema": "ts-node src/schema.ts", "build:jsonld": "gulp jsonld", "build:ts": "ts-node src/typescript.ts", + "build:py": "ts-node src/python.ts", + "build:r": "ts-node src/r.ts", "docs": "npm run docs:readme && npm run docs:build", "docs:readme": "markdown-toc -i --maxdepth=4 README.md", "docs:build": "ts-node scripts/docs && scripts/docs.sh", @@ -35,6 +37,7 @@ "@stencila/encoda": "^0.59.1", "@types/fs-extra": "^8.0.0", "@types/js-yaml": "^3.12.1", + "@types/toposort": "^2.0.1", "ajv": "^6.10.2", "better-ajv-errors": "^0.6.4", "fs-extra": "^8.1.0", @@ -48,6 +51,7 @@ "object.fromentries": "^2.0.0", "tempy": "^0.3.0", "through2": "^3.0.1", + "toposort": "^2.0.2", "ts-node": "^8.3.0", "typescript": "^3.5.3", "vscode-json-languageservice": "^3.2.0" @@ -85,5 +89,6 @@ "prettier": "@stencila/dev-config/prettier-config.json", "release": { "extends": "@stencila/semantic-release-config" - } + }, + "dependencies": {} } diff --git a/python.test.sh b/python.test.sh new file mode 100755 index 00000000..ce9bc0cd --- /dev/null +++ b/python.test.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -e + +export PYTHONPATH=${PYTHONPATH}:${PWD} + +python3 tests/article.py + +mypy tests/article.py diff --git a/python/__init__.py b/python/__init__.py new file mode 100644 index 00000000..0df0a3c0 --- /dev/null +++ b/python/__init__.py @@ -0,0 +1 @@ +import types diff --git a/r.test.sh b/r.test.sh new file mode 100755 index 00000000..5fe10807 --- /dev/null +++ b/r.test.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +set -e + +Rscript tests/article.R diff --git a/src/bindings.ts b/src/bindings.ts new file mode 100644 index 00000000..ea05e73c --- /dev/null +++ b/src/bindings.ts @@ -0,0 +1,118 @@ +/** + * A module providing functions to be used in languages bindings. + */ + +import fs from 'fs-extra'; +import globby from 'globby'; +import path from 'path'; +import toposort from 'toposort'; +import * as schema from './schema'; +import Schema from './schema.d'; + +export type Schema = Schema + +/** + * Read the schemas from `built/*.schema.json`. + */ +export async function read( + glob: string = path.join(__dirname, '..', 'built', '*.schema.json') +): Promise { + // Ensure `*.schema.json` files are up to date + await schema.build() + + // Read in the schemas + const files = await globby(glob) + return Promise.all( + files.map( + async (file: string): Promise => fs.readJSON(file) + ) + ) +} + +/** + * Generate code for 'normal' types (i.e. not union types) which are + * usually translated into classes or similar for the language. + * + * Types are sorted topologically so that schemas come before + * any of their descendants. + */ +export function types(schemas: Schema[]): Schema[] { + const types = schemas.filter(schema => schema.anyOf === undefined) + const map = new Map(schemas.map(schema => [schema.title, schema])) + + const edges = types.map( + (schema): [string, string] => [ + schema.extends !== undefined ? schema.extends : '', + schema.title !== undefined ? schema.title : '' + ] + ) + const ordered = toposort(edges).filter(title => title !== '') + + return ordered + .map(title => { + const schema = map.get(title) + if (schema === undefined) throw new Error(`Holy smokes, "${title}" aint in da map @#!&??!`) + return schema + }) +} + +/** + * Interface for properties giving a little + * more information on each property to be used in code generation + */ +interface Property { + name: string + schema: Schema + inherited: boolean + optional: boolean +} + +/** + * Get properties for a schema. + * + * Properties are arranged in groups according to required (or not) + * and inherited (or not). + */ +export function props( + schema: Schema +): { + all: Property[] + inherited: Property[] + own: Property[] + required: Property[] + optional: Property[] +} { + const { title, properties = {}, required = [] } = schema + + const props = Object.entries(properties) + .filter(([name, h]) => name !== 'type') + .map( + ([name, schema]): Property => { + const { from } = schema + const inherited = from !== title + const optional = required === undefined || !required.includes(name) + return { name, schema, inherited, optional } + } + ) + .sort((a, b) => { + if (a.optional === b.optional) { + if (a.name === b.name) return 0 + if (a.name < b.name) return -1 + return 1 + } + if (a.optional) return 1 + return -1 + }) + + return { + all: props, + inherited: props.filter(prop => prop.inherited), + own: props.filter(prop => !prop.inherited), + required: props.filter(prop => !prop.optional), + optional: props.filter(prop => prop.optional) + } +} + +export function unions(schemas: Schema[]): Schema[] { + return schemas.filter(schema => schema.anyOf !== undefined) +} diff --git a/src/python.ts b/src/python.ts new file mode 100644 index 00000000..1b7d25cd --- /dev/null +++ b/src/python.ts @@ -0,0 +1,192 @@ +/** + * Generate Python language bindings. + */ + +import crypto from 'crypto' +import path from 'path' +import fs from 'fs-extra' +import { + read, + types, + props, + Schema, + unions +} from './bindings' + +/** + * Run `build()` when this file is run as a Node script + */ +// eslint-disable-next-line @typescript-eslint/no-floating-promises +if (module.parent === null) build() + +/** + * A list of global definitions required for enums + */ +let globals: string[] = [] + +/** + * Generate `python/types.py` from schemas. + */ +async function build(): Promise { + const schemas = await read() + + globals = [] + const classesCode = types(schemas).map(classGenerator).join('') + const unionsCode = unions(schemas).map(unionGenerator).join('') + const globalsCode = globals.join('\n') + + const code = ` +from typing import Any, Dict, List as Array, Optional, Union +from enum import Enum + +${globalsCode} + +${classesCode} + +${unionsCode} +` + + await fs.writeFile(path.join(__dirname, '..', 'python', 'types.py'), code) +} + +/** + * Generate a `class`. + */ +function classGenerator (schema: Schema): string { + const { title, extends: parent, description } = schema + const { inherited, own, required, optional } = props(schema) + + const base = parent !== undefined ? '(' + parent + ')' : '' + const clas = `class ${title}${base}:\n """\n ${description}\n """\n\n` + + const attrs = own + .map(({ name, schema, optional }) => { + const type = schemaToType(schema) + const attrType = optional ? `Optional[${type}]` : type + return ` ${name}: ${attrType}` + }) + .join('\n') + + const initPars = + '\n' + + [ + ' self', + ...required.map( + ({ name, schema }) => ` ${name}: ${schemaToType(schema)}` + ), + ...optional.map( + ({ name, schema }) => + ` ${name}: Optional[${schemaToType(schema)}] = None` + ) + ].join(',\n') + + '\n ' + + const superArgs = + '\n' + + inherited.map(({ name }) => ` ${name}=${name}`).join(',\n') + + '\n ' + const superCall = ` super().__init__(${superArgs})` + + const initSetters = own + .map(({ name }) => ` if ${name} != None: self.${name} = ${name}`) + .join('\n') + + const init = ` def __init__(${initPars}) -> None:\n${superCall}\n${initSetters}\n\n` + + return clas + (attrs.length > 0 ? attrs + '\n\n' : '') + init + '\n' +} + +/** + * Generate a `Union` type. + */ +function unionGenerator (schema: Schema): string { + const {title, description} = schema + let code = `"""\n${description}\n"""\n` + code += `${title} = ${schemaToType(schema)}\n\n` + return code +} + +/** + * Convert a schema definition to a Python type + */ +function schemaToType(schema: Schema): string { + const { type, anyOf, allOf, $ref } = schema + + if ($ref !== undefined) return `"${$ref.replace('.schema.json', '')}"` + if (anyOf !== undefined) return anyOfToType(anyOf) + if (allOf !== undefined) return allOfToType(allOf) + if (schema.enum !== undefined) return enumToType(schema.enum) + + if (type === 'null') return 'None' + if (type === 'boolean') return 'bool' + if (type === 'number') return 'float' + if (type === 'integer') return 'int' + if (type === 'string') return 'str' + if (type === 'array') return arrayToType(schema) + if (type === 'object') return 'Dict[str, Any]' + + throw new Error(`Unhandled schema: ${JSON.stringify(schema)}`) +} + +/** + * Convert a schema with the `anyOf` property to a Python `Union` type. + */ +function anyOfToType(anyOf: Schema[]): string { + const types = anyOf + .map(schema => schemaToType(schema)) + .reduce( + (prev: string[], curr) => (prev.includes(curr) ? prev : [...prev, curr]), + [] + ) + if (types.length === 0) return '' + if (types.length === 1) return types[0] + return `Union[${types.join(', ')}]` +} + +/** + * Convert a schema with the `allOf` property to a Python type. + * + * If the `allOf` is singular then just use that (this usually arises + * because the `allOf` is used for a property with a `$ref`). Otherwise, + * use the last schema (this is usually because one or more codecs can be + * used on a property and the last schema is the final, expected, type of + * the property). + */ +function allOfToType(allOf: Schema[]): string { + if (allOf.length === 1) return schemaToType(allOf[0]) + else return schemaToType(allOf[allOf.length - 1]) +} + +/** + * Convert a schema with the `array` property to a Python `Array` type. + */ +function arrayToType(schema: Schema): string { + const items = Array.isArray(schema.items) + ? anyOfToType(schema.items) + : schema.items !== undefined + ? schemaToType(schema.items) + : 'Any' + return `Array[${items}]` +} + +/** + * Convert a schema with the `enum` property to a Python `Enum`. + */ +function enumToType(enu: (string | number)[]): string { + const values = enu + .map(schema => { + return JSON.stringify(schema) + }) + .join(', ') + const signature = crypto + .createHash('md5') + .update(values) + .digest('hex') + + const name = `Enum${signature}` + const defn = `${name} = Enum("${signature}", [${values}])` + + if (!globals.includes(defn)) globals.push(defn) + + return `"${name}"` +} diff --git a/src/r.ts b/src/r.ts new file mode 100644 index 00000000..fcc5a037 --- /dev/null +++ b/src/r.ts @@ -0,0 +1,153 @@ +/** ` + * Module for generating R language bindings. + */ + +import fs from 'fs-extra' +import path from 'path' +import { + read, + types, + props, + Schema, + unions +} from './bindings' + +/** + * Run `build()` when this file is run as a Node script + */ +// eslint-disable-next-line @typescript-eslint/no-floating-promises +if (module.parent === null) build() + +/** + * Generate `src/types.R` from schemas. + */ +async function build(): Promise { + const schemas = await read() + + const classesCode = types(schemas).map(classGenerator).join('\n') + const unionsCode = unions(schemas).map(unionGenerator).join('\n') + + const code = ` +${classesCode} + +${unionsCode} +` + + await fs.writeFile(path.join(__dirname, '..', 'R', 'types.R'), code) +} + +/** + * Generate a function for a normal type. + */ +function classGenerator (schema: Schema): string { + const { title, extends: parent, description, properties } = schema + const { inherited, own, required, optional } = props(schema) + + let code = `${title} <- function (\n` + code += [ + ...required.map(({ name }) => ` ${name}`), + ...optional.map(({ name }) => ` ${name}`) + ].join(',\n') + code += `\n){\n` + + if (parent === undefined) { + code += ` self <- list()\n` + } else { + code += ` self <- ${parent}(\n` + code += inherited.map(({ name }) => ` ${name}=${name}`).join(',\n') + code += '\n )\n' + } + + code += own + .map(({ name, schema }) => { + const type = schemaToType(schema) + return ` if(!missing(${name})) setProp(self, "${name}", ${type}, ${name})` + }) + .join('\n') + + code += `\n class(self) <- c(class(self), "${title}")` + code += `\n self` + + code += `\n}\n\n` + + return code +} + +/** + * Generate a `Union` type. + */ +function unionGenerator (schema: Schema): string { + const {title = '', description = title} = schema + let code = `#\` ${description.replace('\n', '\n#` ')}\n` + code += `${title} = ${schemaToType(schema)}\n\n` + return code +} + +/** + * Convert a schema definition to a R class + */ +function schemaToType(schema: Schema): string { + const { type, anyOf, allOf, $ref } = schema + + if ($ref !== undefined) return `"${$ref.replace('.schema.json', '')}"` + if (anyOf !== undefined) return anyOfToType(anyOf) + if (allOf !== undefined) return allOfToType(allOf) + if (schema.enum !== undefined) return enumToType(schema.enum) + + if (type === 'null') return '"NULL"' + if (type === 'boolean') return '"logical"' + if (type === 'number') return '"numeric"' + if (type === 'integer') return '"numeric"' + if (type === 'string') return '"character"' + if (type === 'array') return arrayToType(schema) + if (type === 'object') return '"list"' + + throw new Error(`Unhandled schema: ${JSON.stringify(schema)}`) +} + +/** + * Convert a schema with the `anyOf` property to a `Union` type checker. + */ +function anyOfToType(anyOf: Schema[]): string { + const types = anyOf + .map(schema => schemaToType(schema)) + .reduce( + (prev: string[], curr) => (prev.includes(curr) ? prev : [...prev, curr]), + [] + ) + if (types.length === 0) return '' + if (types.length === 1) return types[0] + return `Union(${types.join(', ')})` +} + +/** + * Convert a schema with the `allOf` property to a Python type. + */ +function allOfToType(allOf: Schema[]): string { + if (allOf.length === 1) return schemaToType(allOf[0]) + else return schemaToType(allOf[allOf.length - 1]) +} + +/** + * Convert a schema with the `array` property to an `Array` type checker. + */ +function arrayToType(schema: Schema): string { + const items = Array.isArray(schema.items) + ? anyOfToType(schema.items) + : schema.items !== undefined + ? schemaToType(schema.items) + : 'Any()' + return `Array(${items})` +} + +/** + * Convert a schema with the `enum` property to an `Enum` type checker. + */ +function enumToType(enu: (string | number)[]): string { + const values = enu + .map(schema => { + return JSON.stringify(schema) + }) + .join(', ') + return `"Enum"` +} diff --git a/tests/article.R b/tests/article.R new file mode 100644 index 00000000..05f0b3ab --- /dev/null +++ b/tests/article.R @@ -0,0 +1,16 @@ +source("R/util.R") +source("R/types.R") + +article = Article( + title='', + authors=list(Person( + givenNames=list('Jane') + )), + content=list( + Paragraph(content=list('Hello')) + ) +) + +aarticle = Article( + title = 0 +) diff --git a/tests/article.py b/tests/article.py new file mode 100644 index 00000000..1e71c535 --- /dev/null +++ b/tests/article.py @@ -0,0 +1,14 @@ +from python.types import Article, Person, Paragraph + +article = Article( + title='', + authors=[ + Person( + givenNames=['Jane'] + ), + ], + content=[ + Paragraph(['Hello']) + ] +) +