diff --git a/py/stencila/schema/__main__.py b/py/stencila/schema/__main__.py index b5d1f5bc..f30d5289 100644 --- a/py/stencila/schema/__main__.py +++ b/py/stencila/schema/__main__.py @@ -2,7 +2,7 @@ import logging from sys import argv, stderr, stdout -from executor import execute_document +from .interpreter import execute_document def cli_execute(): diff --git a/py/executor.py b/py/stencila/schema/interpreter.py similarity index 83% rename from py/executor.py rename to py/stencila/schema/interpreter.py index a388efd1..1d84e8c7 100644 --- a/py/executor.py +++ b/py/stencila/schema/interpreter.py @@ -11,7 +11,7 @@ import astor from stencila.schema.types import Parameter, CodeChunk, Article, Entity, CodeExpression, ConstantSchema, EnumSchema, \ BooleanSchema, NumberSchema, IntegerSchema, StringSchema, ArraySchema, TupleSchema, ImageObject, Datatable, \ - DatatableColumn, SchemaTypes, SoftwareSourceCode + DatatableColumn, SchemaTypes, SoftwareSourceCode, Function, Variable from stencila.schema.util import from_json, to_json try: @@ -58,31 +58,19 @@ def write(self, string: typing.Union[bytes, str]) -> int: return super(StdoutBuffer, self).buffer.write(string) -class Function: - name: str - parameters: typing.List[Parameter] - returns: SchemaTypes - - -class Variable: - name: str - schema: typing.Optional[SchemaTypes] - - def __init__(self, name: str, schema: typing.Optional[SchemaTypes] = None): - self.name = name - self.schema = schema - - class DocumentCompilationResult: parameters: typing.List[Parameter] = [] code: typing.List[ExecutableCode] = [] - declares: typing.List[typing.Union[Function, Variable]] = [] + assigns: typing.List[typing.Union[Function, Variable]] = [] imports: typing.List[str] = [] class CodeChunkParseResult(typing.NamedTuple): imports: typing.List[typing.Union[str, SoftwareSourceCode]] = [] + assigns: typing.List[typing.Union[Variable]] = [] declares: typing.List[typing.Union[Function, Variable]] = [] + uses: typing.List[str] = [] + reads: 
typing.List[str] = [] def annotation_name_to_schema(name: str) -> typing.Optional[SchemaTypes]: @@ -102,13 +90,59 @@ def annotation_name_to_schema(name: str) -> typing.Optional[SchemaTypes]: return None +def mode_is_read(mode: str) -> bool: + return 'r' in mode or '+' in mode + + +def parse_open_filename(open_call: ast.Call) -> typing.Optional[str]: + # if not hasattr(open_call, 'args') or len(open_call.args) == 0: + # return None + filename = None + + if hasattr(open_call, 'args'): + if len(open_call.args) >= 1: + if not isinstance(open_call.args[0], ast.Str): + return None + filename = open_call.args[0].s + + if len(open_call.args) >= 2: + if not isinstance(open_call.args[1], ast.Str): + return None + + if not mode_is_read(open_call.args[1].s): + return None + + if hasattr(open_call, 'keywords'): + for kw in open_call.keywords: + if not isinstance(kw.value, ast.Str): + continue + + if kw.arg == 'file': + filename = kw.value.s + + if kw.arg == 'mode': + if not mode_is_read(kw.value.s): + return None + + return filename + + def parse_code_chunk(chunk: CodeChunk) -> CodeChunkParseResult: imports: typing.List[str] = [] + assigns: typing.List[Variable] = [] declares: typing.List[typing.Union[Function, Variable]] = [] - + uses: typing.Set[str] = set() + reads: typing.Set[str] = set() seen_vars: typing.Set[str] = set() - for statement in ast.parse(chunk.text).body: + # If this is True, then there should be a call to 'open' somewhere in the code, which means the parser should + # try to find it. 
This is a basic check so there might not be one (e.g. a `reopen(` call), but if 'open(' is NOT in + # the string then there definitely ISN'T one + search_for_open = 'open(' in chunk.text + + chunk_ast = ast.parse(chunk.text) + + for statement in chunk_ast.body: if isinstance(statement, ast.ImportFrom): if statement.module not in imports: imports.append(statement.module) @@ -117,7 +151,7 @@ def parse_code_chunk(chunk: CodeChunk) -> CodeChunkParseResult: if module_name.name not in imports: imports.append(module_name.name) elif isinstance(statement, ast.FunctionDef): - f = Function() + f = Function(statement.name) f.parameters = [] for i, arg in enumerate(statement.args.args): @@ -155,10 +189,25 @@ def parse_code_chunk(chunk: CodeChunk) -> CodeChunkParseResult: if hasattr(statement, 'annotation'): # assignment with Type Annotation v.schema = annotation_name_to_schema(statement.annotation.id) - - declares.append(v) + declares.append(v) + else: + assigns.append(v) seen_vars.add(target_name) - return CodeChunkParseResult(imports, declares) + elif isinstance(statement, ast.Expr) and isinstance(statement.value, ast.Call): + if hasattr(statement.value, 'args'): + for arg in statement.value.args: + if isinstance(arg, ast.Name): + uses.add(arg.id) + + if search_for_open: + for node in ast.walk(chunk_ast): + if isinstance(node, ast.Call) and hasattr(node, 'func') and node.func.id == 'open': + filename = parse_open_filename(node) + + if filename: + reads.add(filename) + + return CodeChunkParseResult(imports, assigns, declares, list(uses), list(reads)) class DocumentCompiler: @@ -189,7 +238,10 @@ def handle_item(self, item: typing.Any, compilation_result: DocumentCompilationR if item.language == self.TARGET_LANGUAGE: # Only add Python code if isinstance(item, CodeChunk): - parse_code_chunk(item) + cc_result = parse_code_chunk(item) + item.assigns = cc_result.assigns + item.uses = cc_result.uses + item.reads = cc_result.reads compilation_result.code.append(item) 
logger.debug('Adding {}'.format(type(item))) @@ -215,7 +267,7 @@ def traverse_list(self, l: typing.List, compilation_result: DocumentCompilationR self.handle_item(child, compilation_result) -class Executor: +class Interpreter: """Execute a list of code blocks, maintaining its own `globals` scope for this execution run.""" globals: typing.Optional[typing.Dict[str, typing.Any]] @@ -421,7 +473,7 @@ def execute_document(cli_args: typing.List[str]): doc_parser = DocumentCompiler() doc_parser.compile(article) - e = Executor() + e = Interpreter() pp = ParameterParser(doc_parser.parameters) pp.parse_cli_args(cli_args) diff --git a/py/stencila/schema/types.py b/py/stencila/schema/types.py index 4c76d75e..233f86be 100644 --- a/py/stencila/schema/types.py +++ b/py/stencila/schema/types.py @@ -191,15 +191,31 @@ def __init__( class CodeChunk(CodeBlock): """A executable chunk of code.""" + alters: Optional[Array[str]] = None + assigns: Optional[Array[Union[str, "Variable"]]] = None + declares: Optional[Array[Union[str, "Variable", "Function"]]] = None + duration: Optional[float] = None + errors: Optional[Array["CodeError"]] = None + imports: Optional[Array[Union[str, "SoftwareSourceCode", "SoftwareApplication"]]] = None outputs: Optional[Array["Node"]] = None + reads: Optional[Array[str]] = None + uses: Optional[Array[Union[str, "Variable"]]] = None def __init__( self, text: str, + alters: Optional[Array[str]] = None, + assigns: Optional[Array[Union[str, "Variable"]]] = None, + declares: Optional[Array[Union[str, "Variable", "Function"]]] = None, + duration: Optional[float] = None, + errors: Optional[Array["CodeError"]] = None, id: Optional[str] = None, + imports: Optional[Array[Union[str, "SoftwareSourceCode", "SoftwareApplication"]]] = None, language: Optional[str] = None, meta: Optional[Dict[str, Any]] = None, - outputs: Optional[Array["Node"]] = None + outputs: Optional[Array["Node"]] = None, + reads: Optional[Array[str]] = None, + uses: Optional[Array[Union[str, 
"Variable"]]] = None ) -> None: super().__init__( text=text, @@ -207,8 +223,24 @@ def __init__( language=language, meta=meta ) + if alters is not None: + self.alters = alters + if assigns is not None: + self.assigns = assigns + if declares is not None: + self.declares = declares + if duration is not None: + self.duration = duration + if errors is not None: + self.errors = errors + if imports is not None: + self.imports = imports if outputs is not None: self.outputs = outputs + if reads is not None: + self.reads = reads + if uses is not None: + self.uses = uses class CodeFragment(Code): @@ -233,11 +265,13 @@ def __init__( class CodeExpression(CodeFragment): """An expression defined in programming language source code.""" + errors: Optional[Array["CodeError"]] = None output: Optional["Node"] = None def __init__( self, text: str, + errors: Optional[Array["CodeError"]] = None, id: Optional[str] = None, language: Optional[str] = None, meta: Optional[Dict[str, Any]] = None, @@ -249,10 +283,31 @@ def __init__( language=language, meta=meta ) + if errors is not None: + self.errors = errors if output is not None: self.output = output +class CodeError(Entity): + """An error that occurred when parsing, compiling or executing some Code.""" + + trace: Optional[str] = None + + def __init__( + self, + id: Optional[str] = None, + meta: Optional[Dict[str, Any]] = None, + trace: Optional[str] = None + ) -> None: + super().__init__( + id=id, + meta=meta + ) + if trace is not None: + self.trace = trace + + class ConstantSchema(Entity): """A schema specifying a constant value that a node must have.""" @@ -999,6 +1054,36 @@ def __init__( self.label = label +class Function(Entity): + """ + A function with a name, which might take Parameters and return a value of a + certain type. 
+ """ + + name: str + parameters: Optional[Array["Parameter"]] = None + returns: Optional["SchemaTypes"] = None + + def __init__( + self, + name: str, + id: Optional[str] = None, + meta: Optional[Dict[str, Any]] = None, + parameters: Optional[Array["Parameter"]] = None, + returns: Optional["SchemaTypes"] = None + ) -> None: + super().__init__( + id=id, + meta=meta + ) + if name is not None: + self.name = name + if parameters is not None: + self.parameters = parameters + if returns is not None: + self.returns = returns + + class Heading(Entity): """Heading""" @@ -1376,11 +1461,12 @@ def __init__( self.content = content -class Parameter(Entity): - """A parameter that can be set and used in evaluated code.""" +class Variable(Entity): + """A variable that can be set and used in code.""" name: str default: Optional["Node"] = None + required: Optional[bool] = None schema: Optional["SchemaTypes"] = None def __init__( @@ -1389,6 +1475,7 @@ def __init__( default: Optional["Node"] = None, id: Optional[str] = None, meta: Optional[Dict[str, Any]] = None, + required: Optional[bool] = None, schema: Optional["SchemaTypes"] = None ) -> None: super().__init__( @@ -1399,10 +1486,39 @@ def __init__( self.name = name if default is not None: self.default = default + if required is not None: + self.required = required if schema is not None: self.schema = schema +class Parameter(Variable): + """A parameter that can be set and used in evaluated code.""" + + default: Optional["Node"] = None + required: Optional[bool] = None + + def __init__( + self, + name: str, + default: Optional["Node"] = None, + id: Optional[str] = None, + meta: Optional[Dict[str, Any]] = None, + required: Optional[bool] = None, + schema: Optional["SchemaTypes"] = None + ) -> None: + super().__init__( + name=name, + id=id, + meta=meta, + schema=schema + ) + if default is not None: + self.default = default + if required is not None: + self.required = required + + class Periodical(CreativeWork): """A periodical 
publication.""" diff --git a/py/tests/code_parsing_test.py b/py/tests/code_parsing_test.py new file mode 100644 index 00000000..0d8ad7f0 --- /dev/null +++ b/py/tests/code_parsing_test.py @@ -0,0 +1,35 @@ +from stencila.schema.interpreter import parse_code_chunk +from stencila.schema.types import Variable, IntegerSchema, CodeChunk + + +def test_variable_parsing(): + """Test that variables without annotations are extracted into `assigns` and variables with annotations into `declares`.""" + c = CodeChunk("no_ann = 5\nwith_ann: int = 10") + + parse_result = parse_code_chunk(c) + + assert len(parse_result.declares) == 1 + assert type(parse_result.declares[0]) == Variable + assert parse_result.declares[0].name == 'with_ann' + assert type(parse_result.declares[0].schema) == IntegerSchema + + assert len(parse_result.assigns) == 1 + assert type(parse_result.assigns[0]) == Variable + assert parse_result.assigns[0].name == 'no_ann' + assert parse_result.assigns[0].schema is None + + +def test_variable_reassignment(): + """ + If a variable is declared and set and then set to another value later, it should only be in the `declares` array. 
+ """ + c = CodeChunk("with_ann: int = 10\nwith_ann = 5") + + parse_result = parse_code_chunk(c) + + assert len(parse_result.declares) == 1 + assert type(parse_result.declares[0]) == Variable + assert parse_result.declares[0].name == 'with_ann' + assert type(parse_result.declares[0].schema) == IntegerSchema + + assert len(parse_result.assigns) == 0 diff --git a/py/tox.ini b/py/tox.ini index 46eb8a2b..93a33f35 100644 --- a/py/tox.ini +++ b/py/tox.ini @@ -6,6 +6,7 @@ deps = pytest pytest-asyncio pytest-cov coverage + astor commands = pytest --cov {envsitepackagesdir}/stencila --cov-report term --cov-report xml tests [pytest] diff --git a/ts/docs.ts b/ts/docs.ts index 5d0434e1..9f866739 100644 --- a/ts/docs.ts +++ b/ts/docs.ts @@ -12,7 +12,15 @@ import { flatten } from 'lodash' import path from 'path' // The main reason this is imported is to configure the log handling import log from './log' -import { Article, CodeFragment, codeFragment, Link, link, Node, Strong } from './types' +import { + Article, + CodeFragment, + codeFragment, + Link, + link, + Node, + Strong +} from './types' import { isArticle } from './util/guards' // eslint-disable-next-line @typescript-eslint/no-floating-promises