diff --git a/traits/observation/_generated_parser.py b/traits/observation/_generated_parser.py index d15eafc81..73a16d162 100644 --- a/traits/observation/_generated_parser.py +++ b/traits/observation/_generated_parser.py @@ -1,9 +1,11 @@ -# The file was automatically generated by Lark v0.8.5 +# The file was automatically generated by Lark v1.1.5 +__version__ = "1.1.5" + # # # Lark Stand-alone Generator Tool # ---------------------------------- -# Generates a stand-alone LALR(1) parser with a standard lexer +# Generates a stand-alone LALR(1) parser # # Git: https://github.com/erezsh/lark # Author: Erez Shinan (erezshin@gmail.com) @@ -24,120 +26,268 @@ # # -import os -from io import open +from abc import ABC, abstractmethod +from collections.abc import Sequence +from types import ModuleType +from typing import ( + TypeVar, Generic, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any, + Union, Iterable, IO, TYPE_CHECKING, overload, + Pattern as REPattern, ClassVar, Set, Mapping +) + class LarkError(Exception): pass + +class ConfigurationError(LarkError, ValueError): + pass + + +def assert_config(value, options: Collection, msg='Got %r, expected one of %s'): + if value not in options: + raise ConfigurationError(msg % (value, options)) + + class GrammarError(LarkError): pass + class ParseError(LarkError): pass + class LexError(LarkError): pass -class UnexpectedEOF(ParseError): - def __init__(self, expected): - self.expected = expected - - message = ("Unexpected end-of-input. Expected one of: \n\t* %s\n" % '\n\t* '.join(x.name for x in self.expected)) - super(UnexpectedEOF, self).__init__(message) - +T = TypeVar('T') class UnexpectedInput(LarkError): + #-- + line: int + column: int pos_in_stream = None + state: Any + _terminals_by_name = None - def get_context(self, text, span=40): + def get_context(self, text: str, span: int=40) -> str: + #-- + assert self.pos_in_stream is not None, self pos = self.pos_in_stream start = max(pos - span, 0) end = pos + span - before = text[start:pos].rsplit('\n', 1)[-1] - after = text[pos:end].split('\n', 1)[0] - return before + after + '\n' + ' ' * len(before) + '^\n' - - def match_examples(self, parse_fn, examples): - """ Given a parser instance and a dictionary mapping some label with - some malformed syntax examples, it'll return the label for the - example that bests matches the current error. 
- """ + if not isinstance(text, bytes): + before = text[start:pos].rsplit('\n', 1)[-1] + after = text[pos:end].split('\n', 1)[0] + return before + after + '\n' + ' ' * len(before.expandtabs()) + '^\n' + else: + before = text[start:pos].rsplit(b'\n', 1)[-1] + after = text[pos:end].split(b'\n', 1)[0] + return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace") + + def match_examples(self, parse_fn: 'Callable[[str], Tree]', + examples: Union[Mapping[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], + token_type_match_fallback: bool=False, + use_accepts: bool=True + ) -> Optional[T]: + #-- assert self.state is not None, "Not supported for this exception" - candidate = None - for label, example in examples.items(): - assert not isinstance(example, STRING_TYPE) + if isinstance(examples, Mapping): + examples = examples.items() + + candidate = (None, False) + for i, (label, example) in enumerate(examples): + assert not isinstance(example, str), "Expecting a list" - for malformed in example: + for j, malformed in enumerate(example): try: parse_fn(malformed) except UnexpectedInput as ut: if ut.state == self.state: - try: - if ut.token == self.token: # Try exact match first + if ( + use_accepts + and isinstance(self, UnexpectedToken) + and isinstance(ut, UnexpectedToken) + and ut.accepts != self.accepts + ): + logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % + (self.state, self.accepts, ut.accepts, i, j)) + continue + if ( + isinstance(self, (UnexpectedToken, UnexpectedEOF)) + and isinstance(ut, (UnexpectedToken, UnexpectedEOF)) + ): + if ut.token == self.token: ## + + logger.debug("Exact Match at example [%s][%s]" % (i, j)) return label - except AttributeError: - pass - if not candidate: - candidate = label - return candidate + if token_type_match_fallback: + ## + + if (ut.token.type == self.token.type) and not candidate[-1]: + logger.debug("Token Type Fallback at example [%s][%s]" % (i, j)) + candidate = label, True + + if candidate[0] is None: + logger.debug("Same State match at example [%s][%s]" % (i, j)) + candidate = label, False + + return candidate[0] + + def _format_expected(self, expected): + if self._terminals_by_name: + d = self._terminals_by_name + expected = [d[t_name].user_repr() if t_name in d else t_name for t_name in expected] + return "Expected one of: \n\t* %s\n" % '\n\t* '.join(expected) + + +class UnexpectedEOF(ParseError, UnexpectedInput): + #-- + expected: 'List[Token]' + + def __init__(self, expected, state=None, terminals_by_name=None): + super(UnexpectedEOF, self).__init__() + + self.expected = expected + self.state = state + from .lexer import Token + self.token = Token("", "") ## + + self.pos_in_stream = -1 + self.line = -1 + self.column = -1 + self._terminals_by_name = terminals_by_name + + + def __str__(self): + message = "Unexpected end-of-input. 
" + message += self._format_expected(self.expected) + return message class UnexpectedCharacters(LexError, UnexpectedInput): - def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None): - message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos], line, column) + #-- + + allowed: Set[str] + considered_tokens: Set[Any] + + def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None, + terminals_by_name=None, considered_rules=None): + super(UnexpectedCharacters, self).__init__() + + ## self.line = line self.column = column - self.allowed = allowed - self.considered_tokens = considered_tokens self.pos_in_stream = lex_pos self.state = state + self._terminals_by_name = terminals_by_name + + self.allowed = allowed + self.considered_tokens = considered_tokens + self.considered_rules = considered_rules + self.token_history = token_history - message += '\n\n' + self.get_context(seq) - if allowed: - message += '\nExpecting: %s\n' % allowed - if token_history: - message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in token_history) + if isinstance(seq, bytes): + self.char = seq[lex_pos:lex_pos + 1].decode("ascii", "backslashreplace") + else: + self.char = seq[lex_pos] + self._context = self.get_context(seq) - super(UnexpectedCharacters, self).__init__(message) + def __str__(self): + message = "No terminal matches '%s' in the current parser context, at line %d col %d" % (self.char, self.line, self.column) + message += '\n\n' + self._context + if self.allowed: + message += self._format_expected(self.allowed) + if self.token_history: + message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in self.token_history) + return message class UnexpectedToken(ParseError, UnexpectedInput): - def __init__(self, token, expected, considered_rules=None, state=None): - self.token = token - self.expected = expected # XXX str shouldn't necessary + #-- + + expected: Set[str] + considered_rules: Set[str] + interactive_parser: 'InteractiveParser' + + def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None): + super(UnexpectedToken, self).__init__() + + ## + self.line = getattr(token, 'line', '?') self.column = getattr(token, 'column', '?') - self.considered_rules = considered_rules + self.pos_in_stream = getattr(token, 'start_pos', None) self.state = state - self.pos_in_stream = getattr(token, 'pos_in_stream', None) - message = ("Unexpected token %r at line %s, column %s.\n" - "Expected one of: \n\t* %s\n" - % (token, self.line, self.column, '\n\t* '.join(self.expected))) + self.token = token + self.expected = expected ## + + self._accepts = NO_VALUE + self.considered_rules = considered_rules + self.interactive_parser = interactive_parser + self._terminals_by_name = terminals_by_name + self.token_history = token_history + + + @property + def accepts(self) -> Set[str]: + if self._accepts is NO_VALUE: + self._accepts = self.interactive_parser and self.interactive_parser.accepts() + return self._accepts + + def __str__(self): + message = ("Unexpected token %r at line %s, column %s.\n%s" + % (self.token, self.line, self.column, self._format_expected(self.accepts or self.expected))) + if self.token_history: + message += "Previous tokens: %r\n" % self.token_history + + return message + - super(UnexpectedToken, self).__init__(message) class VisitError(LarkError): - """VisitError is raised when visitors 
are interrupted by an exception + #-- + + obj: 'Union[Tree, Token]' + orig_exc: Exception - It provides the following attributes for inspection: - - obj: the tree node or token it was processing when the exception was raised - - orig_exc: the exception that cause it to fail - """ def __init__(self, rule, obj, orig_exc): + message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc) + super(VisitError, self).__init__(message) + + self.rule = rule self.obj = obj self.orig_exc = orig_exc - message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc) - super(VisitError, self).__init__(message) -def classify(seq, key=None, value=None): - d = {} +class MissingVariableError(LarkError): + pass + + +import sys, re +import logging + +logger: logging.Logger = logging.getLogger("lark") +logger.addHandler(logging.StreamHandler()) +## + +## + +logger.setLevel(logging.CRITICAL) + + +NO_VALUE = object() + +T = TypeVar("T") + + +def classify(seq: Sequence, key: Optional[Callable] = None, value: Optional[Callable] = None) -> Dict: + d: Dict[Any, Any] = {} for item in seq: k = key(item) if (key is not None) else item v = value(item) if (value is not None) else item @@ -148,9 +298,10 @@ def classify(seq, key=None, value=None): return d -def _deserialize(data, namespace, memo): +def _deserialize(data: Any, namespace: Dict[str, Any], memo: Dict) -> Any: if isinstance(data, dict): - if '__type__' in data: # Object + if '__type__' in data: ## + class_ = namespace[data['__type__']] return class_.deserialize(data, memo) elif '@' in data: @@ -161,26 +312,30 @@ def _deserialize(data, namespace, memo): return data -class Serialize(object): - def memo_serialize(self, types_to_memoize): +_T = TypeVar("_T", bound="Serialize") + +class Serialize: + #-- + + def memo_serialize(self, types_to_memoize: List) -> Any: memo = SerializeMemoizer(types_to_memoize) return self.serialize(memo), memo.serialize() - def serialize(self, memo=None): + def serialize(self, memo = None) -> Dict[str, Any]: if memo and memo.in_types(self): return {'@': memo.memoized.get(self)} fields = getattr(self, '__serialize_fields__') res = {f: _serialize(getattr(self, f), memo) for f in fields} res['__type__'] = type(self).__name__ - postprocess = getattr(self, '_serialize', None) - if postprocess: - postprocess(res, memo) + if hasattr(self, '_serialize'): + self._serialize(res, memo) ## + return res @classmethod - def deserialize(cls, data, memo): - namespace = getattr(cls, '__serialize_namespace__', {}) + def deserialize(cls: Type[_T], data: Dict[str, Any], memo: Dict[int, Any]) -> _T: + namespace = getattr(cls, '__serialize_namespace__', []) namespace = {c.__name__:c for c in namespace} fields = getattr(cls, '__serialize_fields__') @@ -194,113 +349,165 @@ def deserialize(cls, data, memo): setattr(inst, f, _deserialize(data[f], namespace, memo)) except KeyError as e: raise KeyError("Cannot find key for class", cls, e) - postprocess = getattr(inst, '_deserialize', None) - if postprocess: - postprocess() + + if hasattr(inst, '_deserialize'): + inst._deserialize() ## + + return inst class SerializeMemoizer(Serialize): + #-- + __serialize_fields__ = 'memoized', - def __init__(self, types_to_memoize): + def __init__(self, types_to_memoize: List) -> None: self.types_to_memoize = tuple(types_to_memoize) self.memoized = Enumerator() - def in_types(self, value): + def in_types(self, value: Serialize) -> bool: return isinstance(value, self.types_to_memoize) - def serialize(self): + def serialize(self) -> Dict[int, Any]: ## + return 
_serialize(self.memoized.reversed(), None) @classmethod - def deserialize(cls, data, namespace, memo): - return _deserialize(data, namespace, memo) + def deserialize(cls, data: Dict[int, Any], namespace: Dict[str, Any], memo: Dict[Any, Any]) -> Dict[int, Any]: ## + return _deserialize(data, namespace, memo) try: - STRING_TYPE = basestring -except NameError: # Python 3 - STRING_TYPE = str + import regex + _has_regex = True +except ImportError: + _has_regex = False +if sys.version_info >= (3, 11): + import re._parser as sre_parse + import re._constants as sre_constants +else: + import sre_parse + import sre_constants -import types -from functools import wraps, partial -from contextlib import contextmanager +categ_pattern = re.compile(r'\\p{[A-Za-z_]+}') -Str = type(u'') -try: - classtype = types.ClassType # Python2 -except AttributeError: - classtype = type # Python3 - -def smart_decorator(f, create_decorator): - if isinstance(f, types.FunctionType): - return wraps(f)(create_decorator(f, True)) +def get_regexp_width(expr: str) -> Union[Tuple[int, int], List[int]]: + if _has_regex: + ## - elif isinstance(f, (classtype, type, types.BuiltinFunctionType)): - return wraps(f)(create_decorator(f, False)) + ## - elif isinstance(f, types.MethodType): - return wraps(f)(create_decorator(f.__func__, True)) - - elif isinstance(f, partial): - # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445 - return wraps(f.func)(create_decorator(lambda *args, **kw: f(*args[1:], **kw), True)) + ## + regexp_final = re.sub(categ_pattern, 'A', expr) else: - return create_decorator(f.__func__.__call__, True) + if re.search(categ_pattern, expr): + raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr) + regexp_final = expr + try: + ## -import sys, re -Py36 = (sys.version_info[:2] >= (3, 6)) + return [int(x) for x in sre_parse.parse(regexp_final).getwidth()] ## -import sre_parse -import sre_constants -def get_regexp_width(regexp): - try: - return [int(x) for x in sre_parse.parse(regexp).getwidth()] except sre_constants.error: - raise ValueError(regexp) + if not _has_regex: + raise ValueError(expr) + else: + ## + + ## + c = regex.compile(regexp_final) + if c.match('') is None: + ## + + return 1, int(sre_constants.MAXREPEAT) + else: + return 0, int(sre_constants.MAXREPEAT) + + +from collections import OrderedDict class Meta: + + empty: bool + line: int + column: int + start_pos: int + end_line: int + end_column: int + end_pos: int + orig_expansion: 'List[TerminalDef]' + match_tree: bool + def __init__(self): self.empty = True -class Tree(object): - def __init__(self, data, children, meta=None): + +_Leaf_T = TypeVar("_Leaf_T") +Branch = Union[_Leaf_T, 'Tree[_Leaf_T]'] + + +class Tree(Generic[_Leaf_T]): + #-- + + data: str + children: 'List[Branch[_Leaf_T]]' + + def __init__(self, data: str, children: 'List[Branch[_Leaf_T]]', meta: Optional[Meta]=None) -> None: self.data = data self.children = children self._meta = meta @property - def meta(self): + def meta(self) -> Meta: if self._meta is None: self._meta = Meta() return self._meta def __repr__(self): - return 'Tree(%s, %s)' % (self.data, self.children) + return 'Tree(%r, %r)' % (self.data, self.children) def _pretty_label(self): return self.data def _pretty(self, level, indent_str): + yield f'{indent_str*level}{self._pretty_label()}' if len(self.children) == 1 and not isinstance(self.children[0], Tree): - return [ indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n'] + yield 
f'\t{self.children[0]}\n' + else: + yield '\n' + for n in self.children: + if isinstance(n, Tree): + yield from n._pretty(level+1, indent_str) + else: + yield f'{indent_str*(level+1)}{n}\n' - l = [ indent_str*level, self._pretty_label(), '\n' ] - for n in self.children: - if isinstance(n, Tree): - l += n._pretty(level+1, indent_str) - else: - l += [ indent_str*(level+1), '%s' % (n,), '\n' ] + def pretty(self, indent_str: str=' ') -> str: + #-- + return ''.join(self._pretty(0, indent_str)) - return l + def __rich__(self, parent:'rich.tree.Tree'=None) -> 'rich.tree.Tree': + #-- + return self._rich(parent) - def pretty(self, indent_str=' '): - return ''.join(self._pretty(0, indent_str)) + def _rich(self, parent): + if parent: + tree = parent.add(f'[bold]{self.data}[/bold]') + else: + import rich.tree + tree = rich.tree.Tree(self.data) + + for c in self.children: + if isinstance(c, Tree): + c._rich(tree) + else: + tree.add(f'[green]{c}[/green]') + + return tree def __eq__(self, other): try: @@ -311,87 +518,108 @@ def __eq__(self, other): def __ne__(self, other): return not (self == other) - def __hash__(self): + def __hash__(self) -> int: return hash((self.data, tuple(self.children))) - def iter_subtrees(self): - # TODO: Re-write as a more efficient version - - visited = set() - q = [self] + def iter_subtrees(self) -> 'Iterator[Tree[_Leaf_T]]': + #-- + queue = [self] + subtrees = OrderedDict() + for subtree in queue: + subtrees[id(subtree)] = subtree + ## + + queue += [c for c in reversed(subtree.children) ## + + if isinstance(c, Tree) and id(c) not in subtrees] + + del queue + return reversed(list(subtrees.values())) + + def iter_subtrees_topdown(self): + #-- + stack = [self] + stack_append = stack.append + stack_pop = stack.pop + while stack: + node = stack_pop() + if not isinstance(node, Tree): + continue + yield node + for child in reversed(node.children): + stack_append(child) - l = [] - while q: - subtree = q.pop() - l.append( subtree ) - if id(subtree) in visited: - continue # already been here from another branch - visited.add(id(subtree)) - q += [c for c in subtree.children if isinstance(c, Tree)] - - seen = set() - for x in reversed(l): - if id(x) not in seen: - yield x - seen.add(id(x)) - - def find_pred(self, pred): - "Find all nodes where pred(tree) == True" + def find_pred(self, pred: 'Callable[[Tree[_Leaf_T]], bool]') -> 'Iterator[Tree[_Leaf_T]]': + #-- return filter(pred, self.iter_subtrees()) - def find_data(self, data): - "Find all nodes where tree.data == data" + def find_data(self, data: str) -> 'Iterator[Tree[_Leaf_T]]': + #-- return self.find_pred(lambda t: t.data == data) +from functools import wraps, update_wrapper from inspect import getmembers, getmro -class Discard(Exception): - pass +_Return_T = TypeVar('_Return_T') +_Return_V = TypeVar('_Return_V') +_Leaf_T = TypeVar('_Leaf_T') +_Leaf_U = TypeVar('_Leaf_U') +_R = TypeVar('_R') +_FUNC = Callable[..., _Return_T] +_DECORATED = Union[_FUNC, type] + +class _DiscardType: + #-- + + def __repr__(self): + return "lark.visitors.Discard" + +Discard = _DiscardType() + +## -# Transformers class _Decoratable: + #-- + @classmethod - def _apply_decorator(cls, decorator, **kwargs): + def _apply_v_args(cls, visit_wrapper): mro = getmro(cls) assert mro[0] is cls libmembers = {name for _cls in mro[1:] for name, _ in getmembers(_cls)} for name, value in getmembers(cls): - # Make sure the function isn't inherited (unless it's overwritten) + ## + if name.startswith('_') or (name in libmembers and name not in cls.__dict__): continue - if 
not callable(cls.__dict__[name]): + if not callable(value): continue - # Skip if v_args already applied (at the function level) - if hasattr(cls.__dict__[name], 'vargs_applied'): + ## + + if isinstance(cls.__dict__[name], _VArgsWrapper): continue - static = isinstance(cls.__dict__[name], (staticmethod, classmethod)) - setattr(cls, name, decorator(value, static=static, **kwargs)) + setattr(cls, name, _VArgsWrapper(cls.__dict__[name], visit_wrapper)) return cls def __class_getitem__(cls, _): return cls -class Transformer(_Decoratable): - """Visits the tree recursively, starting with the leaves and finally the root (bottom-up) +class Transformer(_Decoratable, ABC, Generic[_Leaf_T, _Return_T]): + #-- + __visit_tokens__ = True ## - Calls its methods (provided by user via inheritance) according to tree.data - The returned value replaces the old one in the structure. - - Can be used to implement map or reduce. - """ - __visit_tokens__ = True # For backwards compatibility - def __init__(self, visit_tokens=True): + def __init__(self, visit_tokens: bool=True) -> None: self.__visit_tokens__ = visit_tokens def _call_userfunc(self, tree, new_children=None): - # Assumes tree is already transformed + ## + children = new_children if new_children is not None else tree.children try: f = getattr(self, tree.data) @@ -404,7 +632,7 @@ def _call_userfunc(self, tree, new_children=None): return f.visit_wrapper(f, tree.data, children, tree.meta) else: return f(children) - except (GrammarError, Discard): + except GrammarError: raise except Exception as e: raise VisitError(tree.data, tree, e) @@ -417,47 +645,72 @@ def _call_userfunc_token(self, token): else: try: return f(token) - except (GrammarError, Discard): + except GrammarError: raise except Exception as e: raise VisitError(token.type, token, e) - def _transform_children(self, children): for c in children: - try: - if isinstance(c, Tree): - yield self._transform_tree(c) - elif self.__visit_tokens__ and isinstance(c, Token): - yield self._call_userfunc_token(c) - else: - yield c - except Discard: - pass + if isinstance(c, Tree): + res = self._transform_tree(c) + elif self.__visit_tokens__ and isinstance(c, Token): + res = self._call_userfunc_token(c) + else: + res = c + + if res is not Discard: + yield res def _transform_tree(self, tree): children = list(self._transform_children(tree.children)) return self._call_userfunc(tree, children) - def transform(self, tree): + def transform(self, tree: Tree[_Leaf_T]) -> _Return_T: + #-- return self._transform_tree(tree) - def __mul__(self, other): + def __mul__( + self: 'Transformer[_Leaf_T, Tree[_Leaf_U]]', + other: 'Union[Transformer[_Leaf_U, _Return_V], TransformerChain[_Leaf_U, _Return_V,]]' + ) -> 'TransformerChain[_Leaf_T, _Return_V]': + #-- return TransformerChain(self, other) def __default__(self, data, children, meta): - "Default operation on tree (for override)" + #-- return Tree(data, children, meta) def __default_token__(self, token): - "Default operation on token (for override)" + #-- return token +def merge_transformers(base_transformer=None, **transformers_to_merge): + #-- + if base_transformer is None: + base_transformer = Transformer() + for prefix, transformer in transformers_to_merge.items(): + for method_name in dir(transformer): + method = getattr(transformer, method_name) + if not callable(method): + continue + if method_name.startswith("_") or method_name == "transform": + continue + prefixed_method = prefix + "__" + method_name + if hasattr(base_transformer, prefixed_method): + raise 
AttributeError("Cannot merge: method '%s' appears more than once" % prefixed_method) + + setattr(base_transformer, prefixed_method, method) + + return base_transformer + + +class InlineTransformer(Transformer): ## -class InlineTransformer(Transformer): # XXX Deprecated def _call_userfunc(self, tree, new_children=None): - # Assumes tree is already transformed + ## + children = new_children if new_children is not None else tree.children try: f = getattr(self, tree.data) @@ -467,78 +720,129 @@ def _call_userfunc(self, tree, new_children=None): return f(*children) -class TransformerChain(object): - def __init__(self, *transformers): +class TransformerChain(Generic[_Leaf_T, _Return_T]): + + transformers: 'Tuple[Union[Transformer, TransformerChain], ...]' + + def __init__(self, *transformers: 'Union[Transformer, TransformerChain]') -> None: self.transformers = transformers - def transform(self, tree): + def transform(self, tree: Tree[_Leaf_T]) -> _Return_T: for t in self.transformers: tree = t.transform(tree) - return tree + return cast(_Return_T, tree) - def __mul__(self, other): + def __mul__( + self: 'TransformerChain[_Leaf_T, Tree[_Leaf_U]]', + other: 'Union[Transformer[_Leaf_U, _Return_V], TransformerChain[_Leaf_U, _Return_V]]' + ) -> 'TransformerChain[_Leaf_T, _Return_V]': return TransformerChain(*self.transformers + (other,)) class Transformer_InPlace(Transformer): - "Non-recursive. Changes the tree in-place instead of returning new instances" - def _transform_tree(self, tree): # Cancel recursion + #-- + def _transform_tree(self, tree): ## + return self._call_userfunc(tree) - def transform(self, tree): + def transform(self, tree: Tree[_Leaf_T]) -> _Return_T: for subtree in tree.iter_subtrees(): subtree.children = list(self._transform_children(subtree.children)) return self._transform_tree(tree) +class Transformer_NonRecursive(Transformer): + #-- + + def transform(self, tree: Tree[_Leaf_T]) -> _Return_T: + ## + + rev_postfix = [] + q: List[Branch[_Leaf_T]] = [tree] + while q: + t = q.pop() + rev_postfix.append(t) + if isinstance(t, Tree): + q += t.children + + ## + + stack: List = [] + for x in reversed(rev_postfix): + if isinstance(x, Tree): + size = len(x.children) + if size: + args = stack[-size:] + del stack[-size:] + else: + args = [] + + res = self._call_userfunc(x, args) + if res is not Discard: + stack.append(res) + + elif self.__visit_tokens__ and isinstance(x, Token): + res = self._call_userfunc_token(x) + if res is not Discard: + stack.append(res) + else: + stack.append(x) + + result, = stack ## + + ## + + ## + + ## + + return cast(_Return_T, result) + + class Transformer_InPlaceRecursive(Transformer): - "Recursive. 
Changes the tree in-place instead of returning new instances" + #-- def _transform_tree(self, tree): tree.children = list(self._transform_children(tree.children)) return self._call_userfunc(tree) +## -# Visitors class VisitorBase: def _call_userfunc(self, tree): return getattr(self, tree.data, self.__default__)(tree) def __default__(self, tree): - "Default operation on tree (for override)" + #-- return tree def __class_getitem__(cls, _): return cls -class Visitor(VisitorBase): - """Bottom-up visitor, non-recursive +class Visitor(VisitorBase, ABC, Generic[_Leaf_T]): + #-- - Visits the tree, starting with the leaves and finally the root (bottom-up) - Calls its methods (provided by user via inheritance) according to tree.data - """ - - def visit(self, tree): + def visit(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]: + #-- for subtree in tree.iter_subtrees(): self._call_userfunc(subtree) return tree - def visit_topdown(self,tree): + def visit_topdown(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]: + #-- for subtree in tree.iter_subtrees_topdown(): self._call_userfunc(subtree) return tree -class Visitor_Recursive(VisitorBase): - """Bottom-up visitor, recursive - Visits the tree, starting with the leaves and finally the root (bottom-up) - Calls its methods (provided by user via inheritance) according to tree.data - """ +class Visitor_Recursive(VisitorBase, Generic[_Leaf_T]): + #-- - def visit(self, tree): + def visit(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]: + #-- for child in tree.children: if isinstance(child, Tree): self.visit(child) @@ -546,7 +850,8 @@ def visit(self, tree): self._call_userfunc(tree) return tree - def visit_topdown(self,tree): + def visit_topdown(self,tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]: + #-- self._call_userfunc(tree) for child in tree.children: @@ -556,27 +861,19 @@ def visit_topdown(self,tree): return tree +class Interpreter(_Decoratable, ABC, Generic[_Leaf_T, _Return_T]): + #-- -def visit_children_decor(func): - "See Interpreter" - @wraps(func) - def inner(cls, tree): - values = cls.visit_children(tree) - return func(cls, values) - return inner - + def visit(self, tree: Tree[_Leaf_T]) -> _Return_T: + ## -class Interpreter(_Decoratable): - """Top-down visitor, recursive + ## - Visits the tree, starting with the root and finally the leaves (top-down) - Calls its methods (provided by user via inheritance) according to tree.data + ## - Unlike Transformer and Visitor, the Interpreter doesn't automatically visit its sub-branches. 
- The user has to explicitly call visit_children, or use the @visit_children_decor - """ + return self._visit_tree(tree) - def visit(self, tree): + def _visit_tree(self, tree: Tree[_Leaf_T]): f = getattr(self, tree.data) wrapper = getattr(f, 'visit_wrapper', None) if wrapper is not None: @@ -584,8 +881,8 @@ def visit(self, tree): else: return f(tree) - def visit_children(self, tree): - return [self.visit(child) if isinstance(child, Tree) else child + def visit_children(self, tree: Tree[_Leaf_T]) -> List: + return [self._visit_tree(child) if isinstance(child, Tree) else child for child in tree.children] def __getattr__(self, name): @@ -595,69 +892,78 @@ def __default__(self, tree): return self.visit_children(tree) +_InterMethod = Callable[[Type[Interpreter], _Return_T], _R] + +def visit_children_decor(func: _InterMethod) -> _InterMethod: + #-- + @wraps(func) + def inner(cls, tree): + values = cls.visit_children(tree) + return func(cls, values) + return inner +## -# Decorators -def _apply_decorator(obj, decorator, **kwargs): +def _apply_v_args(obj, visit_wrapper): try: - _apply = obj._apply_decorator + _apply = obj._apply_v_args except AttributeError: - return decorator(obj, **kwargs) + return _VArgsWrapper(obj, visit_wrapper) else: - return _apply(decorator, **kwargs) + return _apply(visit_wrapper) +class _VArgsWrapper: + #-- + base_func: Callable -def _inline_args__func(func): - @wraps(func) - def create_decorator(_f, with_self): - if with_self: - def f(self, children): - return _f(self, *children) - else: - def f(self, children): - return _f(*children) - return f + def __init__(self, func: Callable, visit_wrapper: Callable[[Callable, str, list, Any], Any]): + if isinstance(func, _VArgsWrapper): + func = func.base_func + ## - return smart_decorator(func, create_decorator) + self.base_func = func ## + self.visit_wrapper = visit_wrapper + update_wrapper(self, func) -def inline_args(obj): # XXX Deprecated - return _apply_decorator(obj, _inline_args__func) + def __call__(self, *args, **kwargs): + return self.base_func(*args, **kwargs) + def __get__(self, instance, owner=None): + try: + ## + ## -def _visitor_args_func_dec(func, visit_wrapper=None, static=False): - def create_decorator(_f, with_self): - if with_self: - def f(self, *args, **kwargs): - return _f(self, *args, **kwargs) + g = type(self.base_func).__get__ + except AttributeError: + return self else: - def f(self, *args, **kwargs): - return _f(*args, **kwargs) - return f + return _VArgsWrapper(g(self.base_func, instance, owner), self.visit_wrapper) - if static: - f = wraps(func)(create_decorator(func, False)) - else: - f = smart_decorator(func, create_decorator) - f.vargs_applied = True - f.visit_wrapper = visit_wrapper - return f + def __set_name__(self, owner, name): + try: + f = type(self.base_func).__set_name__ + except AttributeError: + return + else: + f(self.base_func, owner, name) -def _vargs_inline(f, data, children, meta): +def _vargs_inline(f, _data, children, _meta): return f(*children) -def _vargs_meta_inline(f, data, children, meta): +def _vargs_meta_inline(f, _data, children, meta): return f(meta, *children) -def _vargs_meta(f, data, children, meta): - return f(children, meta) # TODO swap these for consistency? Backwards incompatible! 
+def _vargs_meta(f, _data, children, meta): + return f(meta, children) def _vargs_tree(f, data, children, meta): return f(Tree(data, children, meta)) -def v_args(inline=False, meta=False, tree=False, wrapper=None): - "A convenience decorator factory, for modifying the behavior of user-supplied visitor methods" + +def v_args(inline: bool = False, meta: bool = False, tree: bool = False, wrapper: Optional[Callable] = None) -> Callable[[_DECORATED], _DECORATED]: + #-- if tree and (meta or inline): raise ValueError("Visitor functions cannot combine 'tree' with 'meta' or 'inline'.") @@ -678,74 +984,21 @@ def v_args(inline=False, meta=False, tree=False, wrapper=None): func = wrapper def _visitor_args_dec(obj): - return _apply_decorator(obj, _visitor_args_func_dec, visit_wrapper=func) + return _apply_v_args(obj, func) return _visitor_args_dec -class Indenter: - def __init__(self): - self.paren_level = None - self.indent_level = None - assert self.tab_len > 0 - - def handle_NL(self, token): - if self.paren_level > 0: - return - - yield token - - indent_str = token.rsplit('\n', 1)[1] # Tabs and spaces - indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len - - if indent > self.indent_level[-1]: - self.indent_level.append(indent) - yield Token.new_borrow_pos(self.INDENT_type, indent_str, token) - else: - while indent < self.indent_level[-1]: - self.indent_level.pop() - yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token) - - assert indent == self.indent_level[-1], '%s != %s' % (indent, self.indent_level[-1]) - - def _process(self, stream): - for token in stream: - if token.type == self.NL_type: - for t in self.handle_NL(token): - yield t - else: - yield token - - if token.type in self.OPEN_PAREN_types: - self.paren_level += 1 - elif token.type in self.CLOSE_PAREN_types: - self.paren_level -= 1 - assert self.paren_level >= 0 - - while len(self.indent_level) > 1: - self.indent_level.pop() - yield Token(self.DEDENT_type, '') - - assert self.indent_level == [0], self.indent_level - - def process(self, stream): - self.paren_level = 0 - self.indent_level = [0] - return self._process(stream) - - # XXX Hack for ContextualLexer. Maybe there's a more elegant solution? 
- @property - def always_accept(self): - return (self.NL_type,) - +TOKEN_DEFAULT_PRIORITY = 0 class Symbol(Serialize): __slots__ = ('name',) - is_term = NotImplemented + name: str + is_term: ClassVar[bool] = NotImplemented - def __init__(self, name): + def __init__(self, name: str) -> None: self.name = name def __eq__(self, other): @@ -763,11 +1016,14 @@ def __repr__(self): fullrepr = property(__repr__) + def renamed(self, f): + return type(self)(f(self.name)) + class Terminal(Symbol): __serialize_fields__ = 'name', 'filter_out' - is_term = True + is_term: ClassVar[bool] = True def __init__(self, name, filter_out=False): self.name = name @@ -777,38 +1033,43 @@ def __init__(self, name, filter_out=False): def fullrepr(self): return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out) + def renamed(self, f): + return type(self)(f(self.name), self.filter_out) class NonTerminal(Symbol): __serialize_fields__ = 'name', - is_term = False - + is_term: ClassVar[bool] = False class RuleOptions(Serialize): - __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'empty_indices' + __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices' - def __init__(self, keep_all_tokens=False, expand1=False, priority=None, empty_indices=()): + keep_all_tokens: bool + expand1: bool + priority: Optional[int] + template_source: Optional[str] + empty_indices: Tuple[bool, ...] + + def __init__(self, keep_all_tokens: bool=False, expand1: bool=False, priority: Optional[int]=None, template_source: Optional[str]=None, empty_indices: Tuple[bool, ...]=()) -> None: self.keep_all_tokens = keep_all_tokens self.expand1 = expand1 self.priority = priority + self.template_source = template_source self.empty_indices = empty_indices def __repr__(self): - return 'RuleOptions(%r, %r, %r)' % ( + return 'RuleOptions(%r, %r, %r, %r)' % ( self.keep_all_tokens, self.expand1, self.priority, + self.template_source ) class Rule(Serialize): - """ - origin : a symbol - expansion : a list of symbols - order : index of this expansion amongst all rules of the same name - """ + #-- __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash') __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options' @@ -841,59 +1102,75 @@ def __eq__(self, other): +from copy import copy + +class Pattern(Serialize, ABC): -class Pattern(Serialize): + value: str + flags: Collection[str] + raw: Optional[str] + type: ClassVar[str] - def __init__(self, value, flags=()): + def __init__(self, value: str, flags: Collection[str]=(), raw: Optional[str]=None) -> None: self.value = value self.flags = frozenset(flags) + self.raw = raw def __repr__(self): return repr(self.to_regexp()) - # Pattern Hashing assumes all subclasses have a different priority! 
+ ## + def __hash__(self): return hash((type(self), self.value, self.flags)) + def __eq__(self, other): return type(self) == type(other) and self.value == other.value and self.flags == other.flags - def to_regexp(self): + @abstractmethod + def to_regexp(self) -> str: + raise NotImplementedError() + + @property + @abstractmethod + def min_width(self) -> int: raise NotImplementedError() - if Py36: - # Python 3.6 changed syntax for flags in regular expression - def _get_flags(self, value): - for f in self.flags: - value = ('(?%s:%s)' % (f, value)) - return value + @property + @abstractmethod + def max_width(self) -> int: + raise NotImplementedError() - else: - def _get_flags(self, value): - for f in self.flags: - value = ('(?%s)' % f) + value - return value + def _get_flags(self, value): + for f in self.flags: + value = ('(?%s:%s)' % (f, value)) + return value class PatternStr(Pattern): __serialize_fields__ = 'value', 'flags' - type = "str" + type: ClassVar[str] = "str" - def to_regexp(self): + def to_regexp(self) -> str: return self._get_flags(re.escape(self.value)) @property - def min_width(self): + def min_width(self) -> int: return len(self.value) - max_width = min_width + + @property + def max_width(self) -> int: + return len(self.value) + class PatternRE(Pattern): __serialize_fields__ = 'value', 'flags', '_width' - type = "re" + type: ClassVar[str] = "re" - def to_regexp(self): + def to_regexp(self) -> str: return self._get_flags(self.value) _width = None @@ -903,10 +1180,11 @@ def _get_width(self): return self._width @property - def min_width(self): + def min_width(self) -> int: return self._get_width()[0] + @property - def max_width(self): + def max_width(self) -> int: return self._get_width()[1] @@ -914,7 +1192,11 @@ class TerminalDef(Serialize): __serialize_fields__ = 'name', 'pattern', 'priority' __serialize_namespace__ = PatternStr, PatternRE - def __init__(self, name, pattern, priority=1): + name: str + pattern: Pattern + priority: int + + def __init__(self, name: str, pattern: Pattern, priority: int=TOKEN_DEFAULT_PRIORITY) -> None: assert isinstance(pattern, Pattern), pattern self.name = name self.pattern = pattern @@ -923,70 +1205,148 @@ def __init__(self, name, pattern, priority=1): def __repr__(self): return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern) + def user_repr(self) -> str: + if self.name.startswith('__'): ## + return self.pattern.raw or self.name + else: + return self.name + +_T = TypeVar('_T', bound="Token") + +class Token(str): + #-- + __slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos') + + __match_args__ = ('type', 'value') + + type: str + start_pos: Optional[int] + value: Any + line: Optional[int] + column: Optional[int] + end_line: Optional[int] + end_column: Optional[int] + end_pos: Optional[int] + + + @overload + def __new__( + cls, + type: str, + value: Any, + start_pos: Optional[int]=None, + line: Optional[int]=None, + column: Optional[int]=None, + end_line: Optional[int]=None, + end_column: Optional[int]=None, + end_pos: Optional[int]=None + ) -> 'Token': + ... + + @overload + def __new__( + cls, + type_: str, + value: Any, + start_pos: Optional[int]=None, + line: Optional[int]=None, + column: Optional[int]=None, + end_line: Optional[int]=None, + end_column: Optional[int]=None, + end_pos: Optional[int]=None + ) -> 'Token': ... 
+ + def __new__(cls, *args, **kwargs): + if "type_" in kwargs: + warnings.warn("`type_` is deprecated use `type` instead", DeprecationWarning) + + if "type" in kwargs: + raise TypeError("Error: using both 'type' and the deprecated 'type_' as arguments.") + kwargs["type"] = kwargs.pop("type_") + + return cls._future_new(*args, **kwargs) -class Token(Str): - __slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos') - def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None, end_line=None, end_column=None, end_pos=None): - try: - self = super(Token, cls).__new__(cls, value) - except UnicodeDecodeError: - value = value.decode('latin1') - self = super(Token, cls).__new__(cls, value) + @classmethod + def _future_new(cls, type, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None): + inst = super(Token, cls).__new__(cls, value) + + inst.type = type + inst.start_pos = start_pos + inst.value = value + inst.line = line + inst.column = column + inst.end_line = end_line + inst.end_column = end_column + inst.end_pos = end_pos + return inst - self.type = type_ - self.pos_in_stream = pos_in_stream - self.value = value - self.line = line - self.column = column - self.end_line = end_line - self.end_column = end_column - self.end_pos = end_pos - return self + @overload + def update(self, type: Optional[str]=None, value: Optional[Any]=None) -> 'Token': + ... + + @overload + def update(self, type_: Optional[str]=None, value: Optional[Any]=None) -> 'Token': + ... + + def update(self, *args, **kwargs): + if "type_" in kwargs: + warnings.warn("`type_` is deprecated use `type` instead", DeprecationWarning) + + if "type" in kwargs: + raise TypeError("Error: using both 'type' and the deprecated 'type_' as arguments.") + kwargs["type"] = kwargs.pop("type_") - def update(self, type_=None, value=None): + return self._future_update(*args, **kwargs) + + def _future_update(self, type: Optional[str]=None, value: Optional[Any]=None) -> 'Token': return Token.new_borrow_pos( - type_ if type_ is not None else self.type, + type if type is not None else self.type, value if value is not None else self.value, self ) @classmethod - def new_borrow_pos(cls, type_, value, borrow_t): - return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos) + def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: 'Token') -> _T: + return cls(type_, value, borrow_t.start_pos, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos) def __reduce__(self): - return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column, )) + return (self.__class__, (self.type, self.value, self.start_pos, self.line, self.column)) def __repr__(self): - return 'Token(%s, %r)' % (self.type, self.value) + return 'Token(%r, %r)' % (self.type, self.value) def __deepcopy__(self, memo): - return Token(self.type, self.value, self.pos_in_stream, self.line, self.column) + return Token(self.type, self.value, self.start_pos, self.line, self.column) def __eq__(self, other): if isinstance(other, Token) and self.type != other.type: return False - return Str.__eq__(self, other) + return str.__eq__(self, other) - __hash__ = Str.__hash__ + __hash__ = str.__hash__ class LineCounter: - def __init__(self): - self.newline_char = '\n' + __slots__ = 'char_pos', 'line', 'column', 'line_start_pos', 'newline_char' + + def __init__(self, 
newline_char): + self.newline_char = newline_char self.char_pos = 0 self.line = 1 self.column = 1 self.line_start_pos = 0 - def feed(self, token, test_newline=True): - """Consume a token and calculate the new line & column. + def __eq__(self, other): + if not isinstance(other, LineCounter): + return NotImplemented - As an optional optimization, set test_newline=False is token doesn't contain a newline. - """ + return self.char_pos == other.char_pos and self.newline_char == other.newline_char + + def feed(self, token: Token, test_newline=True): + #-- if test_newline: newlines = token.count(self.newline_char) if newlines: @@ -996,62 +1356,18 @@ def feed(self, token, test_newline=True): self.char_pos += len(token) self.column = self.char_pos - self.line_start_pos + 1 -class _Lex: - "Built to serve both Lexer and ContextualLexer" - def __init__(self, lexer, state=None): - self.lexer = lexer - self.state = state - - def lex(self, stream, newline_types, ignore_types): - newline_types = frozenset(newline_types) - ignore_types = frozenset(ignore_types) - line_ctr = LineCounter() - last_token = None - - while line_ctr.char_pos < len(stream): - lexer = self.lexer - res = lexer.match(stream, line_ctr.char_pos) - if not res: - allowed = {v for m, tfi in lexer.mres for v in tfi.values()} - ignore_types - if not allowed: - allowed = {""} - raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, state=self.state, token_history=last_token and [last_token]) - - value, type_ = res - - if type_ not in ignore_types: - t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) - line_ctr.feed(value, type_ in newline_types) - t.end_line = line_ctr.line - t.end_column = line_ctr.column - t.end_pos = line_ctr.char_pos - if t.type in lexer.callback: - t = lexer.callback[t.type](t) - if not isinstance(t, Token): - raise ValueError("Callbacks must return a token (returned %r)" % t) - yield t - last_token = t - else: - if type_ in lexer.callback: - t2 = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) - lexer.callback[type_](t2) - line_ctr.feed(value, type_ in newline_types) - - - class UnlessCallback: - def __init__(self, mres): - self.mres = mres + def __init__(self, scanner): + self.scanner = scanner def __call__(self, t): - for mre, type_from_index in self.mres: - m = mre.match(t.value) - if m: - t.type = type_from_index[m.lastindex] - break + res = self.scanner.match(t.value, 0) + if res: + _value, t.type = res return t + class CallChain: def __init__(self, callback1, callback2, cond): self.callback1 = callback1 @@ -1063,188 +1379,356 @@ def __call__(self, t): return self.callback2(t) if self.cond(t2) else t2 +def _get_match(re_, regexp, s, flags): + m = re_.match(regexp, s, flags) + if m: + return m.group(0) - - -def _create_unless(terminals, g_regex_flags): +def _create_unless(terminals, g_regex_flags, re_, use_bytes): tokens_by_type = classify(terminals, lambda t: type(t.pattern)) assert len(tokens_by_type) <= 2, tokens_by_type.keys() embedded_strs = set() callback = {} for retok in tokens_by_type.get(PatternRE, []): - unless = [] # {} + unless = [] for strtok in tokens_by_type.get(PatternStr, []): - if strtok.priority > retok.priority: + if strtok.priority != retok.priority: continue s = strtok.pattern.value - m = re.match(retok.pattern.to_regexp(), s, g_regex_flags) - if m and m.group(0) == s: + if s == _get_match(re_, retok.pattern.to_regexp(), s, g_regex_flags): unless.append(strtok) if strtok.pattern.flags <= 
retok.pattern.flags: embedded_strs.add(strtok) if unless: - callback[retok.name] = UnlessCallback(build_mres(unless, g_regex_flags, match_whole=True)) + callback[retok.name] = UnlessCallback(Scanner(unless, g_regex_flags, re_, match_whole=True, use_bytes=use_bytes)) - terminals = [t for t in terminals if t not in embedded_strs] - return terminals, callback + new_terminals = [t for t in terminals if t not in embedded_strs] + return new_terminals, callback -def _build_mres(terminals, max_size, g_regex_flags, match_whole): - # Python sets an unreasonable group limit (currently 100) in its re module - # Worse, the only way to know we reached it is by catching an AssertionError! - # This function recursively tries less and less groups until it's successful. - postfix = '$' if match_whole else '' - mres = [] - while terminals: - try: - mre = re.compile(u'|'.join(u'(?P<%s>%s)'%(t.name, t.pattern.to_regexp()+postfix) for t in terminals[:max_size]), g_regex_flags) - except AssertionError: # Yes, this is what Python provides us.. :/ - return _build_mres(terminals, max_size//2, g_regex_flags, match_whole) - - # terms_from_name = {t.name: t for t in terminals[:max_size]} - mres.append((mre, {i:n for n,i in mre.groupindex.items()} )) - terminals = terminals[max_size:] - return mres - -def build_mres(terminals, g_regex_flags, match_whole=False): - return _build_mres(terminals, len(terminals), g_regex_flags, match_whole) - -def _regexp_has_newline(r): - r"""Expressions that may indicate newlines in a regexp: - - newlines (\n) - - escaped newline (\\n) - - anything but ([^...]) - - any-char (.) when the flag (?s) exists - - spaces (\s) - """ +class Scanner: + def __init__(self, terminals, g_regex_flags, re_, use_bytes, match_whole=False): + self.terminals = terminals + self.g_regex_flags = g_regex_flags + self.re_ = re_ + self.use_bytes = use_bytes + self.match_whole = match_whole + + self.allowed_types = {t.name for t in self.terminals} + + self._mres = self._build_mres(terminals, len(terminals)) + + def _build_mres(self, terminals, max_size): + ## + + ## + + ## + + postfix = '$' if self.match_whole else '' + mres = [] + while terminals: + pattern = u'|'.join(u'(?P<%s>%s)' % (t.name, t.pattern.to_regexp() + postfix) for t in terminals[:max_size]) + if self.use_bytes: + pattern = pattern.encode('latin-1') + try: + mre = self.re_.compile(pattern, self.g_regex_flags) + except AssertionError: ## + + return self._build_mres(terminals, max_size//2) + + mres.append(mre) + terminals = terminals[max_size:] + return mres + + def match(self, text, pos): + for mre in self._mres: + m = mre.match(text, pos) + if m: + return m.group(0), m.lastgroup + + +def _regexp_has_newline(r: str): + #-- return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' 
in r) -class Lexer(object): - """Lexer interface - Method Signatures: - lex(self, stream) -> Iterator[Token] - """ - lex = NotImplemented +class LexerState: + #-- + + __slots__ = 'text', 'line_ctr', 'last_token' + + def __init__(self, text, line_ctr=None, last_token=None): + self.text = text + self.line_ctr = line_ctr or LineCounter(b'\n' if isinstance(text, bytes) else '\n') + self.last_token = last_token + + def __eq__(self, other): + if not isinstance(other, LexerState): + return NotImplemented + + return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token + + def __copy__(self): + return type(self)(self.text, copy(self.line_ctr), self.last_token) + + +class LexerThread: + #-- + + def __init__(self, lexer: 'Lexer', lexer_state: LexerState): + self.lexer = lexer + self.state = lexer_state + + @classmethod + def from_text(cls, lexer: 'Lexer', text: str): + return cls(lexer, LexerState(text)) + + def lex(self, parser_state): + return self.lexer.lex(self.state, parser_state) + + def __copy__(self): + return type(self)(self.lexer, copy(self.state)) + + _Token = Token + +_Callback = Callable[[Token], Token] -class TraditionalLexer(Lexer): +class Lexer(ABC): + #-- + @abstractmethod + def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]: + return NotImplemented - def __init__(self, terminals, ignore=(), user_callbacks={}, g_regex_flags=0): + def make_lexer_state(self, text): + #-- + return LexerState(text) + + +class BasicLexer(Lexer): + + terminals: Collection[TerminalDef] + ignore_types: FrozenSet[str] + newline_types: FrozenSet[str] + user_callbacks: Dict[str, _Callback] + callback: Dict[str, _Callback] + re: ModuleType + + def __init__(self, conf: 'LexerConf') -> None: + terminals = list(conf.terminals) assert all(isinstance(t, TerminalDef) for t in terminals), terminals - terminals = list(terminals) + self.re = conf.re_module - # Sanitization - for t in terminals: - try: - re.compile(t.pattern.to_regexp(), g_regex_flags) - except re.error: - raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern)) + if not conf.skip_validation: + ## + + for t in terminals: + try: + self.re.compile(t.pattern.to_regexp(), conf.g_regex_flags) + except self.re.error: + raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern)) - if t.pattern.min_width == 0: - raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern)) + if t.pattern.min_width == 0: + raise LexError("Lexer does not allow zero-width terminals. 
(%s: %s)" % (t.name, t.pattern)) - assert set(ignore) <= {t.name for t in terminals} + if not (set(conf.ignore) <= {t.name for t in terminals}): + raise LexError("Ignore terminals are not defined: %s" % (set(conf.ignore) - {t.name for t in terminals})) - # Init - self.newline_types = [t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())] - self.ignore_types = list(ignore) + ## - terminals.sort(key=lambda x:(-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name)) + self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())) + self.ignore_types = frozenset(conf.ignore) + + terminals.sort(key=lambda x: (-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name)) self.terminals = terminals - self.user_callbacks = user_callbacks - self.build(g_regex_flags) + self.user_callbacks = conf.callbacks + self.g_regex_flags = conf.g_regex_flags + self.use_bytes = conf.use_bytes + self.terminals_by_name = conf.terminals_by_name + + self._scanner = None - def build(self, g_regex_flags=0): - terminals, self.callback = _create_unless(self.terminals, g_regex_flags) + def _build_scanner(self): + terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes) assert all(self.callback.values()) for type_, f in self.user_callbacks.items(): if type_ in self.callback: - # Already a callback there, probably UnlessCallback + ## + self.callback[type_] = CallChain(self.callback[type_], f, lambda t: t.type == type_) else: self.callback[type_] = f - self.mres = build_mres(terminals, g_regex_flags) + self._scanner = Scanner(terminals, self.g_regex_flags, self.re, self.use_bytes) - def match(self, stream, pos): - for mre, type_from_index in self.mres: - m = mre.match(stream, pos) - if m: - return m.group(0), type_from_index[m.lastindex] + @property + def scanner(self): + if self._scanner is None: + self._build_scanner() + return self._scanner + + def match(self, text, pos): + return self.scanner.match(text, pos) - def lex(self, stream): - return _Lex(self).lex(stream, self.newline_types, self.ignore_types) + def lex(self, state: LexerState, parser_state: Any) -> Iterator[Token]: + with suppress(EOFError): + while True: + yield self.next_token(state, parser_state) + + def next_token(self, lex_state: LexerState, parser_state: Any=None) -> Token: + line_ctr = lex_state.line_ctr + while line_ctr.char_pos < len(lex_state.text): + res = self.match(lex_state.text, line_ctr.char_pos) + if not res: + allowed = self.scanner.allowed_types - self.ignore_types + if not allowed: + allowed = {""} + raise UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column, + allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token], + state=parser_state, terminals_by_name=self.terminals_by_name) + + value, type_ = res + + if type_ not in self.ignore_types: + t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) + line_ctr.feed(value, type_ in self.newline_types) + t.end_line = line_ctr.line + t.end_column = line_ctr.column + t.end_pos = line_ctr.char_pos + if t.type in self.callback: + t = self.callback[t.type](t) + if not isinstance(t, Token): + raise LexError("Callbacks must return a token (returned %r)" % t) + lex_state.last_token = t + return t + else: + if type_ in self.callback: + t2 = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) + self.callback[type_](t2) + line_ctr.feed(value, type_ in self.newline_types) + ## + raise 
EOFError(self) class ContextualLexer(Lexer): - def __init__(self, terminals, states, ignore=(), always_accept=(), user_callbacks={}, g_regex_flags=0): - tokens_by_name = {} - for t in terminals: - assert t.name not in tokens_by_name, t - tokens_by_name[t.name] = t + lexers: Dict[str, BasicLexer] + root_lexer: BasicLexer + + def __init__(self, conf: 'LexerConf', states: Dict[str, Collection[str]], always_accept: Collection[str]=()) -> None: + terminals = list(conf.terminals) + terminals_by_name = conf.terminals_by_name - lexer_by_tokens = {} + trad_conf = copy(conf) + trad_conf.terminals = terminals + + lexer_by_tokens: Dict[FrozenSet[str], BasicLexer] = {} self.lexers = {} for state, accepts in states.items(): key = frozenset(accepts) try: lexer = lexer_by_tokens[key] except KeyError: - accepts = set(accepts) | set(ignore) | set(always_accept) - state_tokens = [tokens_by_name[n] for n in accepts if n and n in tokens_by_name] - lexer = TraditionalLexer(state_tokens, ignore=ignore, user_callbacks=user_callbacks, g_regex_flags=g_regex_flags) + accepts = set(accepts) | set(conf.ignore) | set(always_accept) + lexer_conf = copy(trad_conf) + lexer_conf.terminals = [terminals_by_name[n] for n in accepts if n in terminals_by_name] + lexer = BasicLexer(lexer_conf) lexer_by_tokens[key] = lexer self.lexers[state] = lexer - self.root_lexer = TraditionalLexer(terminals, ignore=ignore, user_callbacks=user_callbacks, g_regex_flags=g_regex_flags) + assert trad_conf.terminals is terminals + self.root_lexer = BasicLexer(trad_conf) - def lex(self, stream, get_parser_state): - parser_state = get_parser_state() - l = _Lex(self.lexers[parser_state], parser_state) + def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]: try: - for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types): - yield x - parser_state = get_parser_state() - l.lexer = self.lexers[parser_state] - l.state = parser_state # For debug only, no need to worry about multithreading + while True: + lexer = self.lexers[parser_state.position] + yield lexer.next_token(lexer_state, parser_state) + except EOFError: + pass except UnexpectedCharacters as e: - # In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined, - # but not in the current context. - # This tests the input against the global context, to provide a nicer error. 
- root_match = self.root_lexer.match(stream, e.pos_in_stream) - if not root_match: - raise + ## + + ## + + try: + last_token = lexer_state.last_token ## + + token = self.root_lexer.next_token(lexer_state, parser_state) + raise UnexpectedToken(token, e.allowed, state=parser_state, token_history=[last_token], terminals_by_name=self.root_lexer.terminals_by_name) + except UnexpectedCharacters: + raise e ## + - value, type_ = root_match - t = Token(type_, value, e.pos_in_stream, e.line, e.column) - raise UnexpectedToken(t, e.allowed, state=e.state) +_ParserArgType: 'TypeAlias' = 'Literal["earley", "lalr", "cyk", "auto"]' +_LexerArgType: 'TypeAlias' = 'Union[Literal["auto", "basic", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]]' +_Callback = Callable[[Token], Token] class LexerConf(Serialize): - __serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags' + __serialize_fields__ = 'terminals', 'ignore', 'g_regex_flags', 'use_bytes', 'lexer_type' __serialize_namespace__ = TerminalDef, - def __init__(self, tokens, ignore=(), postlex=None, callbacks=None, g_regex_flags=0): - self.tokens = tokens + terminals: Collection[TerminalDef] + re_module: ModuleType + ignore: Collection[str] + postlex: 'Optional[PostLex]' + callbacks: Dict[str, _Callback] + g_regex_flags: int + skip_validation: bool + use_bytes: bool + lexer_type: Optional[_LexerArgType] + + def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False): + self.terminals = terminals + self.terminals_by_name = {t.name: t for t in self.terminals} + assert len(self.terminals) == len(self.terminals_by_name) self.ignore = ignore self.postlex = postlex self.callbacks = callbacks or {} self.g_regex_flags = g_regex_flags + self.re_module = re_module + self.skip_validation = skip_validation + self.use_bytes = use_bytes + self.lexer_type = None def _deserialize(self): - self.callbacks = {} # TODO + self.terminals_by_name = {t.name: t for t in self.terminals} + + def __deepcopy__(self, memo=None): + return type(self)( + deepcopy(self.terminals, memo), + self.re_module, + deepcopy(self.ignore, memo), + deepcopy(self.postlex, memo), + deepcopy(self.callbacks, memo), + deepcopy(self.g_regex_flags, memo), + deepcopy(self.skip_validation, memo), + deepcopy(self.use_bytes, memo), + ) + + +class ParserConf(Serialize): + __serialize_fields__ = 'rules', 'start', 'parser_type' + + def __init__(self, rules, callbacks, start): + assert isinstance(start, list) + self.rules = rules + self.callbacks = callbacks + self.start = start + + self.parser_type = None from functools import partial, wraps -from itertools import repeat, product +from itertools import product class ExpandSingleChild: @@ -1257,44 +1741,76 @@ def __call__(self, children): else: return self.node_builder(children) + + class PropagatePositions: - def __init__(self, node_builder): + def __init__(self, node_builder, node_filter=None): self.node_builder = node_builder + self.node_filter = node_filter def __call__(self, children): res = self.node_builder(children) if isinstance(res, Tree): - for c in children: - if isinstance(c, Tree) and not c.meta.empty: - res.meta.line = c.meta.line - res.meta.column = c.meta.column - res.meta.start_pos = c.meta.start_pos - res.meta.empty = False - break - elif isinstance(c, Token): - res.meta.line = c.line - res.meta.column = c.column - res.meta.start_pos = 
c.pos_in_stream - res.meta.empty = False - break - - for c in reversed(children): - if isinstance(c, Tree) and not c.meta.empty: - res.meta.end_line = c.meta.end_line - res.meta.end_column = c.meta.end_column - res.meta.end_pos = c.meta.end_pos - res.meta.empty = False - break - elif isinstance(c, Token): - res.meta.end_line = c.end_line - res.meta.end_column = c.end_column - res.meta.end_pos = c.end_pos - res.meta.empty = False - break + ## + + ## + + ## + + ## + + + res_meta = res.meta + + first_meta = self._pp_get_meta(children) + if first_meta is not None: + if not hasattr(res_meta, 'line'): + ## + + res_meta.line = getattr(first_meta, 'container_line', first_meta.line) + res_meta.column = getattr(first_meta, 'container_column', first_meta.column) + res_meta.start_pos = getattr(first_meta, 'container_start_pos', first_meta.start_pos) + res_meta.empty = False + + res_meta.container_line = getattr(first_meta, 'container_line', first_meta.line) + res_meta.container_column = getattr(first_meta, 'container_column', first_meta.column) + + last_meta = self._pp_get_meta(reversed(children)) + if last_meta is not None: + if not hasattr(res_meta, 'end_line'): + res_meta.end_line = getattr(last_meta, 'container_end_line', last_meta.end_line) + res_meta.end_column = getattr(last_meta, 'container_end_column', last_meta.end_column) + res_meta.end_pos = getattr(last_meta, 'container_end_pos', last_meta.end_pos) + res_meta.empty = False + + res_meta.container_end_line = getattr(last_meta, 'container_end_line', last_meta.end_line) + res_meta.container_end_column = getattr(last_meta, 'container_end_column', last_meta.end_column) return res + def _pp_get_meta(self, children): + for c in children: + if self.node_filter is not None and not self.node_filter(c): + continue + if isinstance(c, Tree): + if not c.meta.empty: + return c.meta + elif isinstance(c, Token): + return c + elif hasattr(c, '__lark_meta__'): + return c.__lark_meta__() + +def make_propagate_positions(option): + if callable(option): + return partial(PropagatePositions, node_filter=option) + elif option is True: + return PropagatePositions + elif option is False: + return None + + raise ConfigurationError('Invalid option for propagate_positions: %r' % option) + class ChildFilter: def __init__(self, to_include, append_none, node_builder): @@ -1318,8 +1834,9 @@ def __call__(self, children): return self.node_builder(filtered) + class ChildFilterLALR(ChildFilter): - "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)" + #-- def __call__(self, children): filtered = [] @@ -1329,7 +1846,8 @@ def __call__(self, children): if to_expand: if filtered: filtered += children[i].children - else: # Optimize for left-recursion + else: ## + filtered = children[i].children else: filtered.append(children[i]) @@ -1339,8 +1857,9 @@ def __call__(self, children): return self.node_builder(filtered) + class ChildFilterLALR_NoPlaceholders(ChildFilter): - "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)" + #-- def __init__(self, to_include, node_builder): self.node_builder = node_builder self.to_include = to_include @@ -1351,17 +1870,21 @@ def __call__(self, children): if to_expand: if filtered: filtered += children[i].children - else: # Optimize for left-recursion + else: ## + filtered = children[i].children else: filtered.append(children[i]) return self.node_builder(filtered) + def _should_expand(sym): return not sym.is_term and sym.name.startswith('_') -def 
maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices): - # Prepare empty_indices as: How many Nones to insert at each index? + +def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices: List[bool]): + ## + if _empty_indices: assert _empty_indices.count(False) == len(expansion) s = ''.join(str(int(b)) for b in _empty_indices) @@ -1384,41 +1907,44 @@ def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indi if _empty_indices or ambiguous: return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include, nones_to_add) else: - # LALR without placeholders + ## + return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include]) + class AmbiguousExpander: - """Deal with the case where we're expanding children ('_rule') into a parent but the children - are ambiguous. i.e. (parent->_ambig->_expand_this_rule). In this case, make the parent itself - ambiguous with as many copies as their are ambiguous children, and then copy the ambiguous children - into the right parents in the right places, essentially shifting the ambiguiuty up the tree.""" + #-- def __init__(self, to_expand, tree_class, node_builder): self.node_builder = node_builder self.tree_class = tree_class self.to_expand = to_expand def __call__(self, children): - def _is_ambig_tree(child): - return hasattr(child, 'data') and child.data == '_ambig' + def _is_ambig_tree(t): + return hasattr(t, 'data') and t.data == '_ambig' + + ## + + ## + + ## + + ## - #### When we're repeatedly expanding ambiguities we can end up with nested ambiguities. - # All children of an _ambig node should be a derivation of that ambig node, hence - # it is safe to assume that if we see an _ambig node nested within an ambig node - # it is safe to simply expand it into the parent _ambig node as an alternative derivation. 
ambiguous = [] for i, child in enumerate(children): if _is_ambig_tree(child): if i in self.to_expand: ambiguous.append(i) - to_expand = [j for j, grandchild in enumerate(child.children) if _is_ambig_tree(grandchild)] - child.expand_kids_by_index(*to_expand) + child.expand_kids_by_data('_ambig') if not ambiguous: return self.node_builder(children) - expand = [ iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children) ] - return self.tree_class('_ambig', [self.node_builder(list(f[0])) for f in product(zip(*expand))]) + expand = [child.children if i in ambiguous else (child,) for i, child in enumerate(children)] + return self.tree_class('_ambig', [self.node_builder(list(f)) for f in product(*expand)]) + def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens): to_expand = [i for i, sym in enumerate(expansion) @@ -1426,23 +1952,62 @@ def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens): if to_expand: return partial(AmbiguousExpander, to_expand, tree_class) -def ptb_inline_args(func): - @wraps(func) - def f(children): - return func(*children) - return f + +class AmbiguousIntermediateExpander: + #-- + + def __init__(self, tree_class, node_builder): + self.node_builder = node_builder + self.tree_class = tree_class + + def __call__(self, children): + def _is_iambig_tree(child): + return hasattr(child, 'data') and child.data == '_iambig' + + def _collapse_iambig(children): + #-- + + ## + + ## + + if children and _is_iambig_tree(children[0]): + iambig_node = children[0] + result = [] + for grandchild in iambig_node.children: + collapsed = _collapse_iambig(grandchild.children) + if collapsed: + for child in collapsed: + child.children += children[1:] + result += collapsed + else: + new_tree = self.tree_class('_inter', grandchild.children + children[1:]) + result.append(new_tree) + return result + + collapsed = _collapse_iambig(children) + if collapsed: + processed_nodes = [self.node_builder(c.children) for c in collapsed] + return self.tree_class('_ambig', processed_nodes) + + return self.node_builder(children) + + def inplace_transformer(func): @wraps(func) def f(children): - # function name in a Transformer is a rule name. 
+ ## + tree = Tree(func.__name__, children) return func(tree) return f + def apply_visit_wrapper(func, name, wrapper): if wrapper is _vargs_meta or wrapper is _vargs_meta_inline: raise NotImplementedError("Meta args not supported for internal transformer") + @wraps(func) def f(children): return wrapper(func, name, children, None) @@ -1450,50 +2015,54 @@ def f(children): class ParseTreeBuilder: - def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False, maybe_placeholders=False): + def __init__(self, rules, tree_class, propagate_positions=False, ambiguous=False, maybe_placeholders=False): self.tree_class = tree_class self.propagate_positions = propagate_positions - self.always_keep_all_tokens = keep_all_tokens self.ambiguous = ambiguous self.maybe_placeholders = maybe_placeholders self.rule_builders = list(self._init_builders(rules)) def _init_builders(self, rules): + propagate_positions = make_propagate_positions(self.propagate_positions) + for rule in rules: options = rule.options - keep_all_tokens = self.always_keep_all_tokens or options.keep_all_tokens + keep_all_tokens = options.keep_all_tokens expand_single_child = options.expand1 wrapper_chain = list(filter(None, [ (expand_single_child and not rule.alias) and ExpandSingleChild, maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None), - self.propagate_positions and PropagatePositions, + propagate_positions, self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens), + self.ambiguous and partial(AmbiguousIntermediateExpander, self.tree_class) ])) yield rule, wrapper_chain - def create_callback(self, transformer=None): callbacks = {} + default_handler = getattr(transformer, '__default__', None) + if default_handler: + def default_callback(data, children): + return default_handler(data, children, None) + else: + default_callback = self.tree_class + for rule, wrapper_chain in self.rule_builders: - user_callback_name = rule.alias or rule.origin.name + user_callback_name = rule.alias or rule.options.template_source or rule.origin.name try: f = getattr(transformer, user_callback_name) - # XXX InlineTransformer is deprecated! 
wrapper = getattr(f, 'visit_wrapper', None) if wrapper is not None: f = apply_visit_wrapper(f, user_callback_name, wrapper) - else: - if isinstance(transformer, InlineTransformer): - f = ptb_inline_args(f) - elif isinstance(transformer, Transformer_InPlace): - f = inplace_transformer(f) + elif isinstance(transformer, Transformer_InPlace): + f = inplace_transformer(f) except AttributeError: - f = partial(self.tree_class, user_callback_name) + f = partial(default_callback, user_callback_name) for w in wrapper_chain: f = w(f) @@ -1506,97 +2075,196 @@ def create_callback(self, transformer=None): return callbacks -class LALR_Parser(object): + +class LALR_Parser(Serialize): def __init__(self, parser_conf, debug=False): - assert all(r.options.priority is None for r in parser_conf.rules), "LALR doesn't yet support prioritization" analysis = LALR_Analyzer(parser_conf, debug=debug) analysis.compute_lalr() callbacks = parser_conf.callbacks self._parse_table = analysis.parse_table self.parser_conf = parser_conf - self.parser = _Parser(analysis.parse_table, callbacks) + self.parser = _Parser(analysis.parse_table, callbacks, debug) @classmethod - def deserialize(cls, data, memo, callbacks): + def deserialize(cls, data, memo, callbacks, debug=False): inst = cls.__new__(cls) inst._parse_table = IntParseTable.deserialize(data, memo) - inst.parser = _Parser(inst._parse_table, callbacks) + inst.parser = _Parser(inst._parse_table, callbacks, debug) return inst - def serialize(self, memo): + def serialize(self, memo: Any = None) -> Dict[str, Any]: return self._parse_table.serialize(memo) - def parse(self, *args): - return self.parser.parse(*args) + def parse_interactive(self, lexer, start): + return self.parser.parse(lexer, start, start_interactive=True) + def parse(self, lexer, start, on_error=None): + try: + return self.parser.parse(lexer, start) + except UnexpectedInput as e: + if on_error is None: + raise + + while True: + if isinstance(e, UnexpectedCharacters): + s = e.interactive_parser.lexer_thread.state + p = s.line_ctr.char_pos + + if not on_error(e): + raise e + + if isinstance(e, UnexpectedCharacters): + ## + + if p == s.line_ctr.char_pos: + s.line_ctr.feed(s.text[p:p+1]) + + try: + return e.interactive_parser.resume_parse() + except UnexpectedToken as e2: + if (isinstance(e, UnexpectedToken) + and e.token.type == e2.token.type == '$END' + and e.interactive_parser == e2.interactive_parser): + ## + + raise e2 + e = e2 + except UnexpectedCharacters as e2: + e = e2 + + +class ParseConf: + __slots__ = 'parse_table', 'callbacks', 'start', 'start_state', 'end_state', 'states' + + def __init__(self, parse_table, callbacks, start): + self.parse_table = parse_table + + self.start_state = self.parse_table.start_states[start] + self.end_state = self.parse_table.end_states[start] + self.states = self.parse_table.states -class _Parser: - def __init__(self, parse_table, callbacks): - self.states = parse_table.states - self.start_states = parse_table.start_states - self.end_states = parse_table.end_states self.callbacks = callbacks + self.start = start + + +class ParserState: + __slots__ = 'parse_conf', 'lexer', 'state_stack', 'value_stack' - def parse(self, seq, start, set_state=None): - token = None - stream = iter(seq) - states = self.states + def __init__(self, parse_conf, lexer, state_stack=None, value_stack=None): + self.parse_conf = parse_conf + self.lexer = lexer + self.state_stack = state_stack or [self.parse_conf.start_state] + self.value_stack = value_stack or [] + + @property + def position(self): + 
return self.state_stack[-1] - start_state = self.start_states[start] - end_state = self.end_states[start] + ## - state_stack = [start_state] - value_stack = [] + def __eq__(self, other): + if not isinstance(other, ParserState): + return NotImplemented + return len(self.state_stack) == len(other.state_stack) and self.position == other.position + + def __copy__(self): + return type(self)( + self.parse_conf, + self.lexer, ## - if set_state: set_state(start_state) + copy(self.state_stack), + deepcopy(self.value_stack), + ) - def get_action(token): + def copy(self): + return copy(self) + + def feed_token(self, token, is_end=False): + state_stack = self.state_stack + value_stack = self.value_stack + states = self.parse_conf.states + end_state = self.parse_conf.end_state + callbacks = self.parse_conf.callbacks + + while True: state = state_stack[-1] try: - return states[state][token.type] + action, arg = states[state][token.type] except KeyError: - expected = [s for s in states[state].keys() if s.isupper()] - raise UnexpectedToken(token, expected, state=state) - - def reduce(rule): - size = len(rule.expansion) - if size: - s = value_stack[-size:] - del state_stack[-size:] - del value_stack[-size:] - else: - s = [] + expected = {s for s in states[state].keys() if s.isupper()} + raise UnexpectedToken(token, expected, state=self, interactive_parser=None) - value = self.callbacks[rule](s) + assert arg != end_state - _action, new_state = states[state_stack[-1]][rule.origin.name] - assert _action is Shift - state_stack.append(new_state) - value_stack.append(value) + if action is Shift: + ## - # Main LALR-parser loop - for token in stream: - while True: - action, arg = get_action(token) - assert arg != end_state - - if action is Shift: - state_stack.append(arg) - value_stack.append(token) - if set_state: set_state(arg) - break # next token + assert not is_end + state_stack.append(arg) + value_stack.append(token if token.type not in callbacks else callbacks[token.type](token)) + return + else: + ## + + rule = arg + size = len(rule.expansion) + if size: + s = value_stack[-size:] + del state_stack[-size:] + del value_stack[-size:] else: - reduce(arg) + s = [] - token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1) - while True: - _action, arg = get_action(token) - assert(_action is Reduce) - reduce(arg) - if state_stack[-1] == end_state: - return value_stack[-1] + value = callbacks[rule](s) + + _action, new_state = states[state_stack[-1]][rule.origin.name] + assert _action is Shift + state_stack.append(new_state) + value_stack.append(value) + + if is_end and state_stack[-1] == end_state: + return value_stack[-1] + +class _Parser: + def __init__(self, parse_table, callbacks, debug=False): + self.parse_table = parse_table + self.callbacks = callbacks + self.debug = debug + def parse(self, lexer, start, value_stack=None, state_stack=None, start_interactive=False): + parse_conf = ParseConf(self.parse_table, self.callbacks, start) + parser_state = ParserState(parse_conf, lexer, state_stack, value_stack) + if start_interactive: + return InteractiveParser(self, parser_state, parser_state.lexer) + return self.parse_from_state(parser_state) + + + def parse_from_state(self, state, last_token=None): + #-- + try: + token = last_token + for token in state.lexer.lex(state): + state.feed_token(token) + + end_token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1) + return state.feed_token(end_token, True) + except UnexpectedInput as e: + try: + 
e.interactive_parser = InteractiveParser(self, state, state.lexer) + except NameError: + pass + raise e + except Exception as e: + if self.debug: + print("") + print("STATE STACK DUMP") + print("----------------") + for i, s in enumerate(state.state_stack): + print('%d)' % i , s) + print("") + + raise class Action: @@ -1619,7 +2287,6 @@ def __init__(self, states, start_states, end_states): def serialize(self, memo): tokens = Enumerator() - rules = Enumerator() states = { state: {tokens.get(token): ((1, arg.serialize(memo)) if action is Reduce else (0, arg)) @@ -1665,183 +2332,275 @@ def from_ParseTable(cls, parse_table): -def get_frontend(parser, lexer): - if parser=='lalr': - if lexer is None: - raise ValueError('The LALR parser requires use of a lexer') - elif lexer == 'standard': - return LALR_TraditionalLexer - elif lexer == 'contextual': - return LALR_ContextualLexer - elif issubclass(lexer, Lexer): - return partial(LALR_CustomLexer, lexer) - else: - raise ValueError('Unknown lexer: %s' % lexer) - elif parser=='earley': - if lexer=='standard': - return Earley - elif lexer=='dynamic': - return XEarley - elif lexer=='dynamic_complete': - return XEarley_CompleteLex - elif lexer=='contextual': - raise ValueError('The Earley parser does not support the contextual parser') - else: - raise ValueError('Unknown lexer: %s' % lexer) - elif parser == 'cyk': - if lexer == 'standard': - return CYK - else: - raise ValueError('CYK parser requires using standard parser.') +def _wrap_lexer(lexer_class): + future_interface = getattr(lexer_class, '__future_interface__', False) + if future_interface: + return lexer_class else: - raise ValueError('Unknown parser: %s' % parser) + class CustomLexerWrapper(Lexer): + def __init__(self, lexer_conf): + self.lexer = lexer_class(lexer_conf) + def lex(self, lexer_state, parser_state): + return self.lexer.lex(lexer_state.text) + return CustomLexerWrapper -class _ParserFrontend(Serialize): - def _parse(self, input, start, *args): - if start is None: - start = self.start - if len(start) > 1: - raise ValueError("Lark initialized with more than 1 possible start rule. 
Must specify which start rule to parse", start) - start ,= start - return self.parser.parse(input, start, *args) +def _deserialize_parsing_frontend(data, memo, lexer_conf, callbacks, options): + parser_conf = ParserConf.deserialize(data['parser_conf'], memo) + cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser + parser = cls.deserialize(data['parser'], memo, callbacks, options.debug) + parser_conf.callbacks = callbacks + return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser) + +_parser_creators: 'Dict[str, Callable[[LexerConf, Any, Any], Any]]' = {} -class WithLexer(_ParserFrontend): - lexer = None - parser = None - lexer_conf = None - start = None - __serialize_fields__ = 'parser', 'lexer_conf', 'start' - __serialize_namespace__ = LexerConf, +class ParsingFrontend(Serialize): + __serialize_fields__ = 'lexer_conf', 'parser_conf', 'parser' - def __init__(self, lexer_conf, parser_conf, options=None): + def __init__(self, lexer_conf, parser_conf, options, parser=None): + self.parser_conf = parser_conf self.lexer_conf = lexer_conf - self.start = parser_conf.start - self.postlex = lexer_conf.postlex + self.options = options - @classmethod - def deserialize(cls, data, memo, callbacks, postlex): - inst = super(WithLexer, cls).deserialize(data, memo) - inst.postlex = postlex - inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks) - inst.init_lexer() - return inst + ## - def _serialize(self, data, memo): - data['parser'] = data['parser'].serialize(memo) + if parser: ## - def lex(self, *args): - stream = self.lexer.lex(*args) - return self.postlex.process(stream) if self.postlex else stream + self.parser = parser + else: + create_parser = _parser_creators.get(parser_conf.parser_type) + assert create_parser is not None, "{} is not supported in standalone mode".format( + parser_conf.parser_type + ) + self.parser = create_parser(lexer_conf, parser_conf, options) + + ## + + lexer_type = lexer_conf.lexer_type + self.skip_lexer = False + if lexer_type in ('dynamic', 'dynamic_complete'): + assert lexer_conf.postlex is None + self.skip_lexer = True + return - def parse(self, text, start=None): - token_stream = self.lex(text) - return self._parse(token_stream, start) + try: + create_lexer = { + 'basic': create_basic_lexer, + 'contextual': create_contextual_lexer, + }[lexer_type] + except KeyError: + assert issubclass(lexer_type, Lexer), lexer_type + self.lexer = _wrap_lexer(lexer_type)(lexer_conf) + else: + self.lexer = create_lexer(lexer_conf, self.parser, lexer_conf.postlex, options) - def init_traditional_lexer(self): - self.lexer = TraditionalLexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks, g_regex_flags=self.lexer_conf.g_regex_flags) + if lexer_conf.postlex: + self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex) -class LALR_WithLexer(WithLexer): - def __init__(self, lexer_conf, parser_conf, options=None): - debug = options.debug if options else False - self.parser = LALR_Parser(parser_conf, debug=debug) - WithLexer.__init__(self, lexer_conf, parser_conf, options) + def _verify_start(self, start=None): + if start is None: + start_decls = self.parser_conf.start + if len(start_decls) > 1: + raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start_decls) + start ,= start_decls + elif start not in self.parser_conf.start: + raise ConfigurationError("Unknown start rule %s. 
Must be one of %r" % (start, self.parser_conf.start)) + return start + + def _make_lexer_thread(self, text): + cls = (self.options and self.options._plugins.get('LexerThread')) or LexerThread + return text if self.skip_lexer else cls.from_text(self.lexer, text) + + def parse(self, text, start=None, on_error=None): + chosen_start = self._verify_start(start) + kw = {} if on_error is None else {'on_error': on_error} + stream = self._make_lexer_thread(text) + return self.parser.parse(stream, chosen_start, **kw) + + def parse_interactive(self, text=None, start=None): + chosen_start = self._verify_start(start) + if self.parser_conf.parser_type != 'lalr': + raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ") + stream = self._make_lexer_thread(text) + return self.parser.parse_interactive(stream, chosen_start) + + +def _validate_frontend_args(parser, lexer) -> None: + assert_config(parser, ('lalr', 'earley', 'cyk')) + if not isinstance(lexer, type): ## + + expected = { + 'lalr': ('basic', 'contextual'), + 'earley': ('basic', 'dynamic', 'dynamic_complete'), + 'cyk': ('basic', ), + }[parser] + assert_config(lexer, expected, 'Parser %r does not support lexer %%r, expected one of %%s' % parser) + + +def _get_lexer_callbacks(transformer, terminals): + result = {} + for terminal in terminals: + callback = getattr(transformer, terminal.name, None) + if callback is not None: + result[terminal.name] = callback + return result + +class PostLexConnector: + def __init__(self, lexer, postlexer): + self.lexer = lexer + self.postlexer = postlexer + + def lex(self, lexer_state, parser_state): + i = self.lexer.lex(lexer_state, parser_state) + return self.postlexer.process(i) - self.init_lexer() - def init_lexer(self): - raise NotImplementedError() -class LALR_TraditionalLexer(LALR_WithLexer): - def init_lexer(self): - self.init_traditional_lexer() +def create_basic_lexer(lexer_conf, parser, postlex, options): + cls = (options and options._plugins.get('BasicLexer')) or BasicLexer + return cls(lexer_conf) -class LALR_ContextualLexer(LALR_WithLexer): - def init_lexer(self): - states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()} - always_accept = self.postlex.always_accept if self.postlex else () - self.lexer = ContextualLexer(self.lexer_conf.tokens, states, - ignore=self.lexer_conf.ignore, - always_accept=always_accept, - user_callbacks=self.lexer_conf.callbacks, - g_regex_flags=self.lexer_conf.g_regex_flags) +def create_contextual_lexer(lexer_conf, parser, postlex, options): + cls = (options and options._plugins.get('ContextualLexer')) or ContextualLexer + states = {idx:list(t.keys()) for idx, t in parser._parse_table.states.items()} + always_accept = postlex.always_accept if postlex else () + return cls(lexer_conf, states, always_accept=always_accept) +def create_lalr_parser(lexer_conf, parser_conf, options=None): + debug = options.debug if options else False + cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser + return cls(parser_conf, debug=debug) - def parse(self, text, start=None): - parser_state = [None] - def set_parser_state(s): - parser_state[0] = s +_parser_creators['lalr'] = create_lalr_parser - token_stream = self.lex(text, lambda: parser_state[0]) - return self._parse(token_stream, start, set_parser_state) + +class PostLex(ABC): + @abstractmethod + def process(self, stream: Iterator[Token]) -> Iterator[Token]: + return stream + + always_accept: Iterable[str] = () + class LarkOptions(Serialize): - """Specifies the 
options for Lark + #-- + + start: List[str] + debug: bool + transformer: 'Optional[Transformer]' + propagate_positions: Union[bool, str] + maybe_placeholders: bool + cache: Union[bool, str] + regex: bool + g_regex_flags: int + keep_all_tokens: bool + tree_class: Any + parser: _ParserArgType + lexer: _LexerArgType + ambiguity: 'Literal["auto", "resolve", "explicit", "forest"]' + postlex: Optional[PostLex] + priority: 'Optional[Literal["auto", "normal", "invert"]]' + lexer_callbacks: Dict[str, Callable[[Token], Token]] + use_bytes: bool + edit_terminals: Optional[Callable[[TerminalDef], TerminalDef]] + import_paths: 'List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]' + source_path: Optional[str] - """ OPTIONS_DOC = """ -# General - - start - The start symbol. Either a string, or a list of strings for - multiple possible starts (Default: "start") - debug - Display debug information, such as warnings (default: False) - transformer - Applies the transformer to every parse tree (equivlent to - applying it after the parse, but faster) - propagate_positions - Propagates (line, column, end_line, end_column) - attributes into all tree branches. - maybe_placeholders - When True, the `[]` operator returns `None` when not matched. - When `False`, `[]` behaves like the `?` operator, - and returns no value at all. - (default=`False`. Recommended to set to `True`) - cache_grammar - Cache the Lark grammar (Default: False) - g_regex_flags - Flags that are applied to all terminals - (both regex and strings) - keep_all_tokens - Prevent the tree builder from automagically - removing "punctuation" tokens (default: False) - -# Algorithm - - parser - Decides which parser engine to use - Accepts "earley" or "lalr". (Default: "earley") - (there is also a "cyk" option for legacy) - - lexer - Decides whether or not to use a lexer stage - "auto" (default): Choose for me based on the parser - "standard": Use a standard lexer - "contextual": Stronger lexer (only works with parser="lalr") - "dynamic": Flexible and powerful (only with parser="earley") - "dynamic_complete": Same as dynamic, but tries *every* variation - of tokenizing possible. - - ambiguity - Decides how to handle ambiguity in the parse. - Only relevant if parser="earley" - "resolve": The parser will automatically choose the simplest - derivation (it chooses consistently: greedy for - tokens, non-greedy for rules) - "explicit": The parser will return all derivations wrapped - in "_ambig" tree nodes (i.e. a forest). - -# Domain Specific - - postlex - Lexer post-processing (Default: None) Only works with the - standard and contextual lexers. - priority - How priorities should be evaluated - auto, none, normal, - invert (Default: auto) - lexer_callbacks - Dictionary of callbacks for the lexer. May alter - tokens during lexing. Use with caution. - edit_terminals - A callback + **=== General Options ===** + + start + The start symbol. Either a string, or a list of strings for multiple possible starts (Default: "start") + debug + Display debug information and extra warnings. Use only when debugging (Default: ``False``) + When used with Earley, it generates a forest graph as "sppf.png", if 'dot' is installed. + transformer + Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster) + propagate_positions + Propagates (line, column, end_line, end_column) attributes into all tree branches. 
+ Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating. + maybe_placeholders + When ``True``, the ``[]`` operator returns ``None`` when not matched. + When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all. + (default= ``True``) + cache + Cache the results of the Lark grammar analysis, for x2 to x3 faster loading. LALR only for now. + + - When ``False``, does nothing (default) + - When ``True``, caches to a temporary file in the local directory + - When given a string, caches to the path pointed by the string + regex + When True, uses the ``regex`` module instead of the stdlib ``re``. + g_regex_flags + Flags that are applied to all terminals (both regex and strings) + keep_all_tokens + Prevent the tree builder from automagically removing "punctuation" tokens (Default: ``False``) + tree_class + Lark will produce trees comprised of instances of this class instead of the default ``lark.Tree``. + + **=== Algorithm Options ===** + + parser + Decides which parser engine to use. Accepts "earley" or "lalr". (Default: "earley"). + (there is also a "cyk" option for legacy) + lexer + Decides whether or not to use a lexer stage + + - "auto" (default): Choose for me based on the parser + - "basic": Use a basic lexer + - "contextual": Stronger lexer (only works with parser="lalr") + - "dynamic": Flexible and powerful (only with parser="earley") + - "dynamic_complete": Same as dynamic, but tries *every* variation of tokenizing possible. + ambiguity + Decides how to handle ambiguity in the parse. Only relevant if parser="earley" + + - "resolve": The parser will automatically choose the simplest derivation + (it chooses consistently: greedy for tokens, non-greedy for rules) + - "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest). + - "forest": The parser will return the root of the shared packed parse forest. + + **=== Misc. / Domain Specific Options ===** + + postlex + Lexer post-processing (Default: ``None``) Only works with the basic and contextual lexers. + priority + How priorities should be evaluated - "auto", ``None``, "normal", "invert" (Default: "auto") + lexer_callbacks + Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution. + use_bytes + Accept an input of type ``bytes`` instead of ``str``. + edit_terminals + A callback for editing the terminals before parse. + import_paths + A List of either paths or loader functions to specify from where grammars are imported + source_path + Override the source of from where the grammar was loaded. 
Useful for relative imports and unconventional grammar loading + **=== End of Options ===** """ if __doc__: __doc__ += OPTIONS_DOC - _defaults = { + + ## + + ## + + ## + + ## + + ## + + ## + + _defaults: Dict[str, Any] = { 'debug': False, 'keep_all_tokens': False, 'tree_class': None, - 'cache_grammar': False, + 'cache': False, 'postlex': None, 'parser': 'earley', 'lexer': 'auto', @@ -1849,259 +2608,558 @@ class LarkOptions(Serialize): 'start': 'start', 'priority': 'auto', 'ambiguity': 'auto', + 'regex': False, 'propagate_positions': False, 'lexer_callbacks': {}, - 'maybe_placeholders': False, + 'maybe_placeholders': True, 'edit_terminals': None, 'g_regex_flags': 0, + 'use_bytes': False, + 'import_paths': [], + 'source_path': None, + '_plugins': {}, } - def __init__(self, options_dict): + def __init__(self, options_dict: Dict[str, Any]) -> None: o = dict(options_dict) options = {} for name, default in self._defaults.items(): if name in o: value = o.pop(name) - if isinstance(default, bool): + if isinstance(default, bool) and name not in ('cache', 'use_bytes', 'propagate_positions'): value = bool(value) else: value = default options[name] = value - if isinstance(options['start'], STRING_TYPE): + if isinstance(options['start'], str): options['start'] = [options['start']] self.__dict__['options'] = options - assert self.parser in ('earley', 'lalr', 'cyk', None) + + assert_config(self.parser, ('earley', 'lalr', 'cyk', None)) if self.parser == 'earley' and self.transformer: - raise ValueError('Cannot specify an embedded transformer when using the Earley algorithm.' + raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm. ' 'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)') if o: - raise ValueError("Unknown options: %s" % o.keys()) + raise ConfigurationError("Unknown options: %s" % o.keys()) - def __getattr__(self, name): + def __getattr__(self, name: str) -> Any: try: - return self.options[name] + return self.__dict__['options'][name] except KeyError as e: raise AttributeError(e) - def __setattr__(self, name, value): - assert name in self.options + def __setattr__(self, name: str, value: str) -> None: + assert_config(name, self.options.keys(), "%r isn't a valid option. Expected one of: %s") self.options[name] = value - def serialize(self, memo): + def serialize(self, memo = None) -> Dict[str, Any]: return self.options @classmethod - def deserialize(cls, data, memo): + def deserialize(cls, data: Dict[str, Any], memo: Dict[int, Union[TerminalDef, Rule]]) -> "LarkOptions": return cls(data) +## + +## + +_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'lexer_callbacks', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class', '_plugins'} + +_VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None) +_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest') + + +_T = TypeVar('_T', bound="Lark") + class Lark(Serialize): - def __init__(self, grammar, **options): - """ - grammar : a string or file-object containing the grammar spec (using Lark's ebnf syntax) - options : a dictionary controlling various aspects of Lark. 
- """ + #-- + + source_path: str + source_grammar: str + grammar: 'Grammar' + options: LarkOptions + lexer: Lexer + terminals: Collection[TerminalDef] + + def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None: self.options = LarkOptions(options) + re_module: types.ModuleType - # Some, but not all file-like objects have a 'name' attribute - try: - self.source = grammar.name - except AttributeError: - self.source = '' + ## + + use_regex = self.options.regex + if use_regex: + if _has_regex: + re_module = regex + else: + raise ImportError('`regex` module must be installed if calling `Lark(regex=True)`.') + else: + re_module = re + + ## + + if self.options.source_path is None: + try: + self.source_path = grammar.name ## + + except AttributeError: + self.source_path = '' + else: + self.source_path = self.options.source_path + + ## - # Drain file-like objects to get their contents try: - read = grammar.read + read = grammar.read ## + except AttributeError: pass else: grammar = read() - assert isinstance(grammar, STRING_TYPE) + cache_fn = None + cache_md5 = None + if isinstance(grammar, str): + self.source_grammar = grammar + if self.options.use_bytes: + if not isascii(grammar): + raise ConfigurationError("Grammar must be ascii only, when use_bytes=True") + + if self.options.cache: + if self.options.parser != 'lalr': + raise ConfigurationError("cache only works with parser='lalr' for now") + + unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals', '_plugins') + options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable) + from . import __version__ + s = grammar + options_str + __version__ + str(sys.version_info[:2]) + cache_md5 = md5_digest(s) + + if isinstance(self.options.cache, str): + cache_fn = self.options.cache + else: + if self.options.cache is not True: + raise ConfigurationError("cache argument must be bool or str") + + try: + username = getpass.getuser() + except Exception: + ## + + ## + + ## + + username = "unknown" + + cache_fn = tempfile.gettempdir() + "/.lark_cache_%s_%s_%s_%s.tmp" % (username, cache_md5, *sys.version_info[:2]) + + old_options = self.options + try: + with FS.open(cache_fn, 'rb') as f: + logger.debug('Loading grammar from cache: %s', cache_fn) + ## + + for name in (set(options) - _LOAD_ALLOWED_OPTIONS): + del options[name] + file_md5 = f.readline().rstrip(b'\n') + cached_used_files = pickle.load(f) + if file_md5 == cache_md5.encode('utf8') and verify_used_files(cached_used_files): + cached_parser_data = pickle.load(f) + self._load(cached_parser_data, **options) + return + except FileNotFoundError: + ## + + pass + except Exception: ## + + logger.exception("Failed to load Lark from cache: %r. We will try to carry on.", cache_fn) + + ## + + ## + + self.options = old_options + + + ## + + self.grammar, used_files = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens) + else: + assert isinstance(grammar, Grammar) + self.grammar = grammar - if self.options.cache_grammar: - raise NotImplementedError("Not available yet") if self.options.lexer == 'auto': if self.options.parser == 'lalr': self.options.lexer = 'contextual' elif self.options.parser == 'earley': - self.options.lexer = 'dynamic' + if self.options.postlex is not None: + logger.info("postlex can't be used with the dynamic lexer, so we use 'basic' instead. 
" + "Consider using lalr with contextual instead of earley") + self.options.lexer = 'basic' + else: + self.options.lexer = 'dynamic' elif self.options.parser == 'cyk': - self.options.lexer = 'standard' + self.options.lexer = 'basic' else: assert False, self.options.parser lexer = self.options.lexer - assert lexer in ('standard', 'contextual', 'dynamic', 'dynamic_complete') or issubclass(lexer, Lexer) + if isinstance(lexer, type): + assert issubclass(lexer, Lexer) ## + + else: + assert_config(lexer, ('basic', 'contextual', 'dynamic', 'dynamic_complete')) + if self.options.postlex is not None and 'dynamic' in lexer: + raise ConfigurationError("Can't use postlex with a dynamic lexer. Use basic or contextual instead") if self.options.ambiguity == 'auto': if self.options.parser == 'earley': self.options.ambiguity = 'resolve' else: - disambig_parsers = ['earley', 'cyk'] - assert self.options.parser in disambig_parsers, ( - 'Only %s supports disambiguation right now') % ', '.join(disambig_parsers) + assert_config(self.options.parser, ('earley', 'cyk'), "%r doesn't support disambiguation. Use one of these parsers instead: %s") if self.options.priority == 'auto': - if self.options.parser in ('earley', 'cyk', ): - self.options.priority = 'normal' - elif self.options.parser in ('lalr', ): - self.options.priority = None - elif self.options.priority in ('invert', 'normal'): - assert self.options.parser in ('earley', 'cyk'), "priorities are not supported for LALR at this time" + self.options.priority = 'normal' + + if self.options.priority not in _VALID_PRIORITY_OPTIONS: + raise ConfigurationError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS)) + if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS: + raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS)) - assert self.options.priority in ('auto', None, 'normal', 'invert'), 'invalid priority option specified: {}. options are auto, none, normal, invert.'.format(self.options.priority) - assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"' - assert self.options.ambiguity in ('resolve', 'explicit', 'auto', ) + if self.options.parser is None: + terminals_to_keep = '*' + elif self.options.postlex is not None: + terminals_to_keep = set(self.options.postlex.always_accept) + else: + terminals_to_keep = set() - # Parse the grammar file and compose the grammars (TODO) - self.grammar = load_grammar(grammar, self.source) + ## - # Compile the EBNF grammar into BNF - self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start) + self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start, terminals_to_keep) if self.options.edit_terminals: for t in self.terminals: self.options.edit_terminals(t) - self._terminals_dict = {t.name:t for t in self.terminals} + self._terminals_dict = {t.name: t for t in self.terminals} + + ## - # If the user asked to invert the priorities, negate them all here. - # This replaces the old 'resolve__antiscore_sum' option. if self.options.priority == 'invert': for rule in self.rules: if rule.options.priority is not None: rule.options.priority = -rule.options.priority - # Else, if the user asked to disable priorities, strip them from the - # rules. 
This allows the Earley parsers to skip an extra forest walk - # for improved performance, if you don't need them (or didn't specify any). - elif self.options.priority == None: + for term in self.terminals: + term.priority = -term.priority + ## + + ## + + ## + + elif self.options.priority is None: for rule in self.rules: if rule.options.priority is not None: rule.options.priority = None - - # TODO Deprecate lexer_callbacks? - lexer_callbacks = dict(self.options.lexer_callbacks) - if self.options.transformer: - t = self.options.transformer for term in self.terminals: - if hasattr(t, term.name): - lexer_callbacks[term.name] = getattr(t, term.name) + term.priority = 0 - self.lexer_conf = LexerConf(self.terminals, self.ignore_tokens, self.options.postlex, lexer_callbacks, self.options.g_regex_flags) + ## + + self.lexer_conf = LexerConf( + self.terminals, re_module, self.ignore_tokens, self.options.postlex, + self.options.lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes + ) if self.options.parser: self.parser = self._build_parser() elif lexer: self.lexer = self._build_lexer() - if __init__.__doc__: - __init__.__doc__ += "\nOptions:\n" + LarkOptions.OPTIONS_DOC - - __serialize_fields__ = 'parser', 'rules', 'options' + if cache_fn: + logger.debug('Saving grammar to cache: %s', cache_fn) + try: + with FS.open(cache_fn, 'wb') as f: + assert cache_md5 is not None + f.write(cache_md5.encode('utf8') + b'\n') + pickle.dump(used_files, f) + self.save(f, _LOAD_ALLOWED_OPTIONS) + except IOError as e: + logger.exception("Failed to save Lark to cache: %r.", cache_fn, e) - def _build_lexer(self): - return TraditionalLexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks, g_regex_flags=self.lexer_conf.g_regex_flags) + if __doc__: + __doc__ += "\n\n" + LarkOptions.OPTIONS_DOC - def _prepare_callbacks(self): - self.parser_class = get_frontend(self.options.parser, self.options.lexer) - self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders) - self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer) + __serialize_fields__ = 'parser', 'rules', 'options' - def _build_parser(self): + def _build_lexer(self, dont_ignore: bool=False) -> BasicLexer: + lexer_conf = self.lexer_conf + if dont_ignore: + from copy import copy + lexer_conf = copy(lexer_conf) + lexer_conf.ignore = () + return BasicLexer(lexer_conf) + + def _prepare_callbacks(self) -> None: + self._callbacks = {} + ## + + if self.options.ambiguity != 'forest': + self._parse_tree_builder = ParseTreeBuilder( + self.rules, + self.options.tree_class or Tree, + self.options.propagate_positions, + self.options.parser != 'lalr' and self.options.ambiguity == 'explicit', + self.options.maybe_placeholders + ) + self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer) + self._callbacks.update(_get_lexer_callbacks(self.options.transformer, self.terminals)) + + def _build_parser(self) -> "ParsingFrontend": self._prepare_callbacks() + _validate_frontend_args(self.options.parser, self.options.lexer) parser_conf = ParserConf(self.rules, self._callbacks, self.options.start) - return self.parser_class(self.lexer_conf, parser_conf, options=self.options) + return _construct_parsing_frontend( + self.options.parser, + self.options.lexer, + self.lexer_conf, + 
parser_conf, + options=self.options + ) + + def save(self, f, exclude_options: Collection[str] = ()) -> None: + #-- + data, m = self.memo_serialize([TerminalDef, Rule]) + if exclude_options: + data["options"] = {n: v for n, v in data["options"].items() if n not in exclude_options} + pickle.dump({'data': data, 'memo': m}, f, protocol=pickle.HIGHEST_PROTOCOL) @classmethod - def deserialize(cls, data, namespace, memo, transformer=None, postlex=None): - if memo: - memo = SerializeMemoizer.deserialize(memo, namespace, {}) + def load(cls: Type[_T], f) -> _T: + #-- inst = cls.__new__(cls) + return inst._load(f) + + def _deserialize_lexer_conf(self, data: Dict[str, Any], memo: Dict[int, Union[TerminalDef, Rule]], options: LarkOptions) -> LexerConf: + lexer_conf = LexerConf.deserialize(data['lexer_conf'], memo) + lexer_conf.callbacks = options.lexer_callbacks or {} + lexer_conf.re_module = regex if options.regex else re + lexer_conf.use_bytes = options.use_bytes + lexer_conf.g_regex_flags = options.g_regex_flags + lexer_conf.skip_validation = True + lexer_conf.postlex = options.postlex + return lexer_conf + + def _load(self: _T, f: Any, **kwargs) -> _T: + if isinstance(f, dict): + d = f + else: + d = pickle.load(f) + memo_json = d['memo'] + data = d['data'] + + assert memo_json + memo = SerializeMemoizer.deserialize(memo_json, {'Rule': Rule, 'TerminalDef': TerminalDef}, {}) options = dict(data['options']) - if transformer is not None: - options['transformer'] = transformer - if postlex is not None: - options['postlex'] = postlex - inst.options = LarkOptions.deserialize(options, memo) - inst.rules = [Rule.deserialize(r, memo) for r in data['rules']] - inst.source = '' - inst._prepare_callbacks() - inst.parser = inst.parser_class.deserialize(data['parser'], memo, inst._callbacks, inst.options.postlex) - return inst + if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults): + raise ConfigurationError("Some options are not allowed when loading a Parser: {}" + .format(set(kwargs) - _LOAD_ALLOWED_OPTIONS)) + options.update(kwargs) + self.options = LarkOptions.deserialize(options, memo) + self.rules = [Rule.deserialize(r, memo) for r in data['rules']] + self.source_path = '' + _validate_frontend_args(self.options.parser, self.options.lexer) + self.lexer_conf = self._deserialize_lexer_conf(data['parser'], memo, self.options) + self.terminals = self.lexer_conf.terminals + self._prepare_callbacks() + self._terminals_dict = {t.name: t for t in self.terminals} + self.parser = _deserialize_parsing_frontend( + data['parser'], + memo, + self.lexer_conf, + self._callbacks, + self.options, ## - def save(self, f): - data, m = self.memo_serialize([TerminalDef, Rule]) - pickle.dump({'data': data, 'memo': m}, f) + ) + return self @classmethod - def load(cls, f): - d = pickle.load(f) - namespace = {'Rule': Rule, 'TerminalDef': TerminalDef} - memo = d['memo'] - return Lark.deserialize(d['data'], namespace, memo) - + def _load_from_dict(cls, data, memo, **kwargs): + inst = cls.__new__(cls) + return inst._load({'data': data, 'memo': memo}, **kwargs) @classmethod - def open(cls, grammar_filename, rel_to=None, **options): - """Create an instance of Lark with the grammar given by its filename - - If rel_to is provided, the function will find the grammar filename in relation to it. - - Example: - - >>> Lark.open("grammar_file.lark", rel_to=__file__, parser="lalr") - Lark(...) 
- - """ + def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str]=None, **options) -> _T: + #-- if rel_to: basepath = os.path.dirname(rel_to) grammar_filename = os.path.join(basepath, grammar_filename) with open(grammar_filename, encoding='utf8') as f: return cls(f, **options) + @classmethod + def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: 'Sequence[str]'=[""], **options) -> _T: + #-- + package_loader = FromPackageLoader(package, search_paths) + full_path, text = package_loader(None, grammar_path) + options.setdefault('source_path', full_path) + options.setdefault('import_paths', []) + options['import_paths'].append(package_loader) + return cls(text, **options) + def __repr__(self): - return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source, self.options.parser, self.options.lexer) + return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer) - def lex(self, text): - "Only lex (and postlex) the text, without parsing it. Only relevant when lexer='standard'" - if not hasattr(self, 'lexer'): - self.lexer = self._build_lexer() - stream = self.lexer.lex(text) + def lex(self, text: str, dont_ignore: bool=False) -> Iterator[Token]: + #-- + lexer: Lexer + if not hasattr(self, 'lexer') or dont_ignore: + lexer = self._build_lexer(dont_ignore) + else: + lexer = self.lexer + lexer_thread = LexerThread.from_text(lexer, text) + stream = lexer_thread.lex(None) if self.options.postlex: return self.options.postlex.process(stream) return stream - def get_terminal(self, name): - "Get information about a terminal" + def get_terminal(self, name: str) -> TerminalDef: + #-- return self._terminals_dict[name] - def parse(self, text, start=None): - """Parse the given text, according to the options provided. + def parse_interactive(self, text: Optional[str]=None, start: Optional[str]=None) -> 'InteractiveParser': + #-- + return self.parser.parse_interactive(text, start=start) + + def parse(self, text: str, start: Optional[str]=None, on_error: 'Optional[Callable[[UnexpectedInput], bool]]'=None) -> 'ParseTree': + #-- + return self.parser.parse(text, start=start, on_error=on_error) + + + + +class DedentError(LarkError): + pass + +class Indenter(PostLex, ABC): + paren_level: int + indent_level: List[int] + + def __init__(self) -> None: + self.paren_level = 0 + self.indent_level = [0] + assert self.tab_len > 0 + + def handle_NL(self, token: Token) -> Iterator[Token]: + if self.paren_level > 0: + return + + yield token + + indent_str = token.rsplit('\n', 1)[1] ## + + indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len + + if indent > self.indent_level[-1]: + self.indent_level.append(indent) + yield Token.new_borrow_pos(self.INDENT_type, indent_str, token) + else: + while indent < self.indent_level[-1]: + self.indent_level.pop() + yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token) + + if indent != self.indent_level[-1]: + raise DedentError('Unexpected dedent to column %s. 
Expected dedent to %s' % (indent, self.indent_level[-1])) + + def _process(self, stream): + for token in stream: + if token.type == self.NL_type: + yield from self.handle_NL(token) + else: + yield token + + if token.type in self.OPEN_PAREN_types: + self.paren_level += 1 + elif token.type in self.CLOSE_PAREN_types: + self.paren_level -= 1 + assert self.paren_level >= 0 + + while len(self.indent_level) > 1: + self.indent_level.pop() + yield Token(self.DEDENT_type, '') + + assert self.indent_level == [0], self.indent_level + + def process(self, stream): + self.paren_level = 0 + self.indent_level = [0] + return self._process(stream) + + ## + + @property + def always_accept(self): + return (self.NL_type,) + + @property + @abstractmethod + def NL_type(self) -> str: + raise NotImplementedError() + + @property + @abstractmethod + def OPEN_PAREN_types(self) -> List[str]: + raise NotImplementedError() + + @property + @abstractmethod + def CLOSE_PAREN_types(self) -> List[str]: + raise NotImplementedError() + + @property + @abstractmethod + def INDENT_type(self) -> str: + raise NotImplementedError() + + @property + @abstractmethod + def DEDENT_type(self) -> str: + raise NotImplementedError() + + @property + @abstractmethod + def tab_len(self) -> int: + raise NotImplementedError() - The 'start' parameter is required if Lark was given multiple possible start symbols (using the start option). - Returns a tree, unless specified otherwise. - """ - return self.parser.parse(text, start=start) +class PythonIndenter(Indenter): + NL_type = '_NEWLINE' + OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE'] + CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE'] + INDENT_type = '_INDENT' + DEDENT_type = '_DEDENT' + tab_len = 8 +import pickle, zlib, base64 DATA = ( -{'parser': {'parser': {'tokens': {0: 'NAME', 1: 'series', 2: 'element', 3: 'LSQB', 4: 'STAR', 5: 'metadata', 6: 'trait', 7: 'series_terminal', 8: 'anytrait', 9: 'PLUS', 10: 'ITEMS', 11: 'items', 12: '$END', 13: 'COMMA', 14: 'DOT', 15: 'RSQB', 16: 'COLON', 17: 'notify', 18: 'quiet', 19: 'parallel', 20: 'start', 21: 'parallel_terminal'}, 'states': {0: {0: (0, 29)}, 1: {1: (0, 19), 2: (0, 34), 3: (0, 22), 0: (0, 9), 4: (0, 3), 5: (0, 30), 6: (0, 12), 7: (0, 13), 8: (0, 24), 9: (0, 0), 10: (0, 18), 11: (0, 14)}, 2: {5: (0, 30), 2: (0, 16), 8: (0, 8), 6: (0, 12), 4: (0, 3), 0: (0, 9), 3: (0, 22), 9: (0, 0), 10: (0, 18), 11: (0, 14)}, 3: {12: (1, {'@': 10}), 13: (1, {'@': 10})}, 4: {13: (1, {'@': 11}), 14: (1, {'@': 11}), 15: (1, {'@': 11}), 16: (1, {'@': 11})}, 5: {14: (1, {'@': 12}), 16: (1, {'@': 12}), 12: (1, {'@': 13}), 13: (1, {'@': 13})}, 6: {5: (0, 30), 10: (0, 18), 2: (0, 5), 6: (0, 12), 0: (0, 9), 3: (0, 22), 8: (0, 33), 9: (0, 0), 4: (0, 3), 11: (0, 14)}, 7: {13: (1, {'@': 14}), 14: (1, {'@': 14}), 15: (1, {'@': 14}), 16: (1, {'@': 14})}, 8: {12: (1, {'@': 15}), 13: (1, {'@': 15})}, 9: {14: (1, {'@': 16}), 12: (1, {'@': 16}), 13: (1, {'@': 16}), 16: (1, {'@': 16}), 15: (1, {'@': 16})}, 10: {13: (0, 1), 12: (1, {'@': 17})}, 11: {13: (0, 31), 15: (0, 35)}, 12: {14: (1, {'@': 18}), 12: (1, {'@': 18}), 13: (1, {'@': 18}), 16: (1, {'@': 18}), 15: (1, {'@': 18})}, 13: {12: (1, {'@': 19}), 13: (1, {'@': 19})}, 14: {14: (1, {'@': 20}), 12: (1, {'@': 20}), 13: (1, {'@': 20}), 16: (1, {'@': 20}), 15: (1, {'@': 20})}, 15: {16: (0, 21), 17: (0, 32), 14: (0, 26), 18: (0, 23), 15: (1, {'@': 21}), 13: (1, {'@': 21})}, 16: {14: (1, {'@': 14}), 16: (1, {'@': 14}), 12: (1, {'@': 22}), 13: (1, {'@': 22})}, 17: {}, 18: {14: (1, {'@': 23}), 12: (1, {'@': 23}), 13: (1, {'@': 23}), 
16: (1, {'@': 23}), 15: (1, {'@': 23})}, 19: {16: (0, 21), 17: (0, 2), 14: (0, 26), 18: (0, 6)}, 20: {13: (1, {'@': 12}), 14: (1, {'@': 12}), 15: (1, {'@': 12}), 16: (1, {'@': 12})}, 21: {9: (1, {'@': 24}), 0: (1, {'@': 24}), 10: (1, {'@': 24}), 3: (1, {'@': 24}), 4: (1, {'@': 24})}, 22: {5: (0, 30), 1: (0, 15), 19: (0, 11), 6: (0, 12), 0: (0, 9), 3: (0, 22), 2: (0, 4), 9: (0, 0), 10: (0, 18), 11: (0, 14)}, 23: {5: (0, 30), 2: (0, 20), 6: (0, 12), 0: (0, 9), 3: (0, 22), 9: (0, 0), 10: (0, 18), 11: (0, 14)}, 24: {12: (1, {'@': 25}), 13: (1, {'@': 25})}, 25: {12: (1, {'@': 26}), 13: (1, {'@': 26})}, 26: {9: (1, {'@': 27}), 0: (1, {'@': 27}), 10: (1, {'@': 27}), 3: (1, {'@': 27}), 4: (1, {'@': 27})}, 27: {16: (0, 21), 17: (0, 32), 18: (0, 23), 14: (0, 26), 15: (1, {'@': 28}), 13: (1, {'@': 28})}, 28: {7: (0, 25), 1: (0, 19), 3: (0, 22), 0: (0, 9), 2: (0, 34), 20: (0, 17), 4: (0, 3), 5: (0, 30), 6: (0, 12), 21: (0, 10), 8: (0, 24), 9: (0, 0), 10: (0, 18), 11: (0, 14)}, 29: {14: (1, {'@': 29}), 12: (1, {'@': 29}), 13: (1, {'@': 29}), 16: (1, {'@': 29}), 15: (1, {'@': 29})}, 30: {14: (1, {'@': 30}), 12: (1, {'@': 30}), 13: (1, {'@': 30}), 16: (1, {'@': 30}), 15: (1, {'@': 30})}, 31: {5: (0, 30), 1: (0, 27), 6: (0, 12), 0: (0, 9), 3: (0, 22), 2: (0, 4), 9: (0, 0), 10: (0, 18), 11: (0, 14)}, 32: {5: (0, 30), 2: (0, 7), 6: (0, 12), 0: (0, 9), 3: (0, 22), 9: (0, 0), 10: (0, 18), 11: (0, 14)}, 33: {12: (1, {'@': 31}), 13: (1, {'@': 31})}, 34: {14: (1, {'@': 11}), 16: (1, {'@': 11}), 12: (1, {'@': 32}), 13: (1, {'@': 32})}, 35: {14: (1, {'@': 33}), 12: (1, {'@': 33}), 13: (1, {'@': 33}), 16: (1, {'@': 33}), 15: (1, {'@': 33})}}, 'start_states': {'start': 28}, 'end_states': {'start': 17}}, 'lexer_conf': {'tokens': [{'@': 0}, {'@': 1}, {'@': 2}, {'@': 3}, {'@': 4}, {'@': 5}, {'@': 6}, {'@': 7}, {'@': 8}, {'@': 9}], 'ignore': ['WS'], 'g_regex_flags': 0, '__type__': 'LexerConf'}, 'start': ['start'], '__type__': 'LALR_ContextualLexer'}, 'rules': [{'@': 16}, {'@': 23}, {'@': 29}, {'@': 10}, {'@': 27}, {'@': 24}, {'@': 18}, {'@': 20}, {'@': 30}, {'@': 33}, {'@': 14}, {'@': 12}, {'@': 11}, {'@': 28}, {'@': 21}, {'@': 22}, {'@': 15}, {'@': 13}, {'@': 31}, {'@': 32}, {'@': 25}, {'@': 19}, {'@': 26}, {'@': 17}], 'options': {'debug': False, 'keep_all_tokens': False, 'tree_class': None, 'cache_grammar': False, 'postlex': None, 'parser': 'lalr', 'lexer': 'contextual', 'transformer': None, 'start': ['start'], 'priority': None, 'ambiguity': 'auto', 'propagate_positions': False, 'lexer_callbacks': {}, 'maybe_placeholders': False, 'edit_terminals': None, 'g_regex_flags': 0}, '__type__': 'Lark'} +{'parser': {'lexer_conf': {'terminals': [{'@': 0}, {'@': 1}, {'@': 2}, {'@': 3}, {'@': 4}, {'@': 5}, {'@': 6}, {'@': 7}, {'@': 8}, {'@': 9}], 'ignore': ['WS'], 'g_regex_flags': 0, 'use_bytes': False, 'lexer_type': 'contextual', '__type__': 'LexerConf'}, 'parser_conf': {'rules': [{'@': 10}, {'@': 11}, {'@': 12}, {'@': 13}, {'@': 14}, {'@': 15}, {'@': 16}, {'@': 17}, {'@': 18}, {'@': 19}, {'@': 20}, {'@': 21}, {'@': 22}, {'@': 23}, {'@': 24}, {'@': 25}, {'@': 26}, {'@': 27}, {'@': 28}, {'@': 29}, {'@': 30}, {'@': 31}, {'@': 32}, {'@': 33}], 'start': ['start'], 'parser_type': 'lalr', '__type__': 'ParserConf'}, 'parser': {'tokens': {0: 'trait', 1: 'PLUS', 2: 'NAME', 3: 'ITEMS', 4: 'metadata', 5: 'LSQB', 6: 'items', 7: 'element', 8: 'DOT', 9: 'COLON', 10: '$END', 11: 'COMMA', 12: 'parallel', 13: 'series', 14: 'STAR', 15: 'anytrait', 16: 'RSQB', 17: 'quiet', 18: 'notify', 19: 'parallel_terminal', 20: 'start', 21: 'series_terminal'}, 
'states': {0: {0: (0, 17), 1: (0, 23), 2: (0, 25), 3: (0, 18), 4: (0, 14), 5: (0, 2), 6: (0, 4), 7: (0, 19)}, 1: {8: (1, {'@': 22}), 9: (1, {'@': 22}), 10: (1, {'@': 29}), 11: (1, {'@': 29})}, 2: {0: (0, 17), 1: (0, 23), 12: (0, 27), 2: (0, 25), 13: (0, 12), 3: (0, 18), 4: (0, 14), 5: (0, 2), 6: (0, 4), 7: (0, 15)}, 3: {0: (0, 17), 1: (0, 23), 2: (0, 25), 3: (0, 18), 14: (0, 5), 5: (0, 2), 7: (0, 35), 15: (0, 9), 4: (0, 14), 6: (0, 4)}, 4: {16: (1, {'@': 17}), 9: (1, {'@': 17}), 11: (1, {'@': 17}), 8: (1, {'@': 17}), 10: (1, {'@': 17})}, 5: {10: (1, {'@': 13}), 11: (1, {'@': 13})}, 6: {16: (1, {'@': 19}), 9: (1, {'@': 19}), 11: (1, {'@': 19}), 8: (1, {'@': 19}), 10: (1, {'@': 19})}, 7: {0: (0, 17), 1: (0, 23), 2: (0, 25), 3: (0, 18), 4: (0, 14), 5: (0, 2), 6: (0, 4), 7: (0, 8)}, 8: {8: (1, {'@': 20}), 16: (1, {'@': 20}), 9: (1, {'@': 20}), 11: (1, {'@': 20})}, 9: {10: (1, {'@': 26}), 11: (1, {'@': 26})}, 10: {17: (0, 0), 18: (0, 7), 8: (0, 29), 9: (0, 13), 16: (1, {'@': 23}), 11: (1, {'@': 23})}, 11: {10: (1, {'@': 30}), 11: (1, {'@': 30})}, 12: {17: (0, 0), 18: (0, 7), 8: (0, 29), 9: (0, 13), 16: (1, {'@': 24}), 11: (1, {'@': 24})}, 13: {5: (1, {'@': 15}), 3: (1, {'@': 15}), 2: (1, {'@': 15}), 1: (1, {'@': 15}), 14: (1, {'@': 15})}, 14: {16: (1, {'@': 18}), 9: (1, {'@': 18}), 11: (1, {'@': 18}), 8: (1, {'@': 18}), 10: (1, {'@': 18})}, 15: {8: (1, {'@': 22}), 16: (1, {'@': 22}), 9: (1, {'@': 22}), 11: (1, {'@': 22})}, 16: {1: (0, 23), 2: (0, 25), 19: (0, 20), 7: (0, 1), 13: (0, 31), 0: (0, 17), 15: (0, 11), 20: (0, 21), 3: (0, 18), 14: (0, 5), 4: (0, 14), 21: (0, 33), 5: (0, 2), 6: (0, 4)}, 17: {16: (1, {'@': 16}), 9: (1, {'@': 16}), 11: (1, {'@': 16}), 8: (1, {'@': 16}), 10: (1, {'@': 16})}, 18: {8: (1, {'@': 11}), 16: (1, {'@': 11}), 9: (1, {'@': 11}), 11: (1, {'@': 11}), 10: (1, {'@': 11})}, 19: {8: (1, {'@': 21}), 16: (1, {'@': 21}), 9: (1, {'@': 21}), 11: (1, {'@': 21})}, 20: {11: (0, 26), 10: (1, {'@': 33})}, 21: {}, 22: {8: (1, {'@': 21}), 9: (1, {'@': 21}), 10: (1, {'@': 27}), 11: (1, {'@': 27})}, 23: {2: (0, 28)}, 24: {15: (0, 34), 0: (0, 17), 1: (0, 23), 2: (0, 25), 3: (0, 18), 14: (0, 5), 4: (0, 14), 5: (0, 2), 6: (0, 4), 7: (0, 22)}, 25: {8: (1, {'@': 10}), 16: (1, {'@': 10}), 9: (1, {'@': 10}), 11: (1, {'@': 10}), 10: (1, {'@': 10})}, 26: {1: (0, 23), 2: (0, 25), 5: (0, 2), 7: (0, 1), 13: (0, 31), 0: (0, 17), 15: (0, 11), 3: (0, 18), 14: (0, 5), 4: (0, 14), 21: (0, 30), 6: (0, 4)}, 27: {16: (0, 6), 11: (0, 32)}, 28: {8: (1, {'@': 12}), 16: (1, {'@': 12}), 9: (1, {'@': 12}), 11: (1, {'@': 12}), 10: (1, {'@': 12})}, 29: {5: (1, {'@': 14}), 3: (1, {'@': 14}), 2: (1, {'@': 14}), 1: (1, {'@': 14}), 14: (1, {'@': 14})}, 30: {10: (1, {'@': 31}), 11: (1, {'@': 31})}, 31: {17: (0, 24), 18: (0, 3), 8: (0, 29), 9: (0, 13)}, 32: {0: (0, 17), 1: (0, 23), 2: (0, 25), 3: (0, 18), 13: (0, 10), 4: (0, 14), 5: (0, 2), 6: (0, 4), 7: (0, 15)}, 33: {10: (1, {'@': 32}), 11: (1, {'@': 32})}, 34: {10: (1, {'@': 28}), 11: (1, {'@': 28})}, 35: {8: (1, {'@': 20}), 9: (1, {'@': 20}), 10: (1, {'@': 25}), 11: (1, {'@': 25})}}, 'start_states': {'start': 16}, 'end_states': {'start': 21}}, '__type__': 'ParsingFrontend'}, 'rules': [{'@': 10}, {'@': 11}, {'@': 12}, {'@': 13}, {'@': 14}, {'@': 15}, {'@': 16}, {'@': 17}, {'@': 18}, {'@': 19}, {'@': 20}, {'@': 21}, {'@': 22}, {'@': 23}, {'@': 24}, {'@': 25}, {'@': 26}, {'@': 27}, {'@': 28}, {'@': 29}, {'@': 30}, {'@': 31}, {'@': 32}, {'@': 33}], 'options': {'debug': False, 'keep_all_tokens': False, 'tree_class': None, 'cache': False, 'postlex': None, 'parser': 
'lalr', 'lexer': 'contextual', 'transformer': None, 'start': ['start'], 'priority': 'normal', 'ambiguity': 'auto', 'regex': False, 'propagate_positions': False, 'lexer_callbacks': {}, 'maybe_placeholders': False, 'edit_terminals': None, 'g_regex_flags': 0, 'use_bytes': False, 'import_paths': [], 'source_path': None, '_plugins': {}}, '__type__': 'Lark'} ) MEMO = ( -{0: {'name': 'NAME', 'pattern': {'value': '[a-zA-Z_]\\w*', 'flags': [], '_width': [1, 4294967295], '__type__': 'PatternRE'}, 'priority': 1, '__type__': 'TerminalDef'}, 1: {'name': 'WS', 'pattern': {'value': '(?:[ \t\x0c\r\n])+', 'flags': [], '_width': [1, 4294967295], '__type__': 'PatternRE'}, 'priority': 1, '__type__': 'TerminalDef'}, 2: {'name': 'ITEMS', 'pattern': {'value': 'items', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 3: {'name': 'PLUS', 'pattern': {'value': '+', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 4: {'name': 'STAR', 'pattern': {'value': '*', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 5: {'name': 'DOT', 'pattern': {'value': '.', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 6: {'name': 'COLON', 'pattern': {'value': ':', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 7: {'name': 'LSQB', 'pattern': {'value': '[', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 8: {'name': 'RSQB', 'pattern': {'value': ']', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 9: {'name': 'COMMA', 'pattern': {'value': ',', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 10: {'origin': {'name': 'anytrait', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'STAR', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 11: {'origin': {'name': 'series', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'element', '__type__': 'NonTerminal'}], 'order': 2, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 12: {'origin': {'name': 'series', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'series', '__type__': 'NonTerminal'}, {'name': 'quiet', '__type__': 'NonTerminal'}, {'name': 'element', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 13: {'origin': {'name': 'series_terminal', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'series', '__type__': 'NonTerminal'}, {'name': 'quiet', '__type__': 'NonTerminal'}, {'name': 'element', '__type__': 'NonTerminal'}], 'order': 2, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 14: {'origin': {'name': 'series', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'series', '__type__': 'NonTerminal'}, {'name': 'notify', '__type__': 'NonTerminal'}, {'name': 'element', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 15: {'origin': {'name': 
'series_terminal', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'series', '__type__': 'NonTerminal'}, {'name': 'notify', '__type__': 'NonTerminal'}, {'name': 'anytrait', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 16: {'origin': {'name': 'trait', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'NAME', 'filter_out': False, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 17: {'origin': {'name': 'start', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'parallel_terminal', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 18: {'origin': {'name': 'element', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'trait', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 19: {'origin': {'name': 'parallel_terminal', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'parallel_terminal', '__type__': 'NonTerminal'}, {'name': 'COMMA', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'series_terminal', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 20: {'origin': {'name': 'element', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'items', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 21: {'origin': {'name': 'parallel', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'series', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 22: {'origin': {'name': 'series_terminal', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'series', '__type__': 'NonTerminal'}, {'name': 'notify', '__type__': 'NonTerminal'}, {'name': 'element', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 23: {'origin': {'name': 'items', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'ITEMS', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 24: {'origin': {'name': 'quiet', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'COLON', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 25: {'origin': {'name': 'series_terminal', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'anytrait', '__type__': 'NonTerminal'}], 'order': 5, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': 
None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 26: {'origin': {'name': 'parallel_terminal', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'series_terminal', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 27: {'origin': {'name': 'notify', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'DOT', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 28: {'origin': {'name': 'parallel', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'parallel', '__type__': 'NonTerminal'}, {'name': 'COMMA', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'series', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 29: {'origin': {'name': 'metadata', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'PLUS', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'NAME', 'filter_out': False, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 30: {'origin': {'name': 'element', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'metadata', '__type__': 'NonTerminal'}], 'order': 2, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 31: {'origin': {'name': 'series_terminal', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'series', '__type__': 'NonTerminal'}, {'name': 'quiet', '__type__': 'NonTerminal'}, {'name': 'anytrait', '__type__': 'NonTerminal'}], 'order': 3, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 32: {'origin': {'name': 'series_terminal', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'element', '__type__': 'NonTerminal'}], 'order': 4, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 33: {'origin': {'name': 'element', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'LSQB', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'parallel', '__type__': 'NonTerminal'}, {'name': 'RSQB', 'filter_out': True, '__type__': 'Terminal'}], 'order': 3, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}} +{0: {'name': 'WS', 'pattern': {'value': '(?:[ \t\x0c\r\n])+', 'flags': [], '_width': [1, 4294967295], '__type__': 'PatternRE'}, 'priority': 0, '__type__': 'TerminalDef'}, 1: {'name': 'NAME', 'pattern': {'value': '[a-zA-Z_]\\w*', 'flags': [], '_width': [1, 4294967295], '__type__': 'PatternRE'}, 'priority': 0, '__type__': 'TerminalDef'}, 2: {'name': 'ITEMS', 'pattern': {'value': 'items', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 3: {'name': 'PLUS', 'pattern': {'value': '+', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 4: {'name': 'STAR', 'pattern': {'value': '*', 
'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 5: {'name': 'DOT', 'pattern': {'value': '.', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 6: {'name': 'COLON', 'pattern': {'value': ':', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 7: {'name': 'LSQB', 'pattern': {'value': '[', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 8: {'name': 'RSQB', 'pattern': {'value': ']', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 9: {'name': 'COMMA', 'pattern': {'value': ',', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 10: {'origin': {'name': Token('RULE', 'trait'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'NAME', 'filter_out': False, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 11: {'origin': {'name': Token('RULE', 'items'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'ITEMS', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 12: {'origin': {'name': Token('RULE', 'metadata'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'PLUS', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'NAME', 'filter_out': False, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 13: {'origin': {'name': Token('RULE', 'anytrait'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'STAR', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 14: {'origin': {'name': Token('RULE', 'notify'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'DOT', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 15: {'origin': {'name': Token('RULE', 'quiet'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'COLON', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 16: {'origin': {'name': Token('RULE', 'element'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'trait', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 17: {'origin': {'name': Token('RULE', 'element'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'items', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 
'Rule'}, 18: {'origin': {'name': Token('RULE', 'element'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'metadata', '__type__': 'NonTerminal'}], 'order': 2, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 19: {'origin': {'name': Token('RULE', 'element'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'LSQB', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'parallel', '__type__': 'NonTerminal'}, {'name': 'RSQB', 'filter_out': True, '__type__': 'Terminal'}], 'order': 3, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 20: {'origin': {'name': Token('RULE', 'series'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'series', '__type__': 'NonTerminal'}, {'name': 'notify', '__type__': 'NonTerminal'}, {'name': 'element', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 21: {'origin': {'name': Token('RULE', 'series'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'series', '__type__': 'NonTerminal'}, {'name': 'quiet', '__type__': 'NonTerminal'}, {'name': 'element', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 22: {'origin': {'name': Token('RULE', 'series'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'element', '__type__': 'NonTerminal'}], 'order': 2, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 23: {'origin': {'name': Token('RULE', 'parallel'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'parallel', '__type__': 'NonTerminal'}, {'name': 'COMMA', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'series', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 24: {'origin': {'name': Token('RULE', 'parallel'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'series', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 25: {'origin': {'name': Token('RULE', 'series_terminal'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'series', '__type__': 'NonTerminal'}, {'name': 'notify', '__type__': 'NonTerminal'}, {'name': 'element', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 26: {'origin': {'name': Token('RULE', 'series_terminal'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'series', '__type__': 'NonTerminal'}, {'name': 'notify', '__type__': 'NonTerminal'}, {'name': 'anytrait', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 
'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 27: {'origin': {'name': Token('RULE', 'series_terminal'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'series', '__type__': 'NonTerminal'}, {'name': 'quiet', '__type__': 'NonTerminal'}, {'name': 'element', '__type__': 'NonTerminal'}], 'order': 2, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 28: {'origin': {'name': Token('RULE', 'series_terminal'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'series', '__type__': 'NonTerminal'}, {'name': 'quiet', '__type__': 'NonTerminal'}, {'name': 'anytrait', '__type__': 'NonTerminal'}], 'order': 3, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 29: {'origin': {'name': Token('RULE', 'series_terminal'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'element', '__type__': 'NonTerminal'}], 'order': 4, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 30: {'origin': {'name': Token('RULE', 'series_terminal'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'anytrait', '__type__': 'NonTerminal'}], 'order': 5, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 31: {'origin': {'name': Token('RULE', 'parallel_terminal'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'parallel_terminal', '__type__': 'NonTerminal'}, {'name': 'COMMA', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'series_terminal', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 32: {'origin': {'name': Token('RULE', 'parallel_terminal'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'series_terminal', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 33: {'origin': {'name': Token('RULE', 'start'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'parallel_terminal', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}} ) Shift = 0 Reduce = 1 -def Lark_StandAlone(transformer=None, postlex=None): - namespace = {'Rule': Rule, 'TerminalDef': TerminalDef} - return Lark.deserialize(DATA, namespace, MEMO, transformer=transformer, postlex=postlex) +def Lark_StandAlone(**kwargs): + return Lark._load_from_dict(DATA, MEMO, **kwargs)
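
For reference, the regenerated standalone module is consumed the same way as before; only the entry point changed, from Lark.deserialize(DATA, namespace, MEMO, ...) to Lark._load_from_dict(DATA, MEMO, **kwargs). Below is a minimal usage sketch, not part of the generated file, with an example observe-expression assumed from the serialized rules above ("." chains elements with notifications enabled, ":" chains them quietly):

    # Sketch only: build the parser from the embedded DATA/MEMO tables and parse
    # one observe mini-language expression; parse() returns a Lark Tree.
    from traits.observation._generated_parser import Lark_StandAlone

    parser = Lark_StandAlone()         # keyword arguments are forwarded to Lark._load_from_dict
    tree = parser.parse("child.name")  # NAME "." NAME -> a series with a notify hop
    print(tree.pretty())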