Skip to content

Commit

Permalink
fix: avoid max recursion errors in ast code. #1774
Browse files Browse the repository at this point in the history
  • Loading branch information
nedbat committed May 4, 2024
1 parent 34af01d commit 38ec386
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 28 deletions.
7 changes: 7 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ Unreleased
on the first line. This closes `issue 754`_. The fix was contributed by
`Daniel Diniz <pull 1773_>`_.

- Fix: very complex source files like `this one <resolvent_lookup_>`_ could
cause a maximum recursion error when creating an HTML report. This is now
fixed, closing `issue 1774`_.

- HTML report improvements:

- Support files (JavaScript and CSS) referenced by the HTML report now have
Expand All @@ -41,10 +45,13 @@ Unreleased
- Column sort order is remembered better as you move between the index pages,
fixing `issue 1766`_. Thanks, `Daniel Diniz <pull 1768_>`_.


.. _resolvent_lookup: https://github.com/sympy/sympy/blob/130950f3e6b3f97fcc17f4599ac08f70fdd2e9d4/sympy/polys/numberfields/resolvent_lookup.py
.. _issue 754: https://github.com/nedbat/coveragepy/issues/754
.. _issue 1766: https://github.com/nedbat/coveragepy/issues/1766
.. _pull 1768: https://github.com/nedbat/coveragepy/pull/1768
.. _pull 1773: https://github.com/nedbat/coveragepy/pull/1773
.. _issue 1774: https://github.com/nedbat/coveragepy/issues/1774


.. scriv-start-here
Expand Down
32 changes: 12 additions & 20 deletions coverage/phystokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,26 +78,19 @@ def _phys_tokens(toks: TokenInfos) -> TokenInfos:
last_lineno = elineno


class SoftKeywordFinder(ast.NodeVisitor):
def find_soft_key_lines(source: str) -> set[TLineNo]:
"""Helper for finding lines with soft keywords, like match/case lines."""
def __init__(self, source: str) -> None:
# This will be the set of line numbers that start with a soft keyword.
self.soft_key_lines: set[TLineNo] = set()
self.visit(ast.parse(source))

if sys.version_info >= (3, 10):
def visit_Match(self, node: ast.Match) -> None:
"""Invoked by ast.NodeVisitor.visit"""
self.soft_key_lines.add(node.lineno)
soft_key_lines: set[TLineNo] = set()

for node in ast.walk(ast.parse(source)):
if sys.version_info >= (3, 10) and isinstance(node, ast.Match):
soft_key_lines.add(node.lineno)
for case in node.cases:
self.soft_key_lines.add(case.pattern.lineno)
self.generic_visit(node)
soft_key_lines.add(case.pattern.lineno)
elif sys.version_info >= (3, 12) and isinstance(node, ast.TypeAlias):
soft_key_lines.add(node.lineno)

if sys.version_info >= (3, 12):
def visit_TypeAlias(self, node: ast.TypeAlias) -> None:
"""Invoked by ast.NodeVisitor.visit"""
self.soft_key_lines.add(node.lineno)
self.generic_visit(node)
return soft_key_lines


def source_token_lines(source: str) -> TSourceTokenLines:
Expand All @@ -124,7 +117,7 @@ def source_token_lines(source: str) -> TSourceTokenLines:
tokgen = generate_tokens(source)

if env.PYBEHAVIOR.soft_keywords:
soft_key_lines = SoftKeywordFinder(source).soft_key_lines
soft_key_lines = find_soft_key_lines(source)

for ttype, ttext, (sline, scol), (_, ecol), _ in _phys_tokens(tokgen):
mark_start = True
Expand All @@ -151,8 +144,7 @@ def source_token_lines(source: str) -> TSourceTokenLines:
# Need the version_info check to keep mypy from borking
# on issoftkeyword here.
if env.PYBEHAVIOR.soft_keywords and keyword.issoftkeyword(ttext):
# Soft keywords appear at the start of the line,
# on lines that start match or case statements.
# Soft keywords appear at the start of their line.
if len(line) == 0:
is_start_of_line = True
elif (len(line) == 1) and line[0][0] == "ws":
Expand Down
30 changes: 22 additions & 8 deletions coverage/regions.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class Context:
lines: set[int]


class RegionFinder(ast.NodeVisitor):
class RegionFinder:
"""An ast visitor that will find and track regions of code.
Functions and classes are tracked by name. Results are in the .regions
Expand All @@ -34,13 +34,29 @@ def __init__(self) -> None:

def parse_source(self, source: str) -> None:
"""Parse `source` and walk the ast to populate the .regions attribute."""
self.visit(ast.parse(source))
self.handle_node(ast.parse(source))

def fq_node_name(self) -> str:
"""Get the current fully qualified name we're processing."""
return ".".join(c.name for c in self.context)

def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
def handle_node(self, node: ast.AST) -> None:
"""Recursively handle any node."""
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
self.handle_FunctionDef(node)
elif isinstance(node, ast.ClassDef):
self.handle_ClassDef(node)
elif isinstance(node, (ast.Module, ast.stmt)):
# Only modules and statements can contain function and class
# definitions. Handle them and ignore all others.
self.handle_node_body(node)

def handle_node_body(self, node: ast.AST) -> None:
"""Recursively handle the nodes in this node's body, if any."""
for body_node in getattr(node, "body", ()):
self.handle_node(body_node)

def handle_FunctionDef(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> None:
"""Called for `def` or `async def`."""
lines = set(range(node.body[0].lineno, cast(int, node.body[-1].end_lineno) + 1))
if self.context and self.context[-1].kind == "class":
Expand All @@ -60,12 +76,10 @@ def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
lines=lines,
)
)
self.generic_visit(node)
self.handle_node_body(node)
self.context.pop()

visit_AsyncFunctionDef = visit_FunctionDef # type: ignore[assignment]

def visit_ClassDef(self, node: ast.ClassDef) -> None:
def handle_ClassDef(self, node: ast.ClassDef) -> None:
"""Called for `class`."""
# The lines for a class are the lines in the methods of the class.
# We start empty, and count on visit_FunctionDef to add the lines it
Expand All @@ -80,7 +94,7 @@ def visit_ClassDef(self, node: ast.ClassDef) -> None:
lines=lines,
)
)
self.generic_visit(node)
self.handle_node_body(node)
self.context.pop()
# Class bodies should be excluded from the enclosing classes.
for ancestor in reversed(self.context):
Expand Down
1 change: 1 addition & 0 deletions tests/test_phystokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ def match():
global case
""")
tokens = list(source_token_lines(source))
print(tokens)
assert tokens[0][0] == ("key", "match")
assert tokens[0][4] == ("nam", "match")
assert tokens[1][1] == ("key", "case")
Expand Down

0 comments on commit 38ec386

Please sign in to comment.