feat: more frontend optimizations #3785

Merged
merged 17 commits into from
Feb 19, 2024
Merged
Changes from 1 commit
Commits
optimize tokenization
charles-cooper committed Feb 18, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
commit bbbb76eb2b8b362b255acbaab8f3712b7ccebe07
27 changes: 22 additions & 5 deletions vyper/ast/parse.py
@@ -1,6 +1,8 @@
 import ast as python_ast
+import io
+import tokenize
 from decimal import Decimal
 from functools import cached_property
 from typing import Any, Dict, List, Optional, Union, cast
 
 import asttokens
@@ -266,6 +268,12 @@ def visit_ClassDef(self, node):
         node.ast_type = self._modification_offsets[(node.lineno, node.col_offset)]
         return node
 
+    @cached_property
+    def _dummy_tokens(self):
+        bytez = "dummy_target:\\\n foo".encode("utf-8")
+        token_list = list(tokenize.tokenize(io.BytesIO(bytez).readline))[:3]
+        return token_list
+
     def visit_For(self, node):
         """
         Visit a For node, splicing in the loop variable annotation provided by
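
As a quick illustration (not part of the diff), the three tokens `_dummy_tokens` caches are the ENCODING marker plus the `dummy_target:` prefix; a minimal sketch, assuming CPython's tokenize module:

# illustration only: inspect the three tokens cached by _dummy_tokens
import io
import tokenize

bytez = "dummy_target:\\\n foo".encode("utf-8")
for tok in list(tokenize.tokenize(io.BytesIO(bytez).readline))[:3]:
    print(tokenize.tok_name[tok.type], repr(tok.string))
# prints:
#   ENCODING 'utf-8'
#   NAME 'dummy_target'
#   OP ':'

Since the property is cached, the prefix is tokenized once per parser instance instead of once per for loop.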
@@ -300,8 +308,19 @@ def visit_For(self, node):
         # in a bit, but for now lets us keep the line/col offset, and
         # *also* gives us a valid AST. it doesn't matter what the dummy
         # target name is, since it gets removed in a few lines.
+
+        # tokenization is a perf hotspot, so we manually construct the token
+        # list to pass to ASTTokens.
+        annotation_tokens = self._dummy_tokens + annotation_tokens
+
+        # ensure tokens are properly terminated
+        endline = annotation_tokens[-1].start[0]
+        annotation_tokens.append(
+            tokenize.TokenInfo(
+                type=tokenize.ENDMARKER, string="", start=(endline, 0), end=(endline, 0), line=""
+            )
+        )
         annotation_str = tokenize.untokenize(annotation_tokens)
-        annotation_str = "dummy_target:" + annotation_str
 
         try:
             fake_node = python_ast.parse(annotation_str).body[0]
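
A minimal sketch (illustration only; the `uint256` annotation token and its line-2 position are made up) of the splice/terminate/untokenize round trip above. `tokenize.untokenize` pads row gaps with backslash continuations, so annotation tokens that keep their original source positions still untokenize into a single parseable statement:

# illustration only: splice a hypothetical annotation token after the
# cached dummy prefix, terminate, and untokenize
import ast
import io
import tokenize

prefix = list(tokenize.tokenize(io.BytesIO(b"dummy_target:\\\n foo").readline))[:3]
ann = [tokenize.TokenInfo(tokenize.NAME, "uint256", (2, 1), (2, 8), " uint256")]
tokens = prefix + ann

# ensure tokens are properly terminated, mirroring the diff
endline = tokens[-1].start[0]
tokens.append(tokenize.TokenInfo(tokenize.ENDMARKER, "", (endline, 0), (endline, 0), ""))

src = tokenize.untokenize(tokens)
print(src)  # b'dummy_target:\\\n uint256' (bytes, since ENCODING is present)
print(type(ast.parse(src).body[0]).__name__)  # AnnAssign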
@@ -310,10 +329,8 @@ def visit_For(self, node):
                 "invalid type annotation", self._source_code, node.lineno, node.col_offset
             ) from e
 
-        # fill in with asttokens info. note we can use `self._tokens` because
-        # it is indented to exactly the same position where it appeared
-        # in the original source!
-        self._tokens.mark_tokens(fake_node)
+        # fill in with asttokens info.
+        asttokens.ASTTokens(annotation_str, tree=fake_node, tokens=annotation_tokens)
 
         # replace the dummy target name with the real target name.
         fake_node.target = node.target
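
Finally, a sketch (illustration only, with a made-up source string) of handing a precomputed token list to asttokens, which marks the fake tree without tokenizing `annotation_str` a second time; this assumes an asttokens release whose constructor accepts a `tokens` argument, as the diff relies on:

# illustration only: ASTTokens reuses a precomputed token list instead of
# re-tokenizing the source
import ast
import io
import tokenize

import asttokens

src = "dummy_target: uint256"
toks = list(tokenize.generate_tokens(io.StringIO(src).readline))
tree = ast.parse(src)
asttokens.ASTTokens(src, tree=tree, tokens=toks)

# nodes now carry the metadata mark_tokens() would have attached
ann = tree.body[0].annotation
print(ann.first_token.string, ann.last_token.string)  # uint256 uint256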