refactor[parser]: remove ASTTokens (#4364)
this commit removes `asttokens` from the parse machinery, since the method is buggy (see the referenced bugs below) and slow. it brings down parse time (time spent in AST generation) by 40-70%.

the `mark_tokens()` machinery is replaced with a modified version of the python `ast` module's `fix_missing_locations()` function, which recurses through the AST and adds missing line info based on the parent node. this commit also switches to a more consistent method for updating source offsets that are modified by the `pre_parse` step, which fixes several outstanding bugs with source location reporting.

there were some exceptions to the line info fixup working; the issues and corresponding workarounds are as follows:

- some python AST nodes returned by `ast.parse()` are singletons, which we work around by deepcopying the AST before operating on it.
- notably, there is an interaction between our AST annotation and `coverage.py` in the case of `USub`. in this commit we paper over the issue by simply always overriding line info for `USub` nodes. in the future, we should refactor `VyperNode` generation to bypass the python AST annotation step entirely, which is a more proper fix for the problems encountered in this PR.

the `asttokens` package is not removed entirely, since it still has a limited usage inside the natspec parser. we could remove it in a future PR; for now it is out of scope.

referenced bugs:

- #2258
- #3059
- #3430
- #4139
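For illustration, here is a minimal sketch of the `fix_missing_locations()`-style recursion described above, with the deepcopy workaround for CPython's shared fieldless AST nodes folded in. This is not the Vyper implementation; the function name `annotate_missing_locations` and the default location values are assumptions, and the sketch only mirrors the stdlib pattern of inheriting missing location attributes from the parent node.

```python
import ast
import copy


def annotate_missing_locations(tree: ast.AST) -> ast.AST:
    """Fill in missing location info from the parent node, in the spirit of
    the stdlib's ast.fix_missing_locations(). Illustrative sketch only."""
    # deepcopy first: some nodes returned by ast.parse() -- e.g. operator
    # instances such as ast.USub -- are shared singletons on CPython, so
    # annotating the tree in place could leak attributes across unrelated
    # parse results.
    tree = copy.deepcopy(tree)

    def _fix(node: ast.AST, loc: dict) -> None:
        loc = dict(loc)  # copy so siblings don't see each other's updates
        for attr in ("lineno", "col_offset", "end_lineno", "end_col_offset"):
            if attr in node._attributes:
                if getattr(node, attr, None) is None:
                    # missing on this node: inherit from the parent
                    setattr(node, attr, loc[attr])
                else:
                    # present: children inherit this node's value
                    loc[attr] = getattr(node, attr)
        for child in ast.iter_child_nodes(node):
            _fix(child, loc)

    _fix(tree, {"lineno": 1, "col_offset": 0, "end_lineno": 1, "end_col_offset": 0})
    return tree


# a synthesized node with no location info inherits its parent's location
tree = ast.parse("x = 1")
tree.body[0].value = ast.Constant(2)  # new node, no lineno / col_offset
annotated = annotate_missing_locations(tree)
assert annotated.body[0].value.lineno == 1

# the singleton behaviour the commit works around: fieldless nodes such as
# the ast.USub operator are shared across separate parses
op1 = ast.parse("-a").body[0].value.op
op2 = ast.parse("-b").body[0].value.op
print(op1 is op2)  # True on CPython >= 3.8
```

Note that this pattern only fills in *missing* location info, so nodes that already carry correct offsets from `ast.parse()` are left untouched.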
1 parent 10e91d5 · commit db8dcc7
Showing 5 changed files with 236 additions and 89 deletions.
@@ -0,0 +1,94 @@
""" | ||
Tests that the tokenizer / parser are passing correct source location | ||
info to the AST | ||
""" | ||
import pytest | ||
|
||
from vyper.ast.parse import parse_to_ast | ||
from vyper.compiler import compile_code | ||
from vyper.exceptions import UndeclaredDefinition | ||
|
||
|
||
def test_log_token_aligned(): | ||
# GH issue 3430 | ||
code = """ | ||
event A: | ||
b: uint256 | ||
@external | ||
def f(): | ||
log A(b=d) | ||
""" | ||
with pytest.raises(UndeclaredDefinition) as e: | ||
compile_code(code) | ||
|
||
expected = """ | ||
'd' has not been declared. | ||
function "f", line 7:12 | ||
6 def f(): | ||
---> 7 log A(b=d) | ||
-------------------^ | ||
8 | ||
""" # noqa: W291 | ||
assert expected.strip() == str(e.value).strip() | ||
|
||
|
||
def test_log_token_aligned2(): | ||
# GH issue 3059 | ||
code = """ | ||
interface Contract: | ||
def foo(): nonpayable | ||
event MyEvent: | ||
a: address | ||
@external | ||
def foo(c: Contract): | ||
log MyEvent(a=c.address) | ||
""" | ||
compile_code(code) | ||
|
||
|
||
def test_log_token_aligned3(): | ||
# https://github.com/vyperlang/vyper/pull/3808#pullrequestreview-1900570163 | ||
code = """ | ||
import ITest | ||
implements: ITest | ||
event Foo: | ||
a: address | ||
@external | ||
def foo(u: uint256): | ||
log Foo(empty(address)) | ||
log i.Foo(empty(address)) | ||
""" | ||
# not semantically valid code, check we can at least parse it | ||
assert parse_to_ast(code) is not None | ||
|
||
|
||
def test_log_token_aligned4(): | ||
# GH issue 4139 | ||
code = """ | ||
b: public(uint256) | ||
event Transfer: | ||
random: indexed(uint256) | ||
shi: uint256 | ||
@external | ||
def transfer(): | ||
log Transfer(T(self).b(), 10) | ||
return | ||
""" | ||
# not semantically valid code, check we can at least parse it | ||
assert parse_to_ast(code) is not None | ||
|
||
|
||
def test_long_string_non_coding_token(): | ||
# GH issue 2258 | ||
code = '\r[[]]\ndef _(e:[],l:[]):\n """"""""""""""""""""""""""""""""""""""""""""""""""""""\n f.n()' # noqa: E501 | ||
# not valid code, but should at least parse | ||
assert parse_to_ast(code) is not None |