Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor[venom]: make venom repr parseable #4402

Merged
merged 19 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions tests/functional/venom/test_venom_repr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import glob

import pytest

from tests.venom_utils import assert_ctx_eq, parse_venom
from vyper.compiler import compile_code
from vyper.compiler.settings import OptimizationLevel
from vyper.venom.context import IRContext

"""
Check that venom text format round-trips through parser
"""


def get_example_vy_filenames():
return glob.glob("**/*.vy", root_dir="examples/", recursive=True)


@pytest.mark.parametrize("vy_filename", get_example_vy_filenames())
def test_round_trip(vy_filename, optimize, request):
    """
    Compile an example contract to its `bb_runtime` output, print it as
    venom text, parse that text back and check that the parsed context
    compares equal to the original one.
    """
    if optimize == OptimizationLevel.CODESIZE:
        # codesize optimization emits things like `db b"\x12\x34"`, which the
        # parser does not handle yet, so failures are tolerated in this mode.
        request.node.add_marker(pytest.mark.xfail(strict=False, reason="unimplemented in parser"))

    path = f"examples/{vy_filename}"
    # pin the encoding so reading the source does not depend on the
    # platform's locale default
    with open(path, encoding="utf-8") as f:
        vyper_source = f.read()

    out = compile_code(vyper_source, output_formats=["bb_runtime"])
    bb_runtime = out["bb_runtime"]
    # render the context with IRContext's own text format
    venom_code = IRContext.__repr__(bb_runtime)

    ctx = parse_venom(venom_code)

    assert_ctx_eq(bb_runtime, ctx)
1 change: 0 additions & 1 deletion tests/venom_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ def assert_fn_eq(fn1: IRFunction, fn2: IRFunction):


def assert_ctx_eq(ctx1: IRContext, ctx2: IRContext):
assert ctx1.last_label == ctx2.last_label
assert len(ctx1.functions) == len(ctx2.functions)
for label1, fn1 in ctx1.functions.items():
assert label1 in ctx2.functions
Expand Down
46 changes: 20 additions & 26 deletions vyper/venom/basicblock.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import json
import re
from typing import TYPE_CHECKING, Any, Iterator, Optional, Union

import vyper.venom.effects as effects
Expand Down Expand Up @@ -105,7 +107,7 @@ def __init__(self, line_no: int, src: str) -> None:

def __repr__(self) -> str:
src = self.src if self.src else ""
return f"\t# line {self.line_no}: {src}".expandtabs(20)
return f"\t; line {self.line_no}: {src}".expandtabs(20)


class IROperand:
Expand Down Expand Up @@ -183,10 +185,19 @@ class IRLabel(IROperand):
value: str

def __init__(self, value: str, is_symbol: bool = False) -> None:
assert isinstance(value, str), "value must be an str"
assert isinstance(value, str), f"not a str: {value} ({type(value)})"
assert len(value) > 0
self.value = value
self.is_symbol = is_symbol

# A bare identifier: one or more of [0-9a-zA-Z_]. `+` (not `*`) so that an
# empty value is never treated as an identifier and gets quoted instead.
_IS_IDENTIFIER = re.compile(r"[0-9a-zA-Z_]+")

def __repr__(self):
    """
    Return the label text as-is when it is a bare identifier, otherwise
    return it as a JSON-escaped, double-quoted string.
    """
    if self.__class__._IS_IDENTIFIER.fullmatch(self.value):
        return self.value

    # anything else (spaces, punctuation, ...) is escaped
    return json.dumps(self.value)


class IRInstruction:
"""
Expand Down Expand Up @@ -360,35 +371,20 @@ def get_ast_source(self) -> Optional[IRnode]:
return inst.ast_source
return self.parent.parent.ast_source

def str_short(self) -> str:
s = ""
if self.output:
s += f"{self.output} = "
opcode = f"{self.opcode} " if self.opcode != "store" else ""
s += opcode
operands = self.operands
if opcode not in ["jmp", "jnz", "invoke"]:
operands = list(reversed(operands))
s += ", ".join(
[(f"label %{op}" if isinstance(op, IRLabel) else str(op)) for op in operands]
)
return s

def __repr__(self) -> str:
s = ""
if self.output:
s += f"{self.output} = "
opcode = f"{self.opcode} " if self.opcode != "store" else ""
s += opcode
operands = self.operands
if opcode not in ("jmp", "jnz", "invoke"):
if self.opcode not in ("jmp", "jnz", "invoke", "phi"):
# TODO: for invoke, maybe reverse the non-label instructions
operands = reversed(operands) # type: ignore
s += ", ".join(
[(f"label %{op}" if isinstance(op, IRLabel) else str(op)) for op in operands]
)
s += ", ".join([(f"@{op}" if isinstance(op, IRLabel) else str(op)) for op in operands])

if self.annotation:
s += f" <{self.annotation}>"
s += f" ; {self.annotation}"

return f"{s: <30}"

Expand Down Expand Up @@ -644,10 +640,8 @@ def copy(self):
return bb

def __repr__(self) -> str:
s = (
f"{repr(self.label)}: IN={[bb.label for bb in self.cfg_in]}"
f" OUT={[bb.label for bb in self.cfg_out]} => {self.out_vars}\n"
)
s = f"{self.label}: ; IN={[bb.label for bb in self.cfg_in]}"
s += f" OUT={[bb.label for bb in self.cfg_out]} => {self.out_vars}\n"
for instruction in self.instructions:
s += f" {str(instruction).strip()}\n"
s += f" {str(instruction).strip()}\n"
return s
6 changes: 3 additions & 3 deletions vyper/venom/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,14 @@ def as_graph(self) -> str:
return "\n".join(s)

def __repr__(self) -> str:
s = ["IRContext:"]
s = []
for fn in self.functions.values():
s.append(fn.__repr__())
s.append("\n")

if len(self.data_segment) > 0:
s.append("\nData segment:")
s.append("\n[data]")
for inst in self.data_segment:
s.append(f"{inst}")
s.append(f" {inst}")

return "\n".join(s)
11 changes: 7 additions & 4 deletions vyper/venom/function.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import textwrap
from typing import Iterator, Optional

from vyper.codegen.ir_node import IRnode
Expand Down Expand Up @@ -41,7 +42,7 @@ def append_basic_block(self, bb: IRBasicBlock):
Append basic block to function.
"""
assert isinstance(bb, IRBasicBlock), bb
assert bb.label.name not in self._basic_block_dict
assert bb.label.name not in self._basic_block_dict, bb.label
self._basic_block_dict[bb.label.name] = bb

def remove_basic_block(self, bb: IRBasicBlock):
Expand Down Expand Up @@ -222,7 +223,9 @@ def _make_label(bb):
return "\n".join(ret)

def __repr__(self) -> str:
str = f"IRFunction: {self.name}\n"
ret = f"function {self.name} {{\n"
for bb in self.get_basic_blocks():
str += f"{bb}\n"
return str.strip()
bb_str = textwrap.indent(str(bb), " ")
ret += f"{bb_str}\n"
ret = ret.strip() + "\n}"
return ret.strip()
5 changes: 1 addition & 4 deletions vyper/venom/ir_node_to_venom.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,10 +369,7 @@ def _convert_ir_bb(fn, ir, symbols):
label = IRLabel(ir.args[0].value)
ctx.append_data("dbname", [label])
for c in ir.args[1:]:
if isinstance(c, int):
assert 0 <= c <= 255, "data with invalid size"
ctx.append_data("db", [c]) # type: ignore
elif isinstance(c.value, bytes):
if isinstance(c.value, bytes):
ctx.append_data("db", [c.value]) # type: ignore
elif isinstance(c, IRnode):
data = _convert_ir_bb(fn, c, symbols)
Expand Down
43 changes: 31 additions & 12 deletions vyper/venom/parser.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import json

from lark import Lark, Transformer

from vyper.venom.basicblock import (
Expand All @@ -11,13 +13,14 @@
from vyper.venom.context import IRContext
from vyper.venom.function import IRFunction

VENOM_PARSER = Lark(
"""
VENOM_GRAMMAR = """
%import common.CNAME
%import common.DIGIT
%import common.LETTER
%import common.WS
%import common.INT
%import common.SIGNED_INT
%import common.ESCAPED_STRING
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just wondering whether it would be better to simply change some of the labels instead of parsing escaped strings. (I don't know how many such cases there are; so far I have only stumbled on the case with a space, which could be replaced by an underscore.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could do it either way, honestly, but this seemed nicer for debugging (you can imagine the frontend generating a sentence or description of where a label came from).


# Allow multiple comment styles
COMMENT: ";" /[^\\n]*/ | "//" /[^\\n]*/ | "#" /[^\\n]*/
Expand All @@ -26,13 +29,13 @@

# TODO: consider making entry block implicit, e.g.
# `"{" instruction+ block* "}"`
function: "function" NAME "{" block* "}"
function: "function" LABEL_IDENT "{" block* "}"

data_section: "[data]" instruction*

block: NAME ":" statement*
block: LABEL_IDENT ":" "\\n" statement*

statement: instruction | assignment
statement: (instruction | assignment) "\\n"
assignment: VAR_IDENT "=" expr
expr: instruction | operand
instruction: OPCODE operands_list?
Expand All @@ -41,16 +44,22 @@

operand: VAR_IDENT | CONST | LABEL

CONST: INT
CONST: SIGNED_INT
OPCODE: CNAME
VAR_IDENT: "%" NAME
LABEL: "@" NAME
VAR_IDENT: "%" NAME (":" INT)?

# handy for identifier to be an escaped string sometimes
# (especially for machine-generated labels)
LABEL_IDENT: (NAME | ESCAPED_STRING)
LABEL: "@" LABEL_IDENT

NAME: (DIGIT|LETTER|"_")+

%ignore WS
%ignore COMMENT
"""
)

VENOM_PARSER = Lark(VENOM_GRAMMAR)


def _set_last_var(fn: IRFunction):
Expand Down Expand Up @@ -100,7 +109,7 @@ def start(self, children) -> IRContext:
fn._basic_block_dict.clear()

for block_name, instructions in blocks:
bb = IRBasicBlock(IRLabel(block_name), fn)
bb = IRBasicBlock(IRLabel(block_name, True), fn)
fn.append_basic_block(bb)

for instruction in instructions:
Expand Down Expand Up @@ -166,16 +175,26 @@ def operand(self, children) -> IROperand:
def OPCODE(self, token):
    """Return the `value` attribute of the given OPCODE token."""
    # NOTE(review): presumably called by the lark Transformer for each OPCODE
    # terminal -- the enclosing class is not visible here, confirm.
    return token.value

def LABEL_IDENT(self, label) -> str:
    """
    Return the label identifier text; a double-quoted (escaped) identifier
    is decoded with `json.loads`, a bare one is returned unchanged.
    """
    is_quoted = label.startswith('"')
    return json.loads(label) if is_quoted else label

def LABEL(self, label) -> IRLabel:
return IRLabel(label[1:])
label = label[1:]
if label.startswith('"'):
# unescape the escaped string
label = json.loads(label)
return IRLabel(label, True)

def VAR_IDENT(self, var_ident) -> IRVariable:
parts = var_ident[1:].split(":", maxsplit=1)
assert 1 <= len(parts) <= 2
varname = parts[0]
version = None
if len(parts) > 1:
version = int(parts[1])
version = parts[1]
return IRVariable(varname, version=version)

def CONST(self, val) -> IRLiteral:
Expand Down
6 changes: 3 additions & 3 deletions vyper/venom/venom_to_assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,8 @@ def generate_evm(self, no_optimize: bool = False) -> list[str]:
data_segments: dict = dict()
for inst in ctx.data_segment:
if inst.opcode == "dbname":
label = inst.operands[0].value
data_segments[label] = [DataHeader(f"_sym_{label}")]
label = inst.operands[0]
data_segments[label] = [DataHeader(f"_sym_{label.value}")]
elif inst.opcode == "db":
data = inst.operands[0]
if isinstance(data, IRLabel):
Expand Down Expand Up @@ -293,7 +293,7 @@ def _generate_evm_for_basicblock_r(
asm = []

# assembly entry point into the block
asm.append(f"_sym_{basicblock.label}")
asm.append(f"_sym_{basicblock.label.value}")
asm.append("JUMPDEST")

if len(basicblock.cfg_in) == 1:
Expand Down
Loading