Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Various fixes for parsing #306

Merged
merged 17 commits into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 35 additions & 1 deletion src/fandango/constraints/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def fitness(
eval(self.expression, self.global_variables, local_variables)
)
except Exception as e:
e.add_note("Evaluation failed: " + self.expression)
e.add_note(f"Evaluation failed: {self.expression}")
print_exception(e)
values.append(0)
# Create the fitness object
Expand All @@ -96,6 +96,9 @@ def __repr__(self):
)
return f"fitness {representation}"

def __str__(self):
    """Return the stored expression unchanged as the readable form."""
    return self.expression


class Constraint(GeneticBase, ABC):
"""
Expand Down Expand Up @@ -234,6 +237,14 @@ def __repr__(self):
)
return representation

def __str__(self):
    """
    Return the expression with every search identifier substituted.

    Each key of `self.searches` that occurs in `self.expression` is
    replaced, in iteration order, by `str()` of the associated search.
    """
    text = self.expression
    for placeholder, search in self.searches.items():
        text = text.replace(placeholder, str(search))
    return text

def accept(self, visitor: "ConstraintVisitor"):
"""
Accepts a visitor to traverse the constraint structure.
Expand Down Expand Up @@ -426,6 +437,14 @@ def __repr__(self):
)
return representation

def __str__(self):
    """
    Return "<left> <operator> <right>" with search identifiers substituted.

    The operator is rendered through its `.value`; afterwards every key of
    `self.searches` is replaced, in iteration order, by `str()` of the
    associated search.
    """
    text = f"{self.left!s} {self.operator.value} {self.right!s}"
    for placeholder, search in self.searches.items():
        text = text.replace(placeholder, str(search))
    return text

def accept(self, visitor: "ConstraintVisitor"):
"""
Accepts a visitor to traverse the constraint structure.
Expand Down Expand Up @@ -500,6 +519,9 @@ def fitness(
def __repr__(self):
    """Return the `repr()` of each constraint joined by " and ", in parentheses."""
    parts = [repr(constraint) for constraint in self.constraints]
    return f"({' and '.join(parts)})"

def __str__(self):
    """Return the `str()` of each constraint joined by " and ", in parentheses."""
    parts = [str(constraint) for constraint in self.constraints]
    return f"({' and '.join(parts)})"

def accept(self, visitor: "ConstraintVisitor"):
"""
Accepts a visitor to traverse the constraint structure.
Expand Down Expand Up @@ -577,6 +599,9 @@ def fitness(
def __repr__(self):
    """Return the `repr()` of each constraint joined by " or ", in parentheses."""
    parts = [repr(constraint) for constraint in self.constraints]
    return f"({' or '.join(parts)})"

def __str__(self):
    """Return the `str()` of each constraint joined by " or ", in parentheses."""
    parts = [str(constraint) for constraint in self.constraints]
    return f"({' or '.join(parts)})"

def accept(self, visitor: "ConstraintVisitor"):
"""
Accepts a visitor to traverse the constraint structure.
Expand Down Expand Up @@ -638,6 +663,9 @@ def fitness(
def __repr__(self):
    """Return "(antecedent -> consequent)" built from the `repr()` of both sides."""
    premise = self.antecedent
    conclusion = self.consequent
    return f"({premise!r} -> {conclusion!r})"

def __str__(self):
    """Return "(antecedent -> consequent)" built from the `str()` of both sides."""
    premise = self.antecedent
    conclusion = self.consequent
    return f"({premise!s} -> {conclusion!s})"

def accept(self, visitor: "ConstraintVisitor"):
"""
Accepts a visitor to traverse the constraint structure.
Expand Down Expand Up @@ -727,6 +755,9 @@ def fitness(
def __repr__(self):
    """Return "(exists <bound> in <search>: <statement>)" using `repr()` of each part."""
    bound, search, statement = self.bound, self.search, self.statement
    return f"(exists {bound!r} in {search!r}: {statement!r})"

def __str__(self):
    """Return "(exists <bound> in <search>: <statement>)" using `str()` of each part."""
    bound, search, statement = self.bound, self.search, self.statement
    return f"(exists {bound!s} in {search!s}: {statement!s})"

def accept(self, visitor: "ConstraintVisitor"):
"""
Accepts a visitor to traverse the constraint structure.
Expand Down Expand Up @@ -815,6 +846,9 @@ def fitness(
def __repr__(self):
    """Return "(forall <bound> in <search>: <statement>)" using `repr()` of each part."""
    bound, search, statement = self.bound, self.search, self.statement
    return f"(forall {bound!r} in {search!r}: {statement!r})"

def __str__(self):
    """Return "(forall <bound> in <search>: <statement>)" using `str()` of each part."""
    bound, search, statement = self.bound, self.search, self.statement
    return f"(forall {bound!s} in {search!s}: {statement!s})"

def accept(self, visitor: "ConstraintVisitor"):
"""
Accepts a visitor to traverse the constraint structure.
Expand Down
4 changes: 2 additions & 2 deletions src/fandango/evolution/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,11 @@ def fix_individual(self, individual: DerivationTree) -> DerivationTree:

if operator == Comparison.EQUAL and side == ComparisonSide.LEFT:
suggested_tree = self.grammar.parse(
str(value), failing_tree.tree.symbol
value, failing_tree.tree.symbol
)
if suggested_tree is None:
LOGGER.warning(
f"Suggested fix for {failing_tree.tree.symbol} returned None."
f"Could not parse {value!r} into {failing_tree.tree.symbol}"
)
continue
individual = individual.replace(
Expand Down
122 changes: 80 additions & 42 deletions src/fandango/language/grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ def __repr__(self):
f"({self.nonterminal} -> "
+ "".join(
[
f"{'•' if i == self._dot else ''}{s!r}"
f"{'•' if i == self._dot else ''}{s!s}"
for i, s in enumerate(self.symbols)
]
)
Expand Down Expand Up @@ -708,35 +708,73 @@ def scan_bytes(
table: List[Set[ParseState] | Column],
k: int,
w: int,
) -> tuple[bool, int]:
) -> bool:
"""
Scan a byte from the input `word`.
`state` is the current parse state.
`table` is the parse table.
`table[k]` is the current column.
`word[w]` is the current byte.
Return (True, #bytes) if bytes were matched, (False, 0) otherwise.
Return True if a byte was matched, False otherwise.
"""

assert not isinstance(state.dot.symbol, int)
assert not state.dot.is_regex

# LOGGER.debug(f"Checking byte(s) {state.dot!r} at position {w:#06x} ({w}) {word[w:]!r}")

match, match_length = state.dot.check(word[w:])
if match:
# Found a match
# LOGGER.debug(f"Matched {state.dot!r} at position {w:#06x} ({w}) (len = {match_length}) {word[w:w+match_length]!r}")
next_state = state.next()
next_state.children.append(
DerivationTree(Terminal(word[w:w+match_length]))
)
table[k + match_length].add(next_state)
self._max_position = max(self._max_position, w)
if not match:
return False

# Found a match
# LOGGER.debug(f"Matched byte(s) {state.dot!r} at position {w:#06x} ({w}) (len = {match_length}) {word[w:w + match_length]!r}")
next_state = state.next()
next_state.children.append(
DerivationTree(Terminal(word[w:w + match_length]))
)
table[k + match_length].add(next_state)
# LOGGER.debug(f"Next state: {next_state} at column {k + match_length}")
self._max_position = max(self._max_position, w + match_length)

return True

def scan_regex(
self,
state: ParseState,
word: str | bytes,
table: List[Set[ParseState] | Column],
k: int,
w: int,
) -> bool:
"""
Scan a regex match from the input `word`.
`state` is the current parse state; its dot symbol must be a regex.
`table` is the parse table.
`table[k]` is the current column.
`word[w:]` is the remaining input checked against the regex.
On a match of length n, the advanced state is added to `table[k + n]`.
Return True if the regex matched, False otherwise.
"""

if not state.dot.is_regex:
# We only advance by more than 1 if we have regexes.
# Otherwise, we may skip alternatives.
match_length = 1
assert not isinstance(state.dot.symbol, int)
assert state.dot.is_regex

return match, match_length
# LOGGER.debug(f"Checking regex {state.dot!r} at position {w:#06x} ({w}) {word[w:]!r}")

match, match_length = state.dot.check(word[w:])
if not match:
return False

# Found a match
# LOGGER.debug(f"Matched regex {state.dot!r} at position {w:#06x} ({w}) (len = {match_length}) {word[w:w+match_length]!r}")
next_state = state.next()
next_state.children.append(
DerivationTree(Terminal(word[w:w+match_length]))
)
table[k + match_length].add(next_state)
# LOGGER.debug(f"Next state: {next_state} at column {k + match_length}")
self._max_position = max(self._max_position, w + match_length)
return True

def complete(
self,
Expand Down Expand Up @@ -793,9 +831,8 @@ def _parse_forest(
w = 0 # Index into the input word
bit_count = -1 # If > 0, indicates the next bit to be scanned (7-0)

while k < len(table) and w <= len(word):
scanned = 1

while k < len(table):
# LOGGER.debug(f"Processing {len(table[k])} states at column {k}")
for state in table[k]:
if w >= len(word):
if allow_incomplete:
Expand All @@ -805,10 +842,12 @@ def _parse_forest(
self.complete(state, table, k)

if state.finished():
if state.nonterminal == implicit_start and w >= len(word):
# LOGGER.debug(f"Found {len(state.children)} parse tree(s)")
for child in state.children:
yield child
# LOGGER.debug(f"Finished")
if state.nonterminal == implicit_start:
if w >= len(word):
# LOGGER.debug(f"Found {len(state.children)} parse tree(s)")
for child in state.children:
yield child

self.complete(state, table, k)
elif not state.is_incomplete:
Expand All @@ -824,10 +863,10 @@ def _parse_forest(
state, word, table, k, w, bit_count
)
if match:
LOGGER.debug(f"Matched bit {state} at position {w:#06x} ({w}) {word[w:]!r}")
scanned = 1
# LOGGER.debug(f"Matched bit {state} at position {w:#06x} ({w}) {word[w:]!r}")
pass
else:
# Scan a byte
# Scan a regex or a byte
if 0 <= bit_count <= 7:
# LOGGER.warning(f"Position {w:#06x} ({w}): Parsing a byte while expecting bit {bit_count}. Check if bits come in multiples of eight")

Expand All @@ -841,23 +880,22 @@ def _parse_forest(
# In either case, we need to skip back
# to scanning bytes here.
bit_count = -1
scanned = 1

# LOGGER.debug(f"Checking byte(s) {state} at position {w:#06x} ({w}) {word[w:]!r}")
match, match_length = \
self.scan_bytes(state, word, table, k, w)
if match:
LOGGER.debug(f"Matched {match_length} byte(s) {state} at position {w:#06x} ({w}) {word[w:]!r}")
scanned = max(scanned, match_length)

if scanned > 0:
LOGGER.debug(f"Scanned {scanned} byte(s) at position {w:#06x} ({w}); bit_count = {bit_count}")
if bit_count >= 0:
# Advance by one bit
bit_count -= 1
if bit_count < 0:
# Advance to next byte
w += scanned
if state.dot.is_regex:
match = self.scan_regex(state, word,
table, k, w)
else:
match = self.scan_bytes(state, word,
table, k, w)

# LOGGER.debug(f"Scanned {scanned} byte(s) at position {w:#06x} ({w}); bit_count = {bit_count}")
if bit_count >= 0:
# Advance by one bit
bit_count -= 1
if bit_count < 0:
# Advance to next byte
w += 1

k += 1

Expand Down
20 changes: 10 additions & 10 deletions src/fandango/language/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def __init__(self, filename=None):
super().__init__()

def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
"""
Raise a SyntaxError whose message gives the repr of `self.filename`,
followed by `line`, `column`, and `msg`.
`recognizer`, `offendingSymbol`, and `e` are accepted but not used here.
"""
raise SyntaxError(f"{repr(self.filename)}, line {line}, column {column}: {msg}")
raise SyntaxError(f"{self.filename!r}, line {line}, column {column}: {msg}")


def closest_match(word, candidates):
Expand Down Expand Up @@ -109,7 +109,7 @@ def include(file_to_be_included: str):
return

raise FileNotFoundError(
f"{CURRENT_FILENAME}: {repr(file_to_be_included)} not found in {':'.join(str(dir) for dir in dirs)}"
f"{CURRENT_FILENAME}: {file_to_be_included!r} not found in {':'.join(str(dir) for dir in dirs)}"
)


Expand Down Expand Up @@ -441,7 +441,7 @@ def check_grammar_consistency(
if start_symbol not in defined_symbols:
closest = closest_match(start_symbol, defined_symbols)
raise NameError(
f"Start symbol {start_symbol} not defined in grammar. Did you mean {closest}?"
f"Start symbol {start_symbol!s} not defined in grammar. Did you mean {closest!s}?"
)

def collect_used_symbols(tree):
Expand All @@ -465,11 +465,11 @@ def collect_used_symbols(tree):
and symbol not in given_used_symbols
and symbol != start_symbol
):
LOGGER.info(f"Symbol {symbol} defined, but not used")
LOGGER.info(f"Symbol {symbol!s} defined, but not used")

if undefined_symbols:
first_undefined_symbol = undefined_symbols.pop()
error = NameError(f"Undefined symbol {first_undefined_symbol} in grammar")
error = NameError(f"Undefined symbol {first_undefined_symbol!s} in grammar")
raise error


Expand Down Expand Up @@ -506,16 +506,16 @@ def check_constraints_existence(grammar, constraints):
closest = closest_match(first_missing_symbol, defined_symbols)

if len(missing) > 1:
missing_symbols = ", ".join(["<" + symbol + ">" for symbol in missing])
missing_symbols = ", ".join(["<" + str(symbol) + ">" for symbol in missing])
error = NameError(
f"{constraint}: undefined symbols {missing_symbols}. Did you mean {closest}?"
f"{constraint}: undefined symbols {missing_symbols}. Did you mean {closest!s}?"
)
raise error

if len(missing) == 1:
missing_symbol = missing[0]
error = NameError(
f"{constraint}: undefined symbol <{missing_symbol}>. Did you mean {closest}?"
f"{constraint}: undefined symbol <{missing_symbol!s}>. Did you mean {closest!s}?"
)
raise error

Expand All @@ -525,11 +525,11 @@ def check_constraints_existence(grammar, constraints):
# This handles <parent>[...].<symbol> as <parent>..<symbol>.
# We could also interpret the actual [...] contents here,
# but slices and chains could make this hard -- AZ
recurse = f"<{parent}>[" in str(value) or f"..<{symbol}>" in str(value)
recurse = f"<{parent!s}>[" in str(value) or f"..<{symbol!s}>" in str(value)
if not check_constraints_existence_children(
grammar, parent, symbol, recurse, indirect_child
):
msg = f"{constraint}: <{parent}> has no child <{symbol}>"
msg = f"{constraint!s}: <{parent!s}> has no child <{symbol!s}>"
raise ValueError(msg)


Expand Down
Loading
Loading