diff --git a/examples/script/concolic.py b/examples/script/concolic.py index ab75e682a..ce3a90292 100755 --- a/examples/script/concolic.py +++ b/examples/script/concolic.py @@ -22,6 +22,8 @@ from manticore.core.plugin import ExtendedTracer, Follower, Plugin from manticore.core.smtlib.constraints import ConstraintSet from manticore.core.smtlib.solver import Z3Solver +from manticore.core.smtlib.visitors import GetDeclarations + from manticore.utils import config import copy @@ -136,8 +138,17 @@ def perm(lst, func): def constraints_to_constraintset(constupl): + # originally those constraints belonged to a different ConstraintSet + # This is a hack x = ConstraintSet() - x._constraints = list(constupl) + + declarations = GetDeclarations() + for a in constupl: + declarations.visit(a) + x.add(a) + for d in declarations.result: + x._declare(d) + return x diff --git a/manticore/core/manticore.py b/manticore/core/manticore.py index c2e0b47a8..92abd8c06 100644 --- a/manticore/core/manticore.py +++ b/manticore/core/manticore.py @@ -525,7 +525,7 @@ def verbosity(level): set_verbosity(level) # State storage - @Eventful.will_did("save_state") + @Eventful.will_did("save_state", can_raise=False) def _save(self, state, state_id=None): """ Store or update a state in secondary storage under state_id. Use a fresh id is None is provided. @@ -538,7 +538,7 @@ def _save(self, state, state_id=None): state._id = self._workspace.save_state(state, state_id=state_id) return state.id - @Eventful.will_did("load_state") + @Eventful.will_did("load_state", can_raise=False) def _load(self, state_id): """ Load the state from the secondary storage @@ -557,7 +557,7 @@ def _load(self, state_id): self.stcache[state_id] = state return state - @Eventful.will_did("remove_state") + @Eventful.will_did("remove_state", can_raise=False) def _remove(self, state_id): """ Remove a state from secondary storage diff --git a/manticore/core/smtlib/constraints.py b/manticore/core/smtlib/constraints.py index 711e5e91a..b35519aee 100644 --- a/manticore/core/smtlib/constraints.py +++ b/manticore/core/smtlib/constraints.py @@ -1,9 +1,11 @@ import itertools import sys - +import copy +from typing import Optional from ...utils.helpers import PickleSerializer from ...exceptions import SmtlibError from .expression import ( + Expression, BitVecVariable, BoolVariable, ArrayVariable, @@ -16,19 +18,19 @@ Variable, Constant, ) -from .visitors import GetDeclarations, TranslatorSmtlib, get_variables, simplify, replace +from .visitors import ( + GetDeclarations, + TranslatorSmtlib, + get_variables, + simplify, + replace, + pretty_print, +) from ...utils import config import logging logger = logging.getLogger(__name__) -consts = config.get_group("smt") -consts.add( - "related_constraints", - default=False, - description="Try slicing the current path constraint to contain only related items", -) - class ConstraintException(SmtlibError): """ @@ -115,7 +117,7 @@ def _get_sid(self) -> int: self._sid += 1 return self._sid - def __get_related(self, related_to=None): + def related_to(self, *related_to) -> "ConstraintSet": # sam.moelius: There is a flaw in how __get_related works: when called on certain # unsatisfiable sets, it can return a satisfiable one. The flaw arises when: # * self consists of a single constraint C @@ -127,49 +129,59 @@ def __get_related(self, related_to=None): # set. Thus, __get_related was called on an unsatisfiable set, {C}, but it returned a # satisfiable one, {}. # In light of the above, the core __get_related logic is currently disabled. - # if related_to is not None: - # feliam: This assumes the previous constraints are already SAT (normal SE forking) - if consts.related_constraints and related_to is not None: - number_of_constraints = len(self.constraints) - remaining_constraints = set(self.constraints) - related_variables = get_variables(related_to) - related_constraints = set() - - added = True - while added: - added = False - logger.debug("Related variables %r", [x.name for x in related_variables]) - for constraint in list(remaining_constraints): - if isinstance(constraint, BoolConstant): - if constraint.value: - continue - else: - related_constraints = {constraint} - break - - variables = get_variables(constraint) - if related_variables & variables or not (variables): - remaining_constraints.remove(constraint) - related_constraints.add(constraint) - related_variables |= variables - added = True - - logger.debug( - "Reduced %d constraints!!", number_of_constraints - len(related_constraints) - ) - else: - related_variables = set() - for constraint in self.constraints: - related_variables |= get_variables(constraint) - related_constraints = set(self.constraints) - return related_variables, related_constraints - - def to_string(self, related_to=None, replace_constants=False): - related_variables, related_constraints = self.__get_related(related_to) + """ + Slices this ConstraintSet keeping only the related constraints. + Two constraints are independient if they can be expressed full using a + disjoint set of variables. + Todo: Research. constraints refering differen not overlapping parts of the same array + should be considered independient. + :param related_to: An expression + :return: + """ + + if not related_to: + return copy.copy(self) + number_of_constraints = len(self.constraints) + remaining_constraints = set(self.constraints) + related_variables = set() + for expression in related_to: + related_variables |= get_variables(expression) + related_constraints = set() + + added = True + while added: + added = False + logger.debug("Related variables %r", [x.name for x in related_variables]) + for constraint in list(remaining_constraints): + if isinstance(constraint, BoolConstant): + if constraint.value: + continue + else: + related_constraints = {constraint} + break + + variables = get_variables(constraint) + if related_variables & variables or not (variables): + remaining_constraints.remove(constraint) + related_constraints.add(constraint) + related_variables |= variables + added = True + + logger.debug("Reduced %d constraints!!", number_of_constraints - len(related_constraints)) + # related_variables, related_constraints + cs = ConstraintSet() + for var in related_variables: + cs._declare(var) + for constraint in related_constraints: + cs.add(constraint) + return cs + + def to_string(self, replace_constants: bool = False) -> str: + variables, constraints = self.get_declared_variables(), self.constraints if replace_constants: constant_bindings = {} - for expression in related_constraints: + for expression in constraints: if ( isinstance(expression, BoolEqual) and isinstance(expression.operands[0], Variable) @@ -179,7 +191,7 @@ def to_string(self, related_to=None, replace_constants=False): tmp = set() result = "" - for var in related_variables: + for var in variables: # FIXME # band aid hack around the fact that we are double declaring stuff :( :( if var.declaration in tmp: @@ -187,8 +199,9 @@ def to_string(self, related_to=None, replace_constants=False): continue tmp.add(var.declaration) result += var.declaration + "\n" + translator = TranslatorSmtlib(use_bindings=True) - for constraint in related_constraints: + for constraint in constraints: if replace_constants: constraint = simplify(replace(constraint, constant_bindings)) # if no variables then it is a constant diff --git a/manticore/core/smtlib/expression.py b/manticore/core/smtlib/expression.py index 7e964ad8a..66296ae36 100644 --- a/manticore/core/smtlib/expression.py +++ b/manticore/core/smtlib/expression.py @@ -504,6 +504,13 @@ def __hash__(self): def value(self): return self._value + @property + def signed_value(self): + if self._value & self.signmask: + return self._value - (1 << self.size) + else: + return self._value + class BitVecOperation(BitVec): __slots__ = ["_operands"] @@ -960,7 +967,7 @@ def value(self): return self.operands[2] -class ArraySlice(Array): +class ArraySlice(ArrayOperation): def __init__( self, array: Union["Array", "ArrayProxy"], offset: int, size: int, *args, **kwargs ): @@ -968,23 +975,22 @@ def __init__( raise ValueError("Array expected") if isinstance(array, ArrayProxy): array = array._array - super().__init__(array.index_bits, array.index_max, array.value_bits, *args, **kwargs) + super().__init__(array, **kwargs) - self._array = array self._slice_offset = offset self._slice_size = size @property - def underlying_variable(self): - return self._array.underlying_variable + def array(self): + return self.operands[0] @property - def operands(self): - return self._array.operands + def underlying_variable(self): + return self.array.underlying_variable @property def index_bits(self): - return self._array.index_bits + return self.array.index_bits @property def index_max(self): @@ -992,18 +998,14 @@ def index_max(self): @property def value_bits(self): - return self._array.value_bits - - @property - def taint(self): - return self._array.taint + return self.array.value_bits def select(self, index): - return self._array.select(index + self._slice_offset) + return self.array.select(index + self._slice_offset) def store(self, index, value): return ArraySlice( - self._array.store(index + self._slice_offset, value), + self.array.store(index + self._slice_offset, value), self._slice_offset, self._slice_size, ) @@ -1048,7 +1050,7 @@ def name(self): @property def operands(self): - return self._array.operands + return (self._array,) @property def index_bits(self): @@ -1145,13 +1147,13 @@ def written(self): # take out Proxy sleve array = self._array offset = 0 - while isinstance(array, ArraySlice): - # if it is a proxy over a slice take out the slice too - offset += array._slice_offset - array = array._array while not isinstance(array, ArrayVariable): - # The index written to underlaying Array are displaced when sliced - written.add(array.index - offset) + if isinstance(array, ArraySlice): + # if it is a proxy over a slice take out the slice too + offset += array._slice_offset + else: + # The index written to underlaying Array are displaced when sliced + written.add(array.index - offset) array = array.array assert isinstance(array, ArrayVariable) self._written = written diff --git a/manticore/core/smtlib/solver.py b/manticore/core/smtlib/solver.py index 365b83a0e..9d512c61a 100644 --- a/manticore/core/smtlib/solver.py +++ b/manticore/core/smtlib/solver.py @@ -205,6 +205,8 @@ def stop(self): self._proc.stdout.close() # Kill the process self._proc.kill() + self._proc.wait() + # No need to wait for termination, zombies avoided. self._proc = None @@ -217,7 +219,6 @@ def __readline_and_count(self): if self._debug: if "(error" in buf: raise SolverException(f"Error in smtlib: {buf}") - # lparen, rparen = buf.count("("), buf.count(")") lparen, rparen = map(sum, zip(*((c == "(", c == ")") for c in buf))) return buf, lparen, rparen @@ -229,7 +230,6 @@ def send(self, cmd: str) -> None: """ if self._debug: logger.debug(">%s", cmd) - print(">", cmd) self._proc.stdout.flush() # type: ignore self._proc.stdin.write(f"{cmd}\n") # type: ignore @@ -247,7 +247,6 @@ def recv(self) -> str: if self._debug: logger.debug("<%s", buf) - print("<", buf) return buf @@ -269,6 +268,7 @@ def __init__( support_reset: bool = False, support_minmax: bool = False, support_pushpop: bool = False, + multiple_check: bool = True, debug: bool = False, ): @@ -295,6 +295,7 @@ def __init__( self._support_minmax = support_minmax self._support_reset = support_reset self._support_pushpop = support_pushpop + self._multiple_check = multiple_check if not self._support_pushpop: setattr(self, "_push", None) @@ -307,8 +308,6 @@ def __init__( self._smtlib.start() # run solver specific initializations - for cfg in self._init: - self._smtlib.send(cfg) def _reset(self, constraints: Optional[str] = None) -> None: """Auxiliary method to reset the smtlib external solver to initial defaults""" @@ -411,7 +410,8 @@ def can_be_true(self, constraints: ConstraintSet, expression: Union[bool, Bool] with constraints as temp_cs: temp_cs.add(expression) - return self.can_be_true(temp_cs) + self._reset(temp_cs.to_string()) + return self._is_sat() # get-all-values min max minmax def _optimize_generic(self, constraints: ConstraintSet, x: BitVec, goal: str, max_iter=10000): @@ -432,10 +432,9 @@ def _optimize_generic(self, constraints: ConstraintSet, x: BitVec, goal: str, ma start = time.time() with constraints as temp_cs: - X = temp_cs.new_bitvec(x.size) + X = temp_cs.new_bitvec(x.size) # _getvalue needs a Variable temp_cs.add(X == x) - aux = temp_cs.new_bitvec(X.size, name="optimized_") - self._reset(temp_cs.to_string(related_to=X)) + self._reset(temp_cs.to_string()) # Find one value and use it as currently known min/Max if not self._is_sat(): @@ -446,7 +445,7 @@ def _optimize_generic(self, constraints: ConstraintSet, x: BitVec, goal: str, ma # This uses a binary search to find a suitable range for aux # Use known solution as min or max depending on the goal if goal == "maximize": - m, M = last_value, (1 << x.size) - 1 + m, M = last_value, (1 << X.size) - 1 else: m, M = 0, last_value @@ -466,8 +465,11 @@ def _optimize_generic(self, constraints: ConstraintSet, x: BitVec, goal: str, ma if time.time() - start > consts.timeout: raise SolverError("Timeout") - # reset to before the dichotomic search - self._reset(temp_cs.to_string(related_to=X)) + # reset to before the dichotomic search + with constraints as temp_cs: + X = temp_cs.new_bitvec(x.size) # _getvalue needs a Variable + temp_cs.add(X == x) + self._reset(temp_cs.to_string()) # At this point we know aux is inside [m,M] # Lets constrain it to that range @@ -527,7 +529,7 @@ def get_all_values( ) temp_cs.add(var == expression) - self._reset(temp_cs.to_string(related_to=var)) + self._reset(temp_cs.to_string()) result = [] start = time.time() while self._is_sat(): @@ -546,12 +548,14 @@ def get_all_values( if time.time() - start > consts.timeout: if silent: logger.info("Timeout searching for all solutions") - return result + return list(result) raise SolverError("Timeout") # Sometimes adding a new contraint after a check-sat eats all the mem - temp_cs.add(var != value) - self._reset(temp_cs.to_string(related_to=var)) - # self._assert(var != value) + if self._multiple_check: + self._smtlib.send(f"(assert {translate_to_smtlib(var != value)})") + else: + temp_cs.add(var != value) + self._reset(temp_cs.to_string()) return list(result) def _optimize_fancy(self, constraints: ConstraintSet, x: BitVec, goal: str, max_iter=10000): @@ -572,8 +576,7 @@ def _optimize_fancy(self, constraints: ConstraintSet, x: BitVec, goal: str, max_ X = temp_cs.new_bitvec(x.size) temp_cs.add(X == x) aux = temp_cs.new_bitvec(X.size, name="optimized_") - self._reset(temp_cs.to_string(related_to=X)) - self._smtlib.send(aux.declaration) + self._reset(temp_cs.to_string()) self._assert(operation(X, aux)) self._smtlib.send("(%s %s)" % (goal, aux.name)) @@ -607,7 +610,6 @@ def get_value(self, constraints: ConstraintSet, *expressions): subvar = temp_cs.new_bitvec(expression.value_bits) var.append(subvar) temp_cs.add(subvar == simplify(expression[i])) - self._reset(temp_cs.to_string()) if not self._is_sat(): raise SolverError( @@ -650,44 +652,47 @@ def __init__(self): This is implemented using an external z3 solver (via a subprocess). See https://github.com/Z3Prover/z3 """ - init = [ - # http://smtlib.cs.uiowa.edu/logics-all.shtml#QF_AUFBV - # Closed quantifier-free formulas over the theory of bitvectors and bitvector arrays extended with - # free sort and function symbols. - "(set-logic QF_AUFBV)", - # The declarations and definitions will be scoped - "(set-option :global-decls false)", - # sam.moelius: Option "tactic.solve_eqs.context_solve" was turned on by this commit in z3: - # https://github.com/Z3Prover/z3/commit/3e53b6f2dbbd09380cd11706cabbc7e14b0cc6a2 - # Turning it off greatly improves Manticore's performance on test_integer_overflow_storageinvariant - # in test_consensys_benchmark.py. - "(set-option :tactic.solve_eqs.context_solve false)", - ] command = f"{consts.z3_bin} -t:{consts.timeout * 1000} -memory:{consts.memory} -smt2 -in" - support_minmax, support_reset = self.__autoconfig() + init, support_minmax, support_reset, multiple_check = self.__autoconfig() super().__init__( command=command, init=init, value_fmt=16, - support_minmax=True, - support_reset=True, + support_minmax=support_minmax, + support_reset=support_reset, + multiple_check=multiple_check, support_pushpop=True, debug=False, ) def __autoconfig(self): + init = [ + # http://smtlib.cs.uiowa.edu/logics-all.shtml#QF_AUFBV + # Closed quantifier-free formulas over the theory of bitvectors and bitvector arrays extended with + # free sort and function symbols. + "(set-logic QF_AUFBV)", + # The declarations and definitions will be scoped + "(set-option :global-decls false)", + ] + # To cache what get-info returned; can be directly set when writing tests self.version = self._solver_version() - if self.version >= Version(4, 5, 0): - support_minmax = False - support_reset = False - elif self.version >= Version(4, 4, 1): - support_minmax = True - support_reset = False - else: - logger.debug(" Please install Z3 4.4.1 or newer to get optimization support") - return support_minmax, support_reset + support_reset = True + + if self.version > Version(4, 8, 4): + # sam.moelius: Option "tactic.solve_eqs.context_solve" was turned on by this commit in z3: + # https://github.com/Z3Prover/z3/commit/3e53b6f2dbbd09380cd11706cabbc7e14b0cc6a2 + # Turning it off greatly improves Manticore's performance on test_integer_overflow_storageinvariant + # in test_consensys_benchmark.py. + init.append("(set-option :tactic.solve_eqs.context_solve false)") + + support_minmax = self.version >= Version(4, 4, 1) + + # Certain version of Z3 fails to handle multiple check-sat + # https://gist.github.com/feliam/0f125c00cb99ef05a6939a08c4578902 + multiple_check = self.version < Version(4, 8, 7) + return init, support_minmax, support_reset, multiple_check def _solver_version(self) -> Version: """ diff --git a/manticore/core/smtlib/visitors.py b/manticore/core/smtlib/visitors.py index cbb256e11..eff0d9b58 100644 --- a/manticore/core/smtlib/visitors.py +++ b/manticore/core/smtlib/visitors.py @@ -5,9 +5,10 @@ import copy import logging import operator +import math +from decimal import Decimal logger = logging.getLogger(__name__) -UNSIGN_MASK = (1 << 256) - 1 class Visitor: @@ -51,7 +52,7 @@ def result(self): return self._stack[-1] def _method(self, expression, *args): - for cls in expression.__class__.__mro__: + for cls in expression.__class__.__mro__[:-1]: sort = cls.__name__ methodname = "visit_%s" % sort if hasattr(self, methodname): @@ -72,7 +73,8 @@ def visit(self, node, use_fixed_point=False): :param use_fixed_point: if True, it runs _methods until a fixed point is found :type use_fixed_point: Bool """ - + if isinstance(node, ArrayProxy): + node = node.array cache = self._cache visited = set() stack = [] @@ -285,7 +287,6 @@ def __init__(self, **kw): BitVecAdd: operator.__add__, BitVecSub: operator.__sub__, BitVecMul: operator.__mul__, - BitVecDiv: operator.__floordiv__, BitVecShiftLeft: operator.__lshift__, BitVecShiftRight: operator.__rshift__, BitVecAnd: operator.__and__, @@ -293,21 +294,72 @@ def __init__(self, **kw): BitVecXor: operator.__xor__, BitVecNot: operator.__not__, BitVecNeg: operator.__invert__, - LessThan: operator.__lt__, - LessOrEqual: operator.__le__, - BoolEqual: operator.__eq__, - GreaterThan: operator.__gt__, - GreaterOrEqual: operator.__ge__, BoolAnd: operator.__and__, + BoolEqual: operator.__eq__, BoolOr: operator.__or__, BoolNot: operator.__not__, - BitVecUnsignedDiv: lambda x, y: (x & UNSIGN_MASK) // (y & UNSIGN_MASK), - UnsignedLessThan: lambda x, y: (x & UNSIGN_MASK) < (y & UNSIGN_MASK), - UnsignedLessOrEqual: lambda x, y: (x & UNSIGN_MASK) <= (y & UNSIGN_MASK), - UnsignedGreaterThan: lambda x, y: (x & UNSIGN_MASK) > (y & UNSIGN_MASK), - UnsignedGreaterOrEqual: lambda x, y: (x & UNSIGN_MASK) >= (y & UNSIGN_MASK), + UnsignedLessThan: operator.__lt__, + UnsignedLessOrEqual: operator.__le__, + UnsignedGreaterThan: operator.__gt__, + UnsignedGreaterOrEqual: operator.__ge__, } + def visit_BitVecUnsignedDiv(self, expression, *operands) -> Optional[BitVecConstant]: + if all(isinstance(o, Constant) for o in operands): + a = operands[0].value + b = operands[1].value + if a == 0: + ret = 0 + else: + ret = math.trunc(Decimal(a) / Decimal(b)) + return BitVecConstant(expression.size, ret, taint=expression.taint) + return None + + def visit_LessThan(self, expression, *operands) -> Optional[BoolConstant]: + if all(isinstance(o, Constant) for o in operands): + a = operands[0].signed_value + b = operands[1].signed_value + return BoolConstant(a < b, taint=expression.taint) + return None + + def visit_LessOrEqual(self, expression, *operands) -> Optional[BoolConstant]: + if all(isinstance(o, Constant) for o in operands): + a = operands[0].signed_value + b = operands[1].signed_value + return BoolConstant(a <= b, taint=expression.taint) + return None + + def visit_GreaterThan(self, expression, *operands) -> Optional[BoolConstant]: + if all(isinstance(o, Constant) for o in operands): + a = operands[0].signed_value + b = operands[1].signed_value + return BoolConstant(a > b, taint=expression.taint) + return None + + def visit_GreaterOrEqual(self, expression, *operands) -> Optional[BoolConstant]: + if all(isinstance(o, Constant) for o in operands): + a = operands[0].signed_value + b = operands[1].signed_value + return BoolConstant(a >= b, taint=expression.taint) + return None + + def visit_BitVecDiv(self, expression, *operands) -> Optional[BitVecConstant]: + if all(isinstance(o, Constant) for o in operands): + signmask = operands[0].signmask + mask = operands[0].mask + numeral = operands[0].value + dividend = operands[1].value + if numeral & signmask: + numeral = -(mask - numeral - 1) + if dividend & signmask: + dividend = -(mask - dividend - 1) + if dividend == 0: + result = 0 + else: + result = math.trunc(Decimal(numeral) / Decimal(dividend)) + return BitVecConstant(expression.size, result, taint=expression.taint) + return None + def visit_BitVecConcat(self, expression, *operands): if all(isinstance(o, Constant) for o in operands): result = 0 @@ -989,6 +1041,9 @@ def simplify_array_select(array_exp): def get_variables(expression): + if isinstance(expression, ArrayProxy): + expression = expression.array + visitor = GetDeclarations() visitor.visit(expression) return visitor.result diff --git a/manticore/native/state_merging.py b/manticore/native/state_merging.py index 2ff9f2e64..332f95b73 100644 --- a/manticore/native/state_merging.py +++ b/manticore/native/state_merging.py @@ -32,7 +32,8 @@ def compare_buffers(cs, buffer1, buffer2): if len(buffer1) != len(buffer2): return False for b1, b2 in zip(buffer1, buffer2): - if not SelectedSolver.instance().must_be_true(cs, b1 == b2): + cond = cs.migrate(b1 == b2) + if not SelectedSolver.instance().must_be_true(cs, cond): return False return True @@ -69,7 +70,7 @@ def compare_byte_vals(mem1, mem2, addr, merged_constraint): val2 = mem2.read(addr, 1) # since we only read a single byte value, these lists should only have one entry in them assert len(val1) == 1 and len(val2) == 1 - cond_to_check = val1[0] == val2[0] + cond_to_check = merged_constraint.migrate(val1[0] == val2[0]) if not SelectedSolver.instance().must_be_true(merged_constraint, cond_to_check): return False else: @@ -190,13 +191,20 @@ def merge_cpu(cpu1, cpu2, state, exp1, merged_constraint): if isinstance(val1, BitVec) and isinstance(val2, BitVec): assert val1.size == val2.size if issymbolic(val1) or issymbolic(val2) or val1 != val2: - if SelectedSolver.instance().must_be_true(merged_constraint, val1 != val2): + val1_migrated = merged_constraint.migrate(val1) + val2_migrated = merged_constraint.migrate(val2) + if SelectedSolver.instance().must_be_true( + merged_constraint, val1_migrated != val2_migrated + ): merged_regs.append(reg) if cpu1.regfile.sizeof(reg) == 1: - state.cpu.write_register(reg, Operators.ITE(exp1, val1, val2)) + state.cpu.write_register(reg, Operators.ITE(exp1, val1_migrated, val2_migrated)) else: state.cpu.write_register( - reg, Operators.ITEBV(cpu1.regfile.sizeof(reg), exp1, val1, val2) + reg, + Operators.ITEBV( + cpu1.regfile.sizeof(reg), exp1, val1_migrated, val2_migrated + ), ) return merged_regs diff --git a/manticore/platforms/evm.py b/manticore/platforms/evm.py index 0b85987f4..435c6d83e 100644 --- a/manticore/platforms/evm.py +++ b/manticore/platforms/evm.py @@ -1381,6 +1381,8 @@ def setstate(state, value): def read_buffer(self, offset, size): if issymbolic(size) and not isinstance(size, Constant): raise EVMException("Symbolic size not supported") + if isinstance(size, Constant): + size = size.value if size == 0: return b"" self._allocate(offset, size) diff --git a/manticore/utils/event.py b/manticore/utils/event.py index 82268c33c..2e7e03517 100644 --- a/manticore/utils/event.py +++ b/manticore/utils/event.py @@ -75,15 +75,15 @@ def all_events(cls): return all_evts @staticmethod - def will_did(name): + def will_did(name, can_raise=False): """Pre/pos emiting signal""" def deco(func): @functools.wraps(func) def newFunction(self, *args, **kw): - self._publish(f"will_{name}", *args, **kw) + self._publish(f"will_{name}", *args, can_raise=can_raise, **kw) result = func(self, *args, **kw) - self._publish(f"did_{name}", result) + self._publish(f"did_{name}", result, can_raise=can_raise) return result return newFunction @@ -139,11 +139,17 @@ def _check_event(self, _name): # Wrapper for _publish_impl that also makes sure the event is published from # a class that supports it. # The underscore _name is to avoid naming collisions with callback params - def _publish(self, _name, *args, **kwargs): + def _publish(self, _name, *args, can_raise=True, **kwargs): # only publish if there is at least one subscriber - if _name in self.__sub_events__: - self._check_event(_name) - self._publish_impl(_name, *args, **kwargs) + try: + if _name in self.__sub_events__: + self._check_event(_name) + self._publish_impl(_name, *args, **kwargs) + except Exception as e: + if can_raise: + raise + else: + logger.info("Exception raised at a callback %r", e) # Separate from _publish since the recursive method call to forward an event # shouldn't check the event. diff --git a/manticore/wasm/executor.py b/manticore/wasm/executor.py index 962863ed2..dd6eeede7 100644 --- a/manticore/wasm/executor.py +++ b/manticore/wasm/executor.py @@ -28,6 +28,9 @@ import operator import math +MASK_64 = (1 << 64) - 1 +MASK_32 = (1 << 32) - 1 + class Executor(Eventful): """ @@ -450,14 +453,13 @@ def int_store(self, store, stack, imm: MemoryImm, ty: type, n=None): ) # TODO - Implement a symbolic memory model ea = i + imm.offset N = n if n else (32 if ty is I32 else 64) + mask = (1 << N) - 1 if ea not in mem: raise OutOfBoundsMemoryTrap(ea) if (ea + (N // 8)) - 1 not in mem: raise OutOfBoundsMemoryTrap(ea + (N // 8)) if n: - b = [ - Operators.CHR(Operators.EXTRACT(c % 2 ** N, offset, 8)) for offset in range(0, N, 8) - ] + b = [Operators.CHR(Operators.EXTRACT(c & mask, offset, 8)) for offset in range(0, N, 8)] else: b = [Operators.CHR(Operators.EXTRACT(c, offset, 8)) for offset in range(0, N, 8)] @@ -742,6 +744,17 @@ def i32_clz(self, store, stack): res = Operators.ITEBV(32, flag, res, 32) stack.push(I32.cast(res)) + # value = src.read() + # flag = Operators.EXTRACT(value, 0, 1) == 1 + # res = 0 + # for pos in range(1, src.size): + # res = Operators.ITEBV(dest.size, flag, res, pos) + # flag = Operators.OR(flag, Operators.EXTRACT(value, pos, 1) == 1) + # + # cpu.CF = res == src.size + # cpu.ZF = res == 0 + # dest.write(res) + def i32_ctz(self, store, stack): # Copied from x86 TZCNT stack.has_type_on_top(I32, 1) c1 = stack.pop() @@ -768,19 +781,19 @@ def i32_add(self, store, stack): stack.has_type_on_top(I32, 2) c2 = stack.pop() c1 = stack.pop() - stack.push(I32.cast((c2 + c1) % 2 ** 32)) + stack.push(I32.cast((c2 + c1) & MASK_32)) def i32_sub(self, store, stack): stack.has_type_on_top(I32, 2) c2 = stack.pop() c1 = stack.pop() - stack.push(I32.cast((c1 - c2 + 2 ** 32) % 2 ** 32)) + stack.push(I32.cast((c1 - c2 + 2 ** 32) & MASK_32)) def i32_mul(self, store, stack): stack.has_type_on_top(I32, 2) c2 = stack.pop() c1 = stack.pop() - stack.push(I32.cast((c2 * c1) % 2 ** 32)) + stack.push(I32.cast((c2 * c1) & MASK_32)) def i32_div_s(self, store, stack): stack.has_type_on_top(I32, 2) @@ -850,7 +863,7 @@ def i32_shl(self, store, stack): stack.has_type_on_top(I32, 2) c2 = stack.pop() c1 = stack.pop() - stack.push(I32.cast((c1 << (c2 % 32)) % 2 ** 32)) + stack.push(I32.cast((c1 << (c2 % 32)) & MASK_32)) def i32_shr_s(self, store, stack): stack.has_type_on_top(I32, 2) @@ -925,19 +938,19 @@ def i64_add(self, store, stack): stack.has_type_on_top(I64, 2) c2 = stack.pop() c1 = stack.pop() - stack.push(I64.cast((c2 + c1) % 2 ** 64)) + stack.push(I64.cast((c2 + c1) & MASK_64)) def i64_sub(self, store, stack): stack.has_type_on_top(I64, 2) c2 = stack.pop() c1 = stack.pop() - stack.push(I64.cast((c1 - c2 + 2 ** 64) % 2 ** 64)) + stack.push(I64.cast((c1 - c2 + 2 ** 64) & MASK_64)) def i64_mul(self, store, stack): stack.has_type_on_top(I64, 2) c2 = stack.pop() c1 = stack.pop() - stack.push(I64.cast((c2 * c1) % 2 ** 64)) + stack.push(I64.cast((c2 * c1) & MASK_64)) def i64_div_s(self, store, stack): stack.has_type_on_top(I64, 2) @@ -1014,7 +1027,7 @@ def i64_shl(self, store, stack): stack.has_type_on_top(I64, 2) c2 = stack.pop() c1 = stack.pop() - stack.push(I64.cast((c1 << (c2 % 64)) % 2 ** 64)) + stack.push(I64.cast((c1 << (c2 % 64)) & MASK_64)) def i64_shr_s(self, store, stack): stack.has_type_on_top(I64, 2) @@ -1054,7 +1067,7 @@ def i64_rotr(self, store, stack): def i32_wrap_i64(self, store, stack): stack.has_type_on_top(I64, 1) c1: I64 = stack.pop() - c1 %= 2 ** 32 + c1 &= MASK_32 c1 = Operators.EXTRACT(c1, 0, 32) stack.push(I32.cast(c1)) diff --git a/tests/native/test_armv7cpu.py b/tests/native/test_armv7cpu.py index 144c96d95..1b43bfa7e 100644 --- a/tests/native/test_armv7cpu.py +++ b/tests/native/test_armv7cpu.py @@ -1568,7 +1568,7 @@ def test_uqsub8_concrete_saturated(self): @itest_custom("uqsub8 r3, r1, r2") @itest_setregs("R2=0x01010101") def test_uqsub8_sym(self): - op1 = BitVecVariable(32, "op1") + op1 = self.cpu.memory.constraints.new_bitvec(32, "op1") self.cpu.memory.constraints.add(op1 >= 0x04030201) self.cpu.memory.constraints.add(op1 < 0x04030204) self.cpu.R1 = op1 @@ -2416,7 +2416,7 @@ def test_sxth(self): @itest_custom("blx r1") def test_blx_reg_sym(self): - dest = BitVecVariable(32, "dest") + dest = self.cpu.memory.constraints.new_bitvec(32, "dest") self.cpu.memory.constraints.add(dest >= 0x1000) self.cpu.memory.constraints.add(dest <= 0x1001) self.cpu.R1 = dest @@ -2462,7 +2462,7 @@ def test_symbolic_conditional(self): self._setupCpu(asm, mode=CS_MODE_THUMB) # code starts at 0x1004 # Set R0 as a symbolic value - self.cpu.R0 = BitVecVariable(32, "val") + self.cpu.R0 = self.cpu.memory.constraints.new_bitvec(32, "val") self.cpu.execute() # tst r0, r0 self.cpu.execute() # beq label diff --git a/tests/native/test_linux.py b/tests/native/test_linux.py index a12580192..9f9bd3df9 100644 --- a/tests/native/test_linux.py +++ b/tests/native/test_linux.py @@ -285,7 +285,7 @@ def test_armv7_syscall_openat_concrete(self) -> None: def test_armv7_syscall_openat_symbolic(self) -> None: platform, temp_dir = self._armv7_create_openat_state() try: - platform.current.R0 = BitVecVariable(32, "fd") + platform.current.R0 = platform.constraints.new_bitvec(32, "fd") with self.assertRaises(ConcretizeRegister) as cm: platform.syscall() diff --git a/tests/other/data/ErrRelated.pkl.gz b/tests/other/data/ErrRelated.pkl.gz new file mode 100644 index 000000000..eb1036567 Binary files /dev/null and b/tests/other/data/ErrRelated.pkl.gz differ diff --git a/tests/other/test_smtlibv2.py b/tests/other/test_smtlibv2.py index 501d03ee8..c377ddd9e 100644 --- a/tests/other/test_smtlibv2.py +++ b/tests/other/test_smtlibv2.py @@ -1,5 +1,5 @@ import unittest -from unittest.mock import patch, mock_open +import os from manticore.core.smtlib import ( ConstraintSet, @@ -12,15 +12,56 @@ arithmetic_simplify, constant_folder, replace, + BitVecConstant, ) from manticore.core.smtlib.solver import Z3Solver, YicesSolver, CVC4Solver from manticore.core.smtlib.expression import * from manticore.utils.helpers import pickle_dumps +from manticore import config # logging.basicConfig(filename = "test.log", # format = "%(asctime)s: %(name)s:%(levelname)s: %(message)s", # level = logging.DEBUG) +DIRPATH = os.path.dirname(__file__) + + +class RegressionTest(unittest.TestCase): + def test_related_to(self): + import gzip + import pickle, sys + + filename = os.path.abspath(os.path.join(DIRPATH, "data", "ErrRelated.pkl.gz")) + + # A constraint set and a contraint caught in the act of making related_to fail + constraints, constraint = pickle.loads(gzip.open(filename, "rb").read()) + + Z3Solver.instance().can_be_true.cache_clear() + ground_truth = Z3Solver.instance().can_be_true(constraints, constraint) + self.assertEqual(ground_truth, False) + + Z3Solver.instance().can_be_true.cache_clear() + self.assertEqual( + ground_truth, + Z3Solver.instance().can_be_true(constraints.related_to(constraints), constraint), + ) + + # Replace + new_constraint = Operators.UGE( + Operators.SEXTEND(BitVecConstant(256, 0x1A), 256, 512) * BitVecConstant(512, 1), + 0x00000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000, + ) + self.assertEqual(translate_to_smtlib(constraint), translate_to_smtlib(new_constraint)) + + Z3Solver.instance().can_be_true.cache_clear() + self.assertEqual(ground_truth, Z3Solver.instance().can_be_true(constraints, new_constraint)) + + Z3Solver.instance().can_be_true.cache_clear() + self.assertEqual( + ground_truth, + Z3Solver.instance().can_be_true(constraints.related_to(new_constraint), new_constraint), + ) + """ class Z3Specific(unittest.TestCase): @@ -943,9 +984,8 @@ def test_SDIV(self): cs.add(b == 0x86) # -122 cs.add(c == 0x11) # 17 cs.add(a == Operators.SDIV(b, c)) - cs.add(d == b / c) + cs.add(d == (b // c)) cs.add(a == d) - self.assertTrue(solver.check(cs)) self.assertEqual(solver.get_value(cs, a), -7 & 0xFF) @@ -1004,6 +1044,33 @@ def test_NOT(self): self.assertTrue(solver.must_be_true(cs, Operators.NOT(False))) self.assertTrue(solver.must_be_true(cs, Operators.NOT(a == b))) + def testRelated(self): + cs = ConstraintSet() + aa1 = cs.new_bool(name="AA1") + aa2 = cs.new_bool(name="AA2") + bb1 = cs.new_bool(name="BB1") + bb2 = cs.new_bool(name="BB2") + cs.add(Operators.OR(aa1, aa2)) + cs.add(Operators.OR(bb1, bb2)) + self.assertTrue(self.solver.check(cs)) + # No BB variables related to AA + self.assertNotIn("BB", cs.related_to(aa1).to_string()) + self.assertNotIn("BB", cs.related_to(aa2).to_string()) + self.assertNotIn("BB", cs.related_to(aa1 == aa2).to_string()) + self.assertNotIn("BB", cs.related_to(aa1 == False).to_string()) + # No AA variables related to BB + self.assertNotIn("AA", cs.related_to(bb1).to_string()) + self.assertNotIn("AA", cs.related_to(bb2).to_string()) + self.assertNotIn("AA", cs.related_to(bb1 == bb2).to_string()) + self.assertNotIn("AA", cs.related_to(bb1 == False).to_string()) + + # Nothing is related to tautologies? + self.assertEqual("", cs.related_to(simplify(bb1 == bb1)).to_string()) + + # But if the tautollogy can not get simplified we have to ask the solver + # and send in all the other stuff + self.assertNotIn("AA", cs.related_to(bb1 == bb1).to_string()) + def test_API(self): """ As we've split up the Constant, Variable, and Operation classes to avoid using multiple inheritance, @@ -1026,6 +1093,39 @@ def test_API(self): for attr in attrs: self.assertTrue(hasattr(cls, attr), f"{cls.__name__} is missing attribute {attr}") + def test_signed_unsigned_LT_simple(self): + cs = ConstraintSet() + a = cs.new_bitvec(32) + b = cs.new_bitvec(32) + + cs.add(a == 0x1) + cs.add(b == 0x80000000) + + lt = b < a + ult = b.ult(a) + + self.assertFalse(self.solver.can_be_true(cs, ult)) + self.assertTrue(self.solver.must_be_true(cs, lt)) + + def test_signed_unsigned_LT_complex(self): + mask = (1 << 32) - 1 + + cs = ConstraintSet() + _a = cs.new_bitvec(32) + _b = cs.new_bitvec(32) + + cs.add(_a == 0x1) + cs.add(_b == (0x80000000 - 1)) + + a = _a & mask + b = (_b + 1) & mask + + lt = b < a + ult = b.ult(a) + + self.assertFalse(self.solver.can_be_true(cs, ult)) + self.assertTrue(self.solver.must_be_true(cs, lt)) + class ExpressionTestYices(ExpressionTest): def setUp(self):