Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pyex: optimizations #104

Merged
merged 22 commits into from
Dec 20, 2023
Merged
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
b1afdae
not-prefix: fix; preprocess tuning
vhavlena Oct 28, 2023
c034fe9
theory rewrite in mk_literal; contains special case
vhavlena Nov 2, 2023
41bfef7
loop-protection: rebase fix
vhavlena Nov 3, 2023
a9fb53e
replace: contains specialization
vhavlena Nov 8, 2023
4401366
replace: specialization fix
vhavlena Nov 8, 2023
799d488
not-contains: enlarge the fragment
vhavlena Nov 8, 2023
8eeba68
replace: axiom fix; not-contains: block fixing
vhavlena Nov 24, 2023
410bc6f
mk_literal: string theory propagation
vhavlena Nov 26, 2023
05d96b8
loop-protection: reverting changes
vhavlena Nov 26, 2023
7deac85
preprocess: infer_alignment and common_prefix propagation
vhavlena Nov 29, 2023
3782ba8
not-contains: heuristics enlarging the fragment
vhavlena Dec 1, 2023
c888b83
string_theory_propagation: reverting changes
vhavlena Dec 1, 2023
b993304
seq_rewriter: remove ite; predicate specification
vhavlena Dec 2, 2023
146bcb8
preprocess: generate equivalent, not prefix, not suffix specialization
vhavlena Dec 4, 2023
e060cbf
seq_rewriter: remove seq.unit; generate_equiv: bug fixing; replace: l…
vhavlena Dec 6, 2023
1482571
generate_equiv: minor fix
vhavlena Dec 6, 2023
4df8a37
comments; minor fixes
vhavlena Dec 8, 2023
1684104
resolving Juraj's comments
vhavlena Dec 20, 2023
9b14777
fix typo
jurajsic Dec 20, 2023
960ba0c
comments; renaming
vhavlena Dec 20, 2023
c9ccbcb
can_unify_not_contains: semantics
vhavlena Dec 20, 2023
ae50e8b
skip_len_sat: disabled for conversions
vhavlena Dec 20, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions src/ast/rewriter/seq_rewriter.cpp
Original file line number Diff line number Diff line change
@@ -1451,6 +1451,9 @@ br_status seq_rewriter::mk_seq_contains(expr* a, expr* b, expr_ref& result) {
}
}

// This is for Z3-Noodler as it may itroduce seq.unit functions, which are not supported
return BR_FAILED;

unsigned offs = 0;
unsigned sz = as.size();
expr* b0 = bs.get(0);
@@ -1843,13 +1846,14 @@ br_status seq_rewriter::mk_seq_index(expr* a, expr* b, expr* c, expr_ref& result
default:
break;
}
if (is_zero && !as.empty() && str().is_unit(as.get(0))) {
expr_ref a1(str().mk_concat(as.size() - 1, as.data() + 1, as[0]->get_sort()), m());
expr_ref b1(str().mk_index(a1, b, c), m());
result = m().mk_ite(str().mk_prefix(b, a), zero(),
m().mk_ite(m_autil.mk_ge(b1, zero()), m_autil.mk_add(one(), b1), minus_one()));
return BR_REWRITE3;
}
// FIXME: Not suitable for Z3-Noodler as it itroduces ite constructs inside predicates.
// if (is_zero && !as.empty() && str().is_unit(as.get(0))) {
// expr_ref a1(str().mk_concat(as.size() - 1, as.data() + 1, as[0]->get_sort()), m());
// expr_ref b1(str().mk_index(a1, b, c), m());
// result = m().mk_ite(str().mk_prefix(b, a), zero(),
// m().mk_ite(m_autil.mk_ge(b1, zero()), m_autil.mk_add(one(), b1), minus_one()));
// return BR_REWRITE3;
// }
expr_ref ra(a, m());
if (str().is_unit(b) && m().is_value(b) &&
reduce_by_char(ra, b, 4)) {
1 change: 1 addition & 0 deletions src/smt/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -76,6 +76,7 @@ z3_add_component(smt
theory_str_noodler/nielsen_decision_procedure.cpp
theory_str_noodler/formula.cpp
theory_str_noodler/util.cc
theory_str_noodler/expr_cases.cpp
theory_str_noodler/regex.cpp
theory_str_noodler/aut_assignment.cpp
theory_str_mc.cpp
18 changes: 17 additions & 1 deletion src/smt/theory_str_noodler/aut_assignment.h
Original file line number Diff line number Diff line change
@@ -141,7 +141,7 @@ namespace smt::noodler {
len = cmp.num_of_states() - 1;
else
len = -1;
return cmp.num_of_states() == cmp.delta.num_of_transitions() + 1;
return (cmp.num_of_states() == cmp.delta.num_of_transitions() + 1) && (cmp.final.size() == 1);
}

/**
@@ -159,6 +159,19 @@ namespace smt::noodler {
return aut.num_of_states() == aut.delta.num_of_transitions() + 1 && aut.initial.size() == 1 && aut.final.size() == 1;
}

/**
* @brief Check if the given terms have disjoint languages.
*
* @param t1 First term
* @param t2 Second term
* @return true <-> L(t1) and L(t2) are disjoint.
*/
bool are_disjoint(const BasicTerm &t1, const BasicTerm& t2) const {
mata::nfa::Nfa aut_t1 = *this->at(t1);
mata::nfa::Nfa aut_t2 = *this->at(t2);
return mata::nfa::intersection(aut_t1, aut_t2).is_lang_empty();
}

/**
* @brief Check if the language of the basic term has a fixed length
*
@@ -184,6 +197,9 @@ namespace smt::noodler {
*/
bool is_sat() const {
for (const auto& pr : *this) {
if(pr.second->final.size() == 0) {
return false;
}
if(pr.second->is_lang_empty())
return false;
}
61 changes: 58 additions & 3 deletions src/smt/theory_str_noodler/decision_procedure.cpp
Original file line number Diff line number Diff line change
@@ -780,14 +780,31 @@ namespace smt::noodler {
FormulaPreprocessor prep_handler{std::move(this->formula), std::move(this->init_aut_ass), std::move(this->init_length_sensitive_vars), m_params};

// So-far just lightweight preprocessing
prep_handler.remove_trivial();
prep_handler.reduce_diseqalities();
if (opt == PreprocessType::UNDERAPPROX) {
prep_handler.underapprox_languages();
}
prep_handler.propagate_eps();
// Refinement of languages is beneficial only for instances containing not(contains) or disequalities (it is used to reduce the number of
// disequations/not(contains). For a strong reduction you need to have languages as precise as possible). In the case of
// pure equalitities it could create bigger automata, which may be problem later during the noodlification.
if(this->formula.contains_pred_type(PredicateType::Inequation) || this->not_contains.get_predicates().size() > 0) {
// Refine languages is applied in the order given by the predicates. Single iteration
// might not update crucial variables that could contradict the formula.
// Two iterations seem to be a good trade-off since the automata could explode in the fixpoint.
prep_handler.refine_languages();
prep_handler.refine_languages();
}
prep_handler.propagate_variables();
prep_handler.propagate_eps();
prep_handler.infer_alignment();
prep_handler.remove_regular();
prep_handler.skip_len_sat();
// Skip_len_sat is not compatible with not(contains) and conversions as the preprocessing may skip equations with variables
// inside not(contains)/conversion. (Note that if opt == PreprocessType::UNDERAPPROX, there is no not(contains)).
if(this->not_contains.get_predicates().empty() && this->conversions.empty()) {
prep_handler.skip_len_sat();
}
prep_handler.generate_identities();
prep_handler.propagate_variables();
prep_handler.refine_languages();
@@ -808,6 +825,7 @@ namespace smt::noodler {
}
);
prep_handler.generate_equiv(len_eq_vars);
prep_handler.common_prefix_propagation();
prep_handler.propagate_variables();
prep_handler.generate_identities();
prep_handler.remove_regular();
@@ -820,15 +838,24 @@ namespace smt::noodler {
prep_handler.remove_regular();
prep_handler.skip_len_sat();
}
prep_handler.reduce_regular_sequence(1);
prep_handler.remove_regular();

// Refresh the instance
this->formula = prep_handler.get_modified_formula();
this->init_aut_ass = prep_handler.get_aut_assignment();
this->init_length_sensitive_vars = prep_handler.get_len_variables();
this->preprocessing_len_formula = prep_handler.get_len_formula();

if (!this->init_aut_ass.is_sat()) {
// some automaton in the assignment is empty => we won't find solution
return l_false;
}

// try to replace the not contains predicates (so-far we replace it by regular constraints)
replace_not_contains();
if(replace_not_contains() == l_false || can_unify_not_contains(prep_handler) == l_true) {
return l_false;
}

// there remains some not contains --> return undef
if(this->not_contains.get_predicates().size() > 0) {
@@ -967,11 +994,18 @@ namespace smt::noodler {
* lit is a literal by a regular constraint x notin Alit' where Alit' was obtained from A(lit) by setting all
* states initial and final.
*/
void DecisionProcedure::replace_not_contains() {
lbool DecisionProcedure::replace_not_contains() {
Formula remain_not_contains{};
for(const Predicate& pred : this->not_contains.get_predicates()) {
Concat left = pred.get_params()[0];
Concat right = pred.get_params()[1];
if(left.size() == 1 && right.size() == 1) {
if(this->init_aut_ass.is_singleton(left[0]) && this->init_aut_ass.is_singleton(right[0])) {
if(mata::nfa::are_equivalent(*this->init_aut_ass.at(left[0]), *this->init_aut_ass.at(right[0]))) {
return l_false;
}
}
}
if(left.size() == 1 && right.size() == 1) {
if(this->init_aut_ass.is_singleton(left[0]) && right[0].is_variable()) {
mata::nfa::Nfa nfa_copy = *this->init_aut_ass.at(left[0]);
@@ -990,9 +1024,30 @@ namespace smt::noodler {
continue;
}
}
if(right.size() == 1 && this->init_aut_ass.is_epsilon(right[0])) {
return l_false;
}
remain_not_contains.add_predicate(pred);
}
this->not_contains = remain_not_contains;
return l_undef;
}

/**
* @brief Check if it is possible to syntactically unify not contains terms. If they are included (in the sense of vectors) the
* not(contain) is unsatisfiable.
*
* @param prep FormulaPreprocessor
* @return l_true -> can be unified
*/
lbool DecisionProcedure::can_unify_not_contains(const FormulaPreprocessor& prep) {
for(const Predicate& pred : this->not_contains.get_predicates()) {
if(prep.can_unify_contain(pred.get_params()[0], pred.get_params()[1])) {
return l_true;
}

}
return l_undef;
}

} // Namespace smt::noodler.
19 changes: 16 additions & 3 deletions src/smt/theory_str_noodler/decision_procedure.h
Original file line number Diff line number Diff line change
@@ -30,6 +30,9 @@ namespace smt::noodler {
FROM_INT,
};

// Term conversion: to_int/from_int ...
using TermConversion = std::tuple<BasicTerm,BasicTerm,ConversionType>;

inline std::string get_conversion_name(ConversionType type) {
switch (type)
{
@@ -307,7 +310,7 @@ namespace smt::noodler {
Formula formula;
AutAssignment init_aut_ass;
// contains to/from_code/int conversions
std::vector<std::tuple<BasicTerm,BasicTerm,ConversionType>> conversions;
std::vector<TermConversion> conversions;

// the length formula from preprocessing, get_lengths should create conjunct with it
LenNode preprocessing_len_formula = LenNode(LenFormulaType::TRUE,{});
@@ -338,8 +341,18 @@ namespace smt::noodler {

/**
* @brief Construct constraints to get rid of not_contains predicates.
* @return l_false -> unsatisfiable constaint; l_undef if it is not evident
*/
lbool replace_not_contains();

/**
* @brief Check if it is possible to syntactically unify not contains terms. If they are included (in the sense of vectors) the
* not(contain) is unsatisfiable.
*
* @param prep FormulaPreprocessor
* @return l_true -> can be unified
*/
void replace_not_contains();
lbool can_unify_not_contains(const FormulaPreprocessor& prep);

public:

@@ -360,7 +373,7 @@ namespace smt::noodler {
Formula formula, AutAssignment init_aut_ass,
std::unordered_set<BasicTerm> init_length_sensitive_vars,
const theory_str_noodler_params &par,
std::vector<std::tuple<BasicTerm,BasicTerm,ConversionType>> conversions
std::vector<TermConversion> conversions
) : init_length_sensitive_vars(init_length_sensitive_vars),
formula(formula),
init_aut_ass(init_aut_ass),
36 changes: 36 additions & 0 deletions src/smt/theory_str_noodler/expr_cases.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#include <cassert>

#include "util/z3_exception.h"

#include "util.h"
#include "theory_str_noodler.h"
#include "inclusion_graph.h"
#include "aut_assignment.h"

namespace {
using mata::nfa::Nfa;
}

namespace smt::noodler::expr_cases {

bool is_contains_index(expr* e, expr*& ind, ast_manager& m, seq_util& m_util_s, arith_util& m_util_a) {
// e.g. (str.contains (str.substr value2 0 (+ n (str.indexof value2 "A" 0))) "A")
expr *subs = nullptr, *val = nullptr, *val_ind = nullptr, *str = nullptr, *str_ind = nullptr, *offset_ind = nullptr;
if(m_util_s.str.is_contains(e, subs, val)) { // subs = (str.substr value2 0 (+ n (str.indexof value2 "A" 0)))
expr *subb1 = nullptr, *subb2 = nullptr, *num = nullptr;
rational num_val; //n
if(m_util_s.str.is_extract(subs, str, subb1, subb2)) {
if(m_util_a.is_zero(subb1) && m_util_a.is_add(subb2, num, ind) && m_util_a.is_numeral(num, num_val) && num_val.get_int32() > 0) {
if(m_util_s.str.is_index(ind, str_ind, val_ind) || (m_util_s.str.is_index(ind, str_ind, val_ind, offset_ind) && m_util_a.is_zero(offset_ind))) {
if(str->hash() != str_ind->hash() || val->hash() != val_ind->hash()) {
return false;
}
return true;
}
}
}
}
return false;
}

}
46 changes: 46 additions & 0 deletions src/smt/theory_str_noodler/expr_cases.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#ifndef _NOODLER_EXPR_CASES_H_
#define _NOODLER_EXPR_CASES_H_

#include <functional>
#include <list>
#include <set>
#include <stack>
#include <map>
#include <memory>
#include <queue>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "smt/params/smt_params.h"
#include "ast/arith_decl_plugin.h"
#include "ast/seq_decl_plugin.h"
#include "smt/params/theory_str_params.h"
#include "smt/smt_kernel.h"
#include "smt/smt_theory.h"
#include "smt/smt_arith_value.h"
#include "util/scoped_vector.h"
#include "util/union_find.h"
#include "ast/rewriter/seq_rewriter.h"
#include "ast/rewriter/th_rewriter.h"

#include "formula.h"
#include "aut_assignment.h"

namespace smt::noodler::expr_cases {

/**
* @brief Check if the given contraint @p e is of the form
* (str.contains (str.substr val 0 (+ n (str.indexof val S 0))) S) where n > 0
*
* @param e Constraint to be checked
* @param ind Extracted (str.indexof val S 0) term
* @param m Ast manager
* @param m_util_s string ast util
* @param m_util_a arith ast util
* @return true <-> if of the particular form.
*/
bool is_contains_index(expr* e, expr*& ind, ast_manager& m, seq_util& m_util_s, arith_util& m_util_a);

}
#endif
1 change: 0 additions & 1 deletion src/smt/theory_str_noodler/formula.cpp
Original file line number Diff line number Diff line change
@@ -3,7 +3,6 @@

namespace smt::noodler {
std::set<BasicTerm> Predicate::get_vars() const {
assert(is_eq_or_ineq());
std::set<BasicTerm> vars;
for (const auto& side: params) {
for (const auto &term: side) {
15 changes: 15 additions & 0 deletions src/smt/theory_str_noodler/formula.h
Original file line number Diff line number Diff line change
@@ -523,6 +523,21 @@ namespace smt::noodler {
this->predicates = new_predicates;
}

/**
* @brief Does the Formula contain a predicate of a type @p type ?
*
* @param type Type of the predicate.
* @return true <-> Formula contains predicate of type @p type.
*/
bool contains_pred_type(PredicateType type) const {
for(const Predicate& pred : this->predicates) {
if(pred.get_type() == type) {
return true;
}
}
return false;
}

/**
* @brief Get union of variables from all predicates
*
Loading