VeriFIT · vhavlena · Dec 20, 2023 · Oct 28, 2023 · Nov 2, 2023 · Nov 3, 2023
diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
@@ -1451,6 +1451,9 @@ br_status seq_rewriter::mk_seq_contains(expr* a, expr* b, expr_ref& result) {
         }
     }
 
+    // This is for Z3-Noodler as it may itroduce seq.unit functions, which are not supported
+    return BR_FAILED;
+
     unsigned offs = 0;
     unsigned sz = as.size();
     expr* b0 = bs.get(0);
@@ -1843,13 +1846,14 @@ br_status seq_rewriter::mk_seq_index(expr* a, expr* b, expr* c, expr_ref& result
     default:
         break;
     }
-    if (is_zero && !as.empty() && str().is_unit(as.get(0))) {
-        expr_ref a1(str().mk_concat(as.size() - 1, as.data() + 1, as[0]->get_sort()), m());
-        expr_ref b1(str().mk_index(a1, b, c), m());
-        result = m().mk_ite(str().mk_prefix(b, a), zero(), 
-                            m().mk_ite(m_autil.mk_ge(b1, zero()), m_autil.mk_add(one(), b1), minus_one()));
-        return BR_REWRITE3;
-    }
+    // FIXME: Not suitable for Z3-Noodler as it itroduces ite constructs inside predicates.
+    // if (is_zero && !as.empty() && str().is_unit(as.get(0))) {
+    //     expr_ref a1(str().mk_concat(as.size() - 1, as.data() + 1, as[0]->get_sort()), m());
+    //     expr_ref b1(str().mk_index(a1, b, c), m());
+    //     result = m().mk_ite(str().mk_prefix(b, a), zero(), 
+    //                         m().mk_ite(m_autil.mk_ge(b1, zero()), m_autil.mk_add(one(), b1), minus_one()));
+    //     return BR_REWRITE3;
+    // }
     expr_ref ra(a, m());
     if (str().is_unit(b) && m().is_value(b) && 
         reduce_by_char(ra, b, 4)) {

diff --git a/src/smt/CMakeLists.txt b/src/smt/CMakeLists.txt
@@ -76,6 +76,7 @@ z3_add_component(smt
     theory_str_noodler/nielsen_decision_procedure.cpp
     theory_str_noodler/formula.cpp
     theory_str_noodler/util.cc
+    theory_str_noodler/expr_cases.cpp
     theory_str_noodler/regex.cpp
     theory_str_noodler/aut_assignment.cpp
     theory_str_mc.cpp

diff --git a/src/smt/theory_str_noodler/aut_assignment.h b/src/smt/theory_str_noodler/aut_assignment.h
@@ -141,7 +141,7 @@ namespace smt::noodler {
                 len = cmp.num_of_states() - 1;
             else 
                 len = -1;
-            return cmp.num_of_states() == cmp.delta.num_of_transitions() + 1;
+            return (cmp.num_of_states() == cmp.delta.num_of_transitions() + 1) && (cmp.final.size() == 1);
         }
 
         /**
@@ -159,6 +159,19 @@ namespace smt::noodler {
             return aut.num_of_states() == aut.delta.num_of_transitions() + 1 && aut.initial.size() == 1 && aut.final.size() == 1;
         }
 
+        /**
+         * @brief Check if the given terms have disjoint languages.
+         * 
+         * @param t1 First term
+         * @param t2 Second term
+         * @return true <-> L(t1) and L(t2) are disjoint.
+         */
+        bool are_disjoint(const BasicTerm &t1, const BasicTerm& t2) const {
+            mata::nfa::Nfa aut_t1 = *this->at(t1);
+            mata::nfa::Nfa aut_t2 = *this->at(t2);
+            return  mata::nfa::intersection(aut_t1, aut_t2).is_lang_empty();
+        }
+
         /**
          * @brief Check if the language of the basic term has a fixed length
          * 
@@ -184,6 +197,9 @@ namespace smt::noodler {
          */
         bool is_sat() const {
             for (const auto& pr : *this) {
+                if(pr.second->final.size() == 0) {
+                    return false;
+                }
                 if(pr.second->is_lang_empty())
                     return false;
             }

diff --git a/src/smt/theory_str_noodler/decision_procedure.cpp b/src/smt/theory_str_noodler/decision_procedure.cpp
@@ -780,14 +780,31 @@ namespace smt::noodler {
         FormulaPreprocessor prep_handler{std::move(this->formula), std::move(this->init_aut_ass), std::move(this->init_length_sensitive_vars), m_params};
 
         // So-far just lightweight preprocessing
+        prep_handler.remove_trivial();
         prep_handler.reduce_diseqalities();
         if (opt == PreprocessType::UNDERAPPROX) {
             prep_handler.underapprox_languages();
         }
+        prep_handler.propagate_eps();
+        // Refinement of languages is beneficial only for instances containing not(contains) or disequalities (it is used to reduce the number of 
+        // disequations/not(contains). For a strong reduction you need to have languages as precise as possible). In the case of 
+        // pure equalitities it could create bigger automata, which may be problem later during the noodlification.
+        if(this->formula.contains_pred_type(PredicateType::Inequation) || this->not_contains.get_predicates().size() > 0) {
+            // Refine languages is applied in the order given by the predicates. Single iteration 
+            // might not update crucial variables that could contradict the formula. 
+            // Two iterations seem to be a good trade-off since the automata could explode in the fixpoint.
+            prep_handler.refine_languages();
+            prep_handler.refine_languages();
+        }
         prep_handler.propagate_variables();
         prep_handler.propagate_eps();
+        prep_handler.infer_alignment();
         prep_handler.remove_regular();
-        prep_handler.skip_len_sat();
+        // Skip_len_sat is not compatible with not(contains) and conversions as the preprocessing may skip equations with variables 
+        // inside not(contains)/conversion. (Note that if opt == PreprocessType::UNDERAPPROX, there is no not(contains)).
+        if(this->not_contains.get_predicates().empty() && this->conversions.empty()) {
+            prep_handler.skip_len_sat();
+        }
         prep_handler.generate_identities();
         prep_handler.propagate_variables();
         prep_handler.refine_languages();
@@ -808,6 +825,7 @@ namespace smt::noodler {
             }   
         );
         prep_handler.generate_equiv(len_eq_vars);
+        prep_handler.common_prefix_propagation();
         prep_handler.propagate_variables();
         prep_handler.generate_identities();
         prep_handler.remove_regular();
@@ -820,15 +838,24 @@ namespace smt::noodler {
             prep_handler.remove_regular();
             prep_handler.skip_len_sat();
         }
+        prep_handler.reduce_regular_sequence(1);
+        prep_handler.remove_regular();
 
         // Refresh the instance
         this->formula = prep_handler.get_modified_formula();
         this->init_aut_ass = prep_handler.get_aut_assignment();
         this->init_length_sensitive_vars = prep_handler.get_len_variables();
         this->preprocessing_len_formula = prep_handler.get_len_formula();
 
+        if (!this->init_aut_ass.is_sat()) {
+            // some automaton in the assignment is empty => we won't find solution
+            return l_false;
+        }
+
         // try to replace the not contains predicates (so-far we replace it by regular constraints)
-        replace_not_contains();
+        if(replace_not_contains() == l_false || can_unify_not_contains(prep_handler) == l_true) {
+            return l_false;
+        }
 
         // there remains some not contains --> return undef
         if(this->not_contains.get_predicates().size() > 0) {
@@ -967,11 +994,18 @@ namespace smt::noodler {
      * lit is a literal by a regular constraint x notin Alit' where  Alit' was obtained from A(lit) by setting all 
      * states initial and final. 
      */
-    void DecisionProcedure::replace_not_contains() {
+    lbool DecisionProcedure::replace_not_contains() {
         Formula remain_not_contains{};
         for(const Predicate& pred : this->not_contains.get_predicates()) {
             Concat left = pred.get_params()[0];
             Concat right = pred.get_params()[1];
+            if(left.size() == 1 && right.size() == 1) {
+                if(this->init_aut_ass.is_singleton(left[0]) && this->init_aut_ass.is_singleton(right[0])) {
+                    if(mata::nfa::are_equivalent(*this->init_aut_ass.at(left[0]), *this->init_aut_ass.at(right[0]))) {
+                        return l_false;
+                    }
+                }
+            }
             if(left.size() == 1 && right.size() == 1) {
                 if(this->init_aut_ass.is_singleton(left[0]) && right[0].is_variable()) {
                     mata::nfa::Nfa nfa_copy = *this->init_aut_ass.at(left[0]);
@@ -990,9 +1024,30 @@ namespace smt::noodler {
                     continue;
                 }
             }
+            if(right.size() == 1 && this->init_aut_ass.is_epsilon(right[0])) {
+                return l_false;
+            }
             remain_not_contains.add_predicate(pred);
         }
         this->not_contains = remain_not_contains;
+        return l_undef;
+    }
+
+    /**
+     * @brief Check if it is possible to syntactically unify not contains terms. If they are included (in the sense of vectors) the 
+     * not(contain) is unsatisfiable.
+     * 
+     * @param prep FormulaPreprocessor
+     * @return l_true -> can be unified
+     */
+    lbool DecisionProcedure::can_unify_not_contains(const FormulaPreprocessor& prep) {
+        for(const Predicate& pred : this->not_contains.get_predicates()) {
+            if(prep.can_unify_contain(pred.get_params()[0], pred.get_params()[1])) {
+                return l_true;
+            }
+
+        }
+        return l_undef;
     }
 
 } // Namespace smt::noodler.
diff --git a/src/smt/theory_str_noodler/decision_procedure.h b/src/smt/theory_str_noodler/decision_procedure.h
@@ -30,6 +30,9 @@ namespace smt::noodler {
         FROM_INT,
     };
 
+    // Term conversion: to_int/from_int ...
+    using TermConversion = std::tuple<BasicTerm,BasicTerm,ConversionType>;
+
     inline std::string get_conversion_name(ConversionType type) {
         switch (type)
         {
@@ -307,7 +310,7 @@ namespace smt::noodler {
         Formula formula;
         AutAssignment init_aut_ass;
         // contains to/from_code/int conversions
-        std::vector<std::tuple<BasicTerm,BasicTerm,ConversionType>> conversions;
+        std::vector<TermConversion> conversions;
 
         // the length formula from preprocessing, get_lengths should create conjunct with it
         LenNode preprocessing_len_formula = LenNode(LenFormulaType::TRUE,{});
@@ -338,8 +341,18 @@ namespace smt::noodler {
 
         /**
          * @brief Construct constraints to get rid of not_contains predicates.
+         * @return l_false -> unsatisfiable constaint; l_undef if it is not evident
+         */
+        lbool replace_not_contains();
+
+        /**
+         * @brief Check if it is possible to syntactically unify not contains terms. If they are included (in the sense of vectors) the 
+         * not(contain) is unsatisfiable.
+         * 
+         * @param prep FormulaPreprocessor
+         * @return l_true -> can be unified 
          */
-        void replace_not_contains();
+        lbool can_unify_not_contains(const FormulaPreprocessor& prep);
 
     public:
 
@@ -360,7 +373,7 @@ namespace smt::noodler {
              Formula formula, AutAssignment init_aut_ass,
              std::unordered_set<BasicTerm> init_length_sensitive_vars,
              const theory_str_noodler_params &par,
-             std::vector<std::tuple<BasicTerm,BasicTerm,ConversionType>> conversions
+             std::vector<TermConversion> conversions
         ) : init_length_sensitive_vars(init_length_sensitive_vars),
             formula(formula),
             init_aut_ass(init_aut_ass),

diff --git a/src/smt/theory_str_noodler/expr_cases.cpp b/src/smt/theory_str_noodler/expr_cases.cpp
@@ -0,0 +1,36 @@
+#include <cassert>
+
+#include "util/z3_exception.h"
+
+#include "util.h"
+#include "theory_str_noodler.h"
+#include "inclusion_graph.h"
+#include "aut_assignment.h"
+
+namespace {
+    using mata::nfa::Nfa;
+}
+
+namespace smt::noodler::expr_cases {
+
+bool is_contains_index(expr* e, expr*& ind, ast_manager& m, seq_util& m_util_s, arith_util& m_util_a) {
+    // e.g. (str.contains (str.substr value2 0 (+ n (str.indexof value2 "A" 0))) "A")
+    expr *subs = nullptr, *val = nullptr, *val_ind = nullptr, *str = nullptr, *str_ind = nullptr, *offset_ind = nullptr;
+    if(m_util_s.str.is_contains(e, subs, val)) {     // subs = (str.substr value2 0 (+ n (str.indexof value2 "A" 0)))
+        expr *subb1 = nullptr, *subb2 = nullptr, *num = nullptr;
+        rational num_val; //n
+        if(m_util_s.str.is_extract(subs, str, subb1, subb2)) {
+            if(m_util_a.is_zero(subb1) && m_util_a.is_add(subb2, num, ind) && m_util_a.is_numeral(num, num_val) && num_val.get_int32() > 0) { 
+                if(m_util_s.str.is_index(ind, str_ind, val_ind) || (m_util_s.str.is_index(ind, str_ind, val_ind, offset_ind) && m_util_a.is_zero(offset_ind))) {
+                    if(str->hash() != str_ind->hash() || val->hash() != val_ind->hash()) {
+                        return false;
+                    }
+                    return true;
+                }
+            }
+        }
+    }
+    return false;
+}
+
+}
diff --git a/src/smt/theory_str_noodler/expr_cases.h b/src/smt/theory_str_noodler/expr_cases.h
@@ -0,0 +1,46 @@
+#ifndef _NOODLER_EXPR_CASES_H_
+#define _NOODLER_EXPR_CASES_H_
+
+#include <functional>
+#include <list>
+#include <set>
+#include <stack>
+#include <map>
+#include <memory>
+#include <queue>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "smt/params/smt_params.h"
+#include "ast/arith_decl_plugin.h"
+#include "ast/seq_decl_plugin.h"
+#include "smt/params/theory_str_params.h"
+#include "smt/smt_kernel.h"
+#include "smt/smt_theory.h"
+#include "smt/smt_arith_value.h"
+#include "util/scoped_vector.h"
+#include "util/union_find.h"
+#include "ast/rewriter/seq_rewriter.h"
+#include "ast/rewriter/th_rewriter.h"
+
+#include "formula.h"
+#include "aut_assignment.h"
+
+namespace smt::noodler::expr_cases {
+
+/**
+ * @brief Check if the given contraint @p e is of the form 
+ * (str.contains (str.substr val 0 (+ n (str.indexof val S 0))) S) where n > 0
+ * 
+ * @param e Constraint to be checked
+ * @param ind Extracted (str.indexof val S 0) term
+ * @param m Ast manager
+ * @param m_util_s string ast util
+ * @param m_util_a arith ast util
+ * @return true <-> if of the particular form.
+ */
+bool is_contains_index(expr* e, expr*& ind, ast_manager& m, seq_util& m_util_s, arith_util& m_util_a);
+
+}
+#endif
diff --git a/src/smt/theory_str_noodler/formula.cpp b/src/smt/theory_str_noodler/formula.cpp
@@ -3,7 +3,6 @@
 
 namespace smt::noodler {
     std::set<BasicTerm> Predicate::get_vars() const {
-        assert(is_eq_or_ineq());
         std::set<BasicTerm> vars;
         for (const auto& side: params) {
             for (const auto &term: side) {

diff --git a/src/smt/theory_str_noodler/formula.h b/src/smt/theory_str_noodler/formula.h
@@ -523,6 +523,21 @@ namespace smt::noodler {
             this->predicates = new_predicates;
         } 
 
+        /**
+         * @brief Does the Formula contain a predicate of a type @p type ?
+         * 
+         * @param type Type of the predicate.
+         * @return true <-> Formula contains predicate of type @p type.
+         */
+        bool contains_pred_type(PredicateType type) const {
+            for(const Predicate& pred : this->predicates) {
+                if(pred.get_type() == type) {
+                    return true;
+                }
+            }
+            return false;
+        }
+
         /**
          * @brief Get union of variables from all predicates
          *