forked from Z3Prover/z3
-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathregex.h
161 lines (142 loc) · 5.96 KB
/
regex.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#ifndef _NOODLER_REGEX_H_
#define _NOODLER_REGEX_H_
#include <functional>
#include <list>
#include <set>
#include <stack>
#include <map>
#include <memory>
#include <queue>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "smt/params/smt_params.h"
#include "ast/arith_decl_plugin.h"
#include "ast/seq_decl_plugin.h"
#include "smt/params/theory_str_params.h"
#include "util/scoped_vector.h"
#include "util/union_find.h"
#include "ast/rewriter/seq_rewriter.h"
#include "ast/rewriter/th_rewriter.h"
#include "formula.h"
#include "util.h"
#include "aut_assignment.h"
// FIXME most if not all these functions should probably be in theory_str_noodler
namespace smt::noodler::regex {
using expr_pair = std::pair<expr_ref, expr_ref>;
using expr_pair_flag = std::tuple<expr_ref, expr_ref, bool>;
// bound for loop (above this number an optimized construction is used)
const unsigned LOOP_BOUND = 5000;
// simulation reduction bound in states (bigger automata are not reduced)
const unsigned RED_BOUND = 1000;
/**
* @brief Info gathered about a regex.
* - min_length: length of shortest words in the regex. In fact it expresses that in the regex there is no
* word shorter than min_length. It does not mean that regex contains a word of length exactly min_length.
* If empty == l_true or l_undef, this value is not valid.
* - universal: is regex universal?
* - empty: is regex empty?
*/
struct RegexInfo {
unsigned min_length;
lbool universal;
lbool empty;
};
/**
* @brief Alphabet wrapper for Z3 alphabet represented by std::set<mata::Symbol> and a Mata alphabet.
*/
struct Alphabet {
std::set<mata::Symbol> alphabet;
mata::OnTheFlyAlphabet mata_alphabet;
Alphabet(const std::set<mata::Symbol>& alph) : alphabet(alph) {
for (const auto& symbol : alph) {
this->mata_alphabet.add_new_symbol(std::to_string(symbol), symbol);
}
}
/// @brief Returns any symbol that is not in the alphabet
mata::Symbol get_unused_symbol() const {
// std::set is ordered, so alphabet is also ordered
if (*alphabet.begin() != 0) {
return 0;
} else {
auto it = alphabet.begin();
mata::Symbol s = *it;
++it;
while (it != alphabet.end()) {
if (s+1 != *it) {
return s+1;
}
++it;
}
return (*it)+1;
}
}
/// @brief Return zstring corresponding the the word @p word, where dummy symbol is replaced with some valid symbol not in the alphabet.
zstring get_string_from_mata_word(mata::Word word) const {
zstring res;
mata::Symbol unused_symbol = get_unused_symbol();
std::replace(word.begin(), word.end(), util::get_dummy_symbol(), unused_symbol);
return zstring(word.size(), word.data());
}
};
/**
* Extract symbols from a given expression @p ex. Append to the output parameter @p alphabet.
* @param[in] ex Expression to be checked for symbols.
* @param[in] m_util_s Seq util for AST.
* @param[out] alphabet A set of symbols with where found symbols are appended to.
*/
void extract_symbols(expr* const ex, const seq_util& m_util_s, std::set<uint32_t>& alphabet);
/**
* Convert expression @p expr to NFA.
* @param[in] expression Expression to be converted to NFA.
* @param[in] m_util_s Seq util for AST.
* @param[in] alphabet Alphabet to be used in re.allchar (SMT2: '.') expressions.
* @param[in] determinize Determinize intermediate automata
* @param[in] make_complement Whether to make complement of the passed @p expr instead.
* @return The resulting regex.
*/
[[nodiscard]] mata::nfa::Nfa conv_to_nfa(const app *expression, const seq_util& m_util_s, const ast_manager& m,
const Alphabet& alphabet, bool determinize = false, bool make_complement = false);
/**
* @brief Get basic information about the regular expression in the form of RegexInfo (see the description above).
* RegexInfo gathers information about emptiness; universality; length of shortest words
*
* @param expression Regex to be checked
* @param m_util_s string ast util
* @param m ast manager
* @return RegexInfo
*/
RegexInfo get_regex_info(const app *expression, const seq_util& m_util_s);
/**
* @brief Create bounded iteration of a given automaton.
*
* @param body_nfa Core NFA
* @param count Number of concatenations
* @return mata::nfa::Nfa NFA
*/
mata::nfa::Nfa create_large_concat(const mata::nfa::Nfa& body_nfa, unsigned count);
/**
* @brief Get the sum of loops of a regex (loop inside a loop is multiplied)
*
* @param reg some regular expression predicate (can be also string literal/var)
* @param m_util_s string ast util
* @return sum of loops inside @p regex, with nested loops multiplied
*/
unsigned get_loop_sum(const app* reg, const seq_util& m_util_s);
class regex_model_fail : public default_exception {
public:
regex_model_fail() : default_exception("Failed to find model of a regex") {}
};
/**
* @brief Try to g et some word accepted by @p regex
*
* It currently cannot handle intersection, complement, or string variables inside regex.
*
* @param regex Regex to be checked
* @param m_util_s string ast util
* @return word accepted by @p regex
* @throws regex_model_fail if the model cannot be found (either regex represents empty language, or it contains intersection/complement/string variables, which this function currently cannot handle)
*/
zstring get_model_from_regex(const app *regex, const seq_util& m_util_s);
}
#endif