forked from Z3Prover/z3
-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathparikh_image.h
362 lines (286 loc) · 12.9 KB
/
parikh_image.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
#ifndef Z3_STR_PARIKH_H_
#define Z3_STR_PARIKH_H_
#include <iostream>
#include <map>
#include <set>
#include <queue>
#include <string>
#include <memory>
#include <concepts>
#include <compare>
#include <mata/nfa/nfa.hh>
#include <mata/nfa/strings.hh>
#include <mata/nfa/builder.hh>
#include "formula.h"
#include "counter_automaton.h"
#include "formula_preprocess.h"
namespace smt::noodler::parikh {
struct DiseqSide {
int predicate_idx;
ca::AtomicSymbol::PredicateSide side;
bool operator<(const DiseqSide& other) const {
if (predicate_idx < other.predicate_idx) return true;
if (predicate_idx > other.predicate_idx) return false;
return side < other.side;
}
bool operator>(const DiseqSide& other) const {
if (predicate_idx > other.predicate_idx) return true;
if (predicate_idx < other.predicate_idx) return false;
return side > other.side;
}
bool operator==(const DiseqSide& other) const = default;
};
struct DiseqSideWithLevelInfo {
DiseqSide side;
size_t level;
bool operator<(const DiseqSideWithLevelInfo& other) const {
if (level < other.level) return true;
if (level > other.level) return false;
return this->side < other.side;
}
bool operator==(const DiseqSideWithLevelInfo& other) const = default;
};
using Transition = std::tuple<mata::nfa::State, mata::Symbol, mata::nfa::State>;
// Structure storing for each state a vector of transitions adjacent to this state.
// In particular TransitionStateVector[state] is a vector of transitions with source state being state
using TransitionStateVector = std::vector<std::vector<Transition>>;
/**
* @brief Parikh image computation of NFA
*/
class ParikhImage {
private:
mata::nfa::Nfa nfa;
// variable gamma_q^I for each state q
// gamma_q^I == 1 --> q is the first state of a run
std::vector<BasicTerm> gamma_init {};
// variable gamma_q^F for each state q
// gamma_q^F == 1 --> q is the last state of a run
std::vector<BasicTerm> gamma_fin {};
// variable sigma_q for each state q
// sigma_q = n <--> shortest path on a run from an initial state is n
// sigma_q = -1 <--> q does not occur on the run
std::vector<BasicTerm> sigma {};
// variable for each transition
// counting the number times the transition was taken during the run
std::map<Transition, BasicTerm> trans {};
protected:
/**
* computation of temporary formulae
*/
/**
* @brief Compute formula phi_init saying there is one initial state of a run.
* @return LenNode phi_init
*/
LenNode compute_phi_init();
/**
* @brief Compute formula phi_fin saying there might be a final state as the last state of a run.
* @return LenNode phi_fin
*/
LenNode compute_phi_fin();
/**
* @brief Compute formula phi_kirch ensuring that on a run the number of times we enter the state
* equals the number of states we leave the state (+/- one when the state is the first one or the last one).
* @param succ_trans [q] = [(q,.,.), .... ]. Vector (idexed by states q) containing list of transitions with the source state being q
* @param prev_trans [q] = [(.,.,q), .... ]. Vector (idexed by states q) containing list of transitions with the target state being q
* @return LenNode phi_kirch
*/
LenNode compute_phi_kirch(const TransitionStateVector& succ_trans, const TransitionStateVector& prev_trans);
/**
* @brief Compute formulae phi_span ensures connectedness of a run. Formula checks if there is a consistent
* spanning tree wrt a run.
* @param succ_trans [q] = [(q,.,.), .... ]. Vector (idexed by states q) containing list of transitions with the source state being q
* @param prev_trans [q] = [(.,.,q), .... ]. Vector (idexed by states q) containing list of transitions with the target state being q
* @return LenNode phi_span
*/
LenNode compute_phi_span(const TransitionStateVector& succ_trans, const TransitionStateVector& prev_trans);
public:
ParikhImage(const mata::nfa::Nfa& nfa) : nfa(nfa) { }
/**
* @brief Compute Parikh image of the nfa without the symbol mapping.
* The output is the number of transitions taken; not the number of symbols taken.
*
* @return LenNode Parikh image
*/
virtual LenNode compute_parikh_image();
const std::map<Transition, BasicTerm>& get_trans_vars() const {
return this->trans;
}
const std::vector<BasicTerm>& get_gamma_init() const {
return this->gamma_init;
}
const std::vector<BasicTerm>& get_gamma_fin() const {
return this->gamma_fin;
};
const std::vector<BasicTerm>& get_sigma() const {
return this->sigma;
};
void print_transition_var_labeling(std::ostream& output_stream) const;
virtual ~ParikhImage() { }
};
/**
* @brief Parikh image computation of Tag automaton for single disequation
*/
class ParikhImageDiseqTag : public ParikhImage {
protected:
size_t number_of_states_in_row;
ca::TagAut ca;
// Maps every tag (e.g. <L, x>) to a corresponding #<L, x> variable representing
// the number of times a tag has been seen in the underlying automaton.
std::map<ca::AtomicSymbol, BasicTerm> tag_occurence_count_vars {};
// set of atomic symbols used in ca
std::set<ca::AtomicSymbol> atomic_symbols {};
protected:
/**
* @brief Get the formula describing |L| != |R| where L != R is @p diseq.
*
* @param diseq Disequation L != R
* @return LenNode
*/
LenNode get_diseq_length(const Predicate& diseq);
/**
* @brief Generate LIA formula describing lengths of variables @p vars.
*
* @param vars Variables
* @return LenNode Length formula
*/
LenNode get_var_length(const std::set<BasicTerm>& vars);
/**
* @brief Get the mismatch formula for each pair (i,j) of positions in @p diseq.
* phi := OR( mismatch(i,j) where i is position of left of diseq and j is position of right of diseq )
*
* @param diseq Disequation
* @return LenNode phi
*/
LenNode get_all_mismatch_formula(const Predicate& diseq);
/**
* @brief Get mismatch formula for particular positions @p i and @p j.
*
* @param i Position on the left side of @p diseq
* @param j Position on the right side of @p diseq
* @param diseq Diseq
* @return LenNode mismatch(i,j)
*/
LenNode get_mismatch_formula(size_t i, size_t j, const Predicate& diseq, const LenNode& add_right = 0);
/**
* @brief Get formula describing that for selected symbols <R,1,sym> <R,2,sym'>
* on the path we have that sym != sym'.
*
* @return LenNode Different mismatch symbol on the path
*/
LenNode get_diff_symbol_formula();
public:
std::vector<Predicate> predicates;
/**
* Register variables allowing to refer to the values of previous registers.
*/
std::vector<LenNode> registers_in_sampling_order;
/**
* Register variables per disequation sides. Allow to refer to mismatch symbols for every disequation.
*/
std::map<DiseqSide, LenNode> registers_per_disequation_side;
std::map<size_t, std::pair<BasicTerm, BasicTerm>> mismatch_pos_inside_vars_per_diseq;
ParikhImageDiseqTag(const ca::TagAut& ca, const std::set<ca::AtomicSymbol>& atomic_symbols, size_t number_of_states_in_row) :
ParikhImage(ca.nfa),
number_of_states_in_row(number_of_states_in_row),
ca(ca),
tag_occurence_count_vars(),
atomic_symbols(atomic_symbols) { }
/**
* @brief Compute Parikh image with the free variables containing values of registers.
* Assumes that each register is set in each symbol of the TagAut alphabet.
* @return LenNode phi_parikh
*/
LenNode compute_parikh_image() override {
LenNode pi = ParikhImage::compute_parikh_image();
LenNode sc = symbol_count_formula();
return LenNode(LenFormulaType::AND, {
pi,
sc
});
};
size_t get_predicate_count() const {
return this->predicates.size();
}
/**
* @brief Construct formula counting number of AtomicSymbol in each set on the transitions.
* @return LenNode AND (#symb for each AtomicSymbol symb)
*/
LenNode symbol_count_formula();
/**
* @brief Get Length formula for a disequation.
* phi := compute_parikh_image && get_var_length && (get_diseq_length || (get_all_mismatch_formula && get_diff_symbol_formula))
*
* @param diseq Diseq
* @return LenNode phi
*/
LenNode get_diseq_formula(const Predicate& diseq);
std::map<mata::Symbol, std::vector<LenNode>> group_sampling_transition_vars_by_symbol() const;
LenNode express_string_length_preceding_supposed_mismatch(const std::vector<BasicTerm>& predicate_side, size_t supposed_mismatch_pos) const;
std::pair<LenNode, LenNode> express_mismatch_position(const std::vector<BasicTerm>& predicate_side, size_t mismatch_pos, size_t sample_order_label, const LenNode* offset_var = nullptr) const;
LenNode count_register_stores_for_var_and_side(BasicTerm& var, int predicate_side_label) const;
LenNode ensure_symbol_uniqueness_using_total_sum(std::map<mata::Symbol, std::vector<LenNode>>& symbol_to_register_sample_vars) const;
LenNode ensure_symbol_uniqueness_using_implication(std::map<mata::Symbol, std::vector<LenNode>>& symbol_to_register_sample_vars) const;
LenNode make_sure_every_disequation_has_symbols_sampled() const;
LenNode assert_register_values();
LenNode establish_positions_inside_vars_based_on_sampling_order();
LenNode make_mismatch_pos_vars_assertion(int predicate_idx, const BasicTerm& left_var, const BasicTerm& right_var) const;
LenNode make_formula_binding_mismatch_pos_with_implications(const LenNode& var_to_restrict, const std::vector<std::vector<LenNode>>& control_vars_per_level, const BasicTerm& var_containing_mismatch) const;
LenNode make_mismatch_existence_assertion_for_diseq(size_t predicate_idx) const;
void init_mismatch_pos_inside_vars_per_diseq();
std::vector<std::vector<LenNode>> collect_sampling_transisions_for_diseq_and_var(const BasicTerm& var, int diseq_idx, ca::AtomicSymbol::PredicateSide side) const;
LenNode get_formula_for_multiple_diseqs(const std::vector<Predicate>& disequations);
LenNode assert_copy_transition_correctness() const;
};
typedef std::pair<mata::nfa::State, mata::nfa::State> StatePair;
/**
* @brief Parikh image computation for tag automaton representing not contains.
*/
class ParikhImageNotContTag : public ParikhImageDiseqTag {
private:
LenNode offset_var;
protected:
/**
* @brief Get the mismatch formula for each pair (i,j) of positions in @p not_cont.
* phi := OR( mismatch(i,j,offset_var) where i is position of left of not_cont and j is position of right of not_cont ).
* offset_var is added to the right side.
*
* @param not_cont Notcontains
* @return LenNode phi
*/
LenNode get_nt_all_mismatch_formula(const Predicate& not_cont);
public:
ParikhImageNotContTag(const ca::TagAut& ca, const std::set<ca::AtomicSymbol>& atomic_symbols, size_t number_of_states_in_copy)
: ParikhImageDiseqTag(ca, atomic_symbols, number_of_states_in_copy), offset_var(util::mk_noodler_var_fresh("offset_var")) { }
/**
* Create a formula asserting that |LHS| - |RHS| < offset_var.
*
* Assumes that the variables inside the underlying notContains will be present
* in the overall notContains LIA formula, and that they in fact represent |x|.
*/
LenNode mk_rhs_longer_than_lhs_formula(const Predicate& notContains);
mata::nfa::State map_copy_state_into_its_origin(const mata::nfa::State state) const;
/**
* Given a @p parikh_image of the underlying tag automaton, group transitions that were
* created using the same transition of the eps-concatenation as a template.
*
* For example, if the eps-concatenation contained (q, a, q'), then the tag automaton
* will contain multiple transitions (q, <Tags>, q') (a bit over-simplified).
*
* Internally abuses the fact that there the state `p` and `p+K` for some fixed K is a copy
* of the same state from the eps-concatenation.
*/
std::unordered_map<StatePair, std::vector<LenNode>> group_isomorphic_transitions_across_copies(const std::map<Transition, BasicTerm>& parikh_image) const;
/**
* Create a formula asserting that @p parikh_image and @p other_parikh_image
* encode the same run in the underlying eps-concatenation.
*/
LenNode mk_parikh_images_encode_same_word_formula(const std::map<Transition, BasicTerm>& parikh_image, const std::map<Transition, BasicTerm>& other_image) const;
LenNode get_not_cont_formula(const Predicate& not_cont);
LenNode get_offset_var() const;
LenNode existentially_quantify_all_parikh_vars(LenNode& formula);
};
}
namespace std {
std::ostream& operator<<(std::ostream& out_stream, const smt::noodler::parikh::Transition& transition);
}
#endif