decision_procedure.cpp
#include <queue>
#include <utility>
#include <algorithm>
#include <functional>
#include <mata/nfa/strings.hh>
#include "util.h"
#include "aut_assignment.h"
#include "decision_procedure.h"
#include "regex.h"
namespace smt::noodler {
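// Applies substitution_map to this solving state: every variable with an entry in substitution_map is
// replaced (on both sides of every stored inclusion) by the sequence of variables it maps to, and
// inclusions whose two sides become identical after the substitution are dropped.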
void SolvingState::substitute_vars(std::unordered_map<BasicTerm, std::vector<BasicTerm>> &substitution_map) {
// substitutes variables in a vector using substitution_map
auto substitute_vector = [&substitution_map](const std::vector<BasicTerm> &vector) {
std::vector<BasicTerm> result;
for (const BasicTerm &var : vector) {
if (substitution_map.count(var) == 0) {
result.push_back(var);
} else {
const auto &to_this = substitution_map.at(var);
result.insert(result.end(), to_this.begin(), to_this.end());
}
}
return result;
};
// substitutes variables in both sides of inclusion using substitution_map
auto substitute_inclusion = [&substitute_vector](const Predicate &inclusion) {
std::vector<BasicTerm> new_left_side = substitute_vector(inclusion.get_left_side());
std::vector<BasicTerm> new_right_side = substitute_vector(inclusion.get_right_side());
return Predicate{inclusion.get_type(), { new_left_side, new_right_side }};
};
// returns true if the inclusion has the same thing on both sides
auto inclusion_has_same_sides = [](const Predicate &inclusion) { return inclusion.get_left_side() == inclusion.get_right_side(); };
// substitutes variables of inclusions in a set using substitution_map, but does not keep the ones that have the same sides after substitution
auto substitute_set = [&substitute_inclusion, &inclusion_has_same_sides](const std::set<Predicate> &inclusions) {
std::set<Predicate> new_inclusions;
for (const auto &old_inclusion : inclusions) {
auto new_inclusion = substitute_inclusion(old_inclusion);
if (!inclusion_has_same_sides(new_inclusion)) {
new_inclusions.insert(new_inclusion);
}
}
return new_inclusions;
};
inclusions = substitute_set(inclusions);
inclusions_not_on_cycle = substitute_set(inclusions_not_on_cycle);
// substituting inclusions to process is a bit harder: two inclusions that were supposed to
// be processed may become the same after substitution, and we do not want to keep both in inclusions to process
std::set<Predicate> substituted_inclusions_to_process;
std::deque<Predicate> new_inclusions_to_process;
while (!inclusions_to_process.empty()) {
Predicate substituted_inclusion = substitute_inclusion(inclusions_to_process.front());
inclusions_to_process.pop_front();
if (!inclusion_has_same_sides(substituted_inclusion) // we do not want to add inclusion that is already in inclusions_to_process
&& substituted_inclusions_to_process.count(substituted_inclusion) == 0) {
new_inclusions_to_process.push_back(substituted_inclusion);
}
}
inclusions_to_process = new_inclusions_to_process;
}
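// Returns the length formula for var: if var still has an automaton in aut_ass, the formula is taken from
// that automaton; if var was substituted by x_1 ... x_n, the formula is |var| = |x_1| + ... + |x_n|
// together with |var| >= 0.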
LenNode SolvingState::get_lengths(const BasicTerm& var) const {
if (aut_ass.count(var) > 0) {
// if var is not substituted, get length constraint from its automaton
return aut_ass.get_lengths(var);
} else if (substitution_map.count(var) > 0) {
// if var is substituted, i.e. state.substitution_map[var] = x_1 x_2 ... x_n, then we have to create length equation
// |var| = |x_1| + |x_2| + ... + |x_n|
std::vector<LenNode> plus_operands;
for (const auto& subst_var : substitution_map.at(var)) {
plus_operands.emplace_back(subst_var);
}
LenNode result(LenFormulaType::EQ, {var, LenNode(LenFormulaType::PLUS, plus_operands)});
// to be safe, we add |var| >= 0 (for the aut_ass case, it is done in aut_ass.get_lengths)
return LenNode(LenFormulaType::AND, {result, LenNode(LenFormulaType::LEQ, {0, var})});
} else {
util::throw_error("Variable was neither in automata assignment nor was substituted");
return LenNode(BasicTerm(BasicTermType::Literal)); // return something to get rid of warnings
}
}
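// Flattens substitution_map so that every variable maps directly to variables that are not substituted
// themselves (i.e. variables that still have an automaton in aut_ass). For example, {x -> y z, y -> u v}
// becomes {x -> u v z, y -> u v} (assuming u, v and z are unsubstituted).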
void SolvingState::flatten_substition_map() {
std::unordered_map<BasicTerm, std::vector<BasicTerm>> new_substitution_map;
std::function<std::vector<BasicTerm>(const BasicTerm&)> flatten_var;
flatten_var = [&new_substitution_map, &flatten_var, this](const BasicTerm &var) -> std::vector<BasicTerm> {
if (new_substitution_map.count(var) == 0) {
std::vector<BasicTerm> flattened_mapping;
for (const auto &subst_var : this->substitution_map.at(var)) {
if (aut_ass.count(subst_var) > 0) {
// subst_var is not substituted, keep it
flattened_mapping.push_back(subst_var);
} else {
// subst_var has a substitution, flatten it and insert it to the end of flattened_mapping
std::vector<BasicTerm> flattened_mapping_of_subst_var = flatten_var(subst_var);
flattened_mapping.insert(flattened_mapping.end(),
flattened_mapping_of_subst_var.begin(),
flattened_mapping_of_subst_var.end());
}
}
new_substitution_map[var] = flattened_mapping;
return flattened_mapping;
} else {
return new_substitution_map[var];
}
};
for (const auto &subst_map_pair : substitution_map) {
flatten_var(subst_map_pair.first);
}
STRACE("str-nfa",
tout << "Flattened substitution map:" << std::endl;
for (const auto &var_map : new_substitution_map) {
tout << " " << var_map.first.get_name() << " ->";
for (const auto &subst_var : var_map.second) {
tout << " " << subst_var;
}
tout << std::endl;
});
substitution_map = new_substitution_map;
}
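// Worklist algorithm over solving states: repeatedly take a state from the worklist and either report it
// as a solution (if it has no inclusions left to process) or process one of its inclusions, pushing the
// resulting successor states (obtained by noodlification) back to the worklist. Returns l_true if a
// solution was found and l_false if the worklist was exhausted without finding one.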
lbool DecisionProcedure::compute_next_solution() {
// iteratively select the next solving state that can lead to a solution and
// process one of its unprocessed nodes (or possibly find a solution)
STRACE("str", tout << "------------------------"
<< "Getting another solution"
<< "------------------------" << std::endl;);
while (!worklist.empty()) {
SolvingState element_to_process = std::move(worklist.front());
worklist.pop_front();
if (element_to_process.inclusions_to_process.empty()) {
// we found another solution, element_to_process contains the automata
// assignment and variable substitution that satisfy the original
// inclusion graph
solution = std::move(element_to_process);
STRACE("str",
tout << "Found solution:" << std::endl;
for (const auto &var_substitution : solution.substitution_map) {
tout << " " << var_substitution.first << " ->";
for (const auto& subst_var : var_substitution.second) {
tout << " " << subst_var;
}
tout << std::endl;
}
for (const auto& var_aut : solution.aut_ass) {
tout << " " << var_aut.first << " -> NFA" << std::endl;
if (is_trace_enabled("str-nfa")) {
var_aut.second->print_to_mata(tout);
}
}
);
return l_true;
}
// we will now process one inclusion from the inclusion graph which is at front
// i.e. we will update automata assignments and substitutions so that this inclusion is fulfilled
Predicate inclusion_to_process = element_to_process.inclusions_to_process.front();
element_to_process.inclusions_to_process.pop_front();
// this will decide whether we will continue in our search by DFS or by BFS
bool is_inclusion_to_process_on_cycle = element_to_process.is_inclusion_on_cycle(inclusion_to_process);
STRACE("str", tout << "Processing node with inclusion " << inclusion_to_process << " which is" << (is_inclusion_to_process_on_cycle ? " " : " not ") << "on the cycle" << std::endl;);
STRACE("str",
tout << "Length variables are:";
for(auto const &var : inclusion_to_process.get_vars()) {
if (element_to_process.length_sensitive_vars.count(var)) {
tout << " " << var.to_string();
}
}
tout << std::endl;
);
const auto &left_side_vars = inclusion_to_process.get_left_side();
const auto &right_side_vars = inclusion_to_process.get_right_side();
/********************************************************************************************************/
/****************************************** One side is empty *******************************************/
/********************************************************************************************************/
// As a small optimization, we do "noodlification" for empty sides separately (i.e. sides that
// represent the empty string). This case is simpler: we would get only one noodle, so we just need to
// check that the non-empty side actually contains the empty string and replace the vars on that side by epsilon.
if (right_side_vars.empty() || left_side_vars.empty()) {
std::unordered_map<BasicTerm, std::vector<BasicTerm>> substitution_map;
auto const non_empty_side_vars = right_side_vars.empty() ?
inclusion_to_process.get_left_set()
: inclusion_to_process.get_right_set();
bool non_empty_side_contains_empty_word = true;
for (const auto &var : non_empty_side_vars) {
if (element_to_process.aut_ass.contains_epsilon(var)) {
// var contains the empty word, we substitute it with just the empty word, but only if...
if (right_side_vars.empty() // ...non-empty side is the left side (var is from left) or...
|| element_to_process.length_sensitive_vars.count(var) > 0 // ...var is length-aware
) {
assert(substitution_map.count(var) == 0 && element_to_process.aut_ass.count(var) > 0);
// we prepare substitution for all vars on the left or only the length vars on the right
// (as non-length vars are probably not needed? TODO: would it make sense to update non-length vars too?)
substitution_map[var] = {};
element_to_process.aut_ass.erase(var);
}
} else {
// var does not contain empty word => whole non-empty side cannot contain empty word
non_empty_side_contains_empty_word = false;
break;
}
}
if (!non_empty_side_contains_empty_word) {
// in the case that the non_empty side does not contain empty word
// the inclusion cannot hold (noodlification would not create anything)
continue;
}
// TODO: all of the following is also done during normal noodlification, it should be split into better-defined functions
element_to_process.remove_inclusion(inclusion_to_process);
// We might be updating the left side; in that case we need to process all nodes that contain the variables from the left,
// i.e. those nodes to which inclusion_to_process goes. If we are updating the right side, there will be no edges
// coming from inclusion_to_process, so this for loop will do nothing.
for (const auto &dependent_inclusion : element_to_process.get_dependent_inclusions(inclusion_to_process)) {
// we push only those nodes which are not already in inclusions_to_process
// if the inclusion_to_process is on cycle, we need to do BFS
// if it is not on cycle, we can do DFS
// TODO: can we really do DFS??
element_to_process.push_unique(dependent_inclusion, is_inclusion_to_process_on_cycle);
}
// do substitution in the inclusion graph
element_to_process.substitute_vars(substitution_map);
// update the substitution_map of new_element by the new substitutions
element_to_process.substitution_map.merge(substitution_map);
// TODO: should we really push to front when not on cycle?
// TODO: maybe for this case of one side being empty, we should just push to front?
if (!is_inclusion_to_process_on_cycle) {
worklist.push_front(element_to_process);
} else {
worklist.push_back(element_to_process);
}
continue;
}
/********************************************************************************************************/
/*************************************** End of one side is empty ***************************************/
/********************************************************************************************************/
/********************************************************************************************************/
/****************************************** Process left side *******************************************/
/********************************************************************************************************/
std::vector<std::shared_ptr<mata::nfa::Nfa>> left_side_automata;
STRACE("str-nfa", tout << "Left automata:" << std::endl);
for (const auto &l_var : left_side_vars) {
left_side_automata.push_back(element_to_process.aut_ass.at(l_var));
STRACE("str-nfa",
tout << "Automaton for left var " << l_var.get_name() << ":" << std::endl << *left_side_automata.back();
);
}
/********************************************************************************************************/
/************************************** End of left side processing *************************************/
/********************************************************************************************************/
/********************************************************************************************************/
/***************************************** Process right side *******************************************/
/********************************************************************************************************/
// We combine the right side into automata where we concatenate non-length-aware vars next to each other.
// Each right side automaton corresponds to either a concatenation of non-length-aware vars (vector of
// basic terms) or one length-aware var (vector of one basic term). The division then contains, for each right
// side automaton, the variables whose concatenation it represents.
std::vector<std::shared_ptr<mata::nfa::Nfa>> right_side_automata;
std::vector<std::vector<BasicTerm>> right_side_division;
assert(!right_side_vars.empty()); // empty case was processed at the beginning
auto right_var_it = right_side_vars.begin();
auto right_side_end = right_side_vars.end();
std::shared_ptr<mata::nfa::Nfa> next_aut = element_to_process.aut_ass[*right_var_it];
std::vector<BasicTerm> next_division{ *right_var_it };
bool last_was_length = (element_to_process.length_sensitive_vars.count(*right_var_it) > 0);
bool is_there_length_on_right = last_was_length;
++right_var_it;
STRACE("str-nfa", tout << "Right automata:" << std::endl);
for (; right_var_it != right_side_end; ++right_var_it) {
std::shared_ptr<mata::nfa::Nfa> right_var_aut = element_to_process.aut_ass.at(*right_var_it);
if (element_to_process.length_sensitive_vars.count(*right_var_it) > 0) {
// current right_var is length-aware
right_side_automata.push_back(next_aut);
right_side_division.push_back(next_division);
STRACE("str-nfa",
tout << "Automaton for right var(s)";
for (const auto &r_var : next_division) {
tout << " " << r_var.get_name();
}
tout << ":" << std::endl
<< *next_aut;
);
next_aut = right_var_aut;
next_division = std::vector<BasicTerm>{ *right_var_it };
last_was_length = true;
is_there_length_on_right = true;
} else {
// current right_var is not length-aware
if (last_was_length) {
// if last var was length-aware, we need to add automaton for it into right_side_automata
right_side_automata.push_back(next_aut);
right_side_division.push_back(next_division);
STRACE("str-nfa",
tout << "Automaton for right var(s)";
for (const auto &r_var : next_division) {
tout << " " << r_var.get_name();
}
tout << ":" << std::endl
<< *next_aut;
);
next_aut = right_var_aut;
next_division = std::vector<BasicTerm>{ *right_var_it };
} else {
// if last var was not length-aware, we combine it (and possibly the non-length-aware vars before)
// with the current one
next_aut = std::make_shared<mata::nfa::Nfa>(mata::nfa::concatenate(*next_aut, *right_var_aut));
next_division.push_back(*right_var_it);
// TODO should we reduce size here?
}
last_was_length = false;
}
}
right_side_automata.push_back(next_aut);
right_side_division.push_back(next_division);
STRACE("str-nfa",
tout << "Automaton for right var(s)";
for (const auto &r_var : next_division) {
tout << " " << r_var.get_name();
}
tout << ":" << std::endl << *next_aut;
);
/********************************************************************************************************/
/************************************* End of right side processing *************************************/
/********************************************************************************************************/
/********************************************************************************************************/
/****************************************** Inclusion test **********************************************/
/********************************************************************************************************/
if (!is_there_length_on_right) {
// we have no length-aware variables on the right hand side => we need to check if inclusion holds
assert(right_side_automata.size() == 1); // there should be exactly one element in right_side_automata as we do not have length variables
// TODO probably we should try shortest words, it might work correctly
if (is_inclusion_to_process_on_cycle // we do not test the inclusion if the node is not on a cycle, because we will not go back to it (TODO: should we really not test it?)
&& mata::nfa::is_included(element_to_process.aut_ass.get_automaton_concat(left_side_vars), *right_side_automata[0])) {
// TODO can I push to front? I think I can, and I probably want to, so I can immediately test if it is not sat (if element_to_process.inclusions_to_process is empty), or just to get to sat faster
worklist.push_front(element_to_process);
// we continue as there is no need for noodlification, inclusion already holds
continue;
}
}
/********************************************************************************************************/
/*************************************** End of inclusion test ******************************************/
/********************************************************************************************************/
element_to_process.remove_inclusion(inclusion_to_process);
// We are going to change the automata on the left side (potentially also split some on the right side, but that should not have an impact)
// so we need to add all nodes whose variable assignments are going to change on the right side (i.e. we follow the inclusion graph) for processing.
// Warning: Self-loops are not in the inclusion graph, but we might still want to add this node again to inclusions_to_process; however, this node will be
// split during noodlification, so we will only add parts whose right sides actually change (see below in noodlification)
for (const auto &node : element_to_process.get_dependent_inclusions(inclusion_to_process)) {
// we push only those nodes which are not already in inclusions_to_process
// if the inclusion_to_process is on cycle, we need to do BFS
// if it is not on cycle, we can do DFS
// TODO: can we really do DFS??
element_to_process.push_unique(node, is_inclusion_to_process_on_cycle);
}
// We will need the set of left vars, so we can sort the 'non-existing self-loop' in noodlification (see previous warning)
const auto left_vars_set = inclusion_to_process.get_left_set();
/* TODO check here if we have empty elements_to_process; if we do, then every noodle we get should finish and return sat.
* Right now, if we test sat at the beginning it should work, but it is probably better to immediately return sat if we have
* empty elements_to_process. However, we would need to remember the state of the algorithm and return back to the noodles
* and process them if z3 realizes that the result is actually not sat (because of lengths)
*/
/********************************************************************************************************/
/******************************************* Noodlification *********************************************/
/********************************************************************************************************/
/**
* We get noodles where each noodle consists of automata connected with a vector of numbers.
* So for example if we have some noodle and automaton noodle[i].first, then noodle[i].second is a vector,
* where first element i_l = noodle[i].second[0] tells us that automaton noodle[i].first belongs to the
* i_l-th left var (i.e. left_side_vars[i_l]) and the second element i_r = noodle[i].second[1] tells us that
* it belongs to the i_r-th division of the right side (i.e. right_side_division[i_r])
**/
auto noodles = mata::strings::seg_nfa::noodlify_for_equation(left_side_automata,
right_side_automata,
false,
{{"reduce", "forward"}});
for (const auto &noodle : noodles) {
STRACE("str", tout << "Processing noodle" << (is_trace_enabled("str-nfa") ? " with automata:" : "") << std::endl;);
SolvingState new_element = element_to_process;
/* Explanation of the next code on an example:
* Left side has variables x_1, x_2, x_3, x_2 while the right side has variables x_4, x_1, x_5, x_6, where x_1
* and x_4 are length-aware (i.e. there is one automaton for concatenation of x_5 and x_6 on the right side).
* Assume that noodle represents the case where it was split like this:
* | x_1 | x_2 | x_3 | x_2 |
* | t_1 | t_2 | t_3 | t_4 | t_5 | t_6 |
* | x_4 | x_1 | x_5 | x_6 |
* In the following for loop, we create the vars t1, t2, ..., t6 and prepare two vectors left_side_vars_to_new_vars
* and right_side_divisions_to_new_vars which map left vars and right divisions into the concatenation of the new
* vars. So for example left_side_vars_to_new_vars[1] = t_2 t_3, because second left var is x_2 and we map it to t_2 t_3,
* while right_side_divisions_to_new_vars[2] = t_6, because the third division on the right represents the automaton for
* concatenation of x_5 and x_6 and we map it to t_6.
*/
std::vector<std::vector<BasicTerm>> left_side_vars_to_new_vars(left_side_vars.size());
std::vector<std::vector<BasicTerm>> right_side_divisions_to_new_vars(right_side_division.size());
for (unsigned i = 0; i < noodle.size(); ++i) {
// TODO do not make a new_var if we can replace it with one left or right var (i.e. new_var is exactly left or right var)
// TODO also if we can substitute with epsilon, we should do that first? or generally process epsilon substitutions better, in some sort of 'preprocessing'
BasicTerm new_var = util::mk_noodler_var_fresh(std::string("align_") + std::to_string(noodlification_no));
left_side_vars_to_new_vars[noodle[i].second[0]].push_back(new_var);
right_side_divisions_to_new_vars[noodle[i].second[1]].push_back(new_var);
new_element.aut_ass[new_var] = noodle[i].first; // we assign the automaton to new_var
STRACE("str-nfa", tout << new_var << std::endl << *noodle[i].first;);
}
// Each variable that occurs in the left side or is length-aware needs to be substituted, we use this map for that
std::unordered_map<BasicTerm, std::vector<BasicTerm>> substitution_map;
/* Following the example from before, the following loop will create these inclusions from the right side divisions:
* t_1 t_2 ⊆ x_4
* t_3 t_4 t_5 ⊆ x_1
* t_6 ⊆ x_5 x_6
* However, we do not add the first two inclusions into the inclusion graph but use them for substitution, i.e.
* substitution_map[x_4] = t_1 t_2
* substitution_map[x_1] = t_3 t_4 t_5
* because they are length-aware vars.
*/
for (unsigned i = 0; i < right_side_division.size(); ++i) {
const auto &division = right_side_division[i];
if (division.size() == 1 && element_to_process.length_sensitive_vars.count(division[0]) != 0) {
// right side is length-aware variable y => we are either substituting or adding new inclusion "new_vars ⊆ y"
const BasicTerm &right_var = division[0];
if (substitution_map.count(right_var)) {
// right_var is already substituted, therefore we add 'new_vars ⊆ right_var' to the inclusion graph
// TODO: how to decide if something is on a cycle? by the previous node being on a cycle, or when we recompute inclusion graph edges?
const auto &new_inclusion = new_element.add_inclusion(right_side_divisions_to_new_vars[i], division, is_inclusion_to_process_on_cycle);
// we also add this inclusion to the worklist, as it represents unification
// we push it to the front if we are processing node that is not on the cycle, because it should not get stuck in the cycle then
// TODO: is this correct? can we push to the front?
// TODO: can't we push to front even if it is on cycle??
new_element.push_unique(new_inclusion, is_inclusion_to_process_on_cycle);
STRACE("str", tout << "added new inclusion from the right side because it could not be substituted: " << new_inclusion << std::endl; );
} else {
// right_var is not substituted by anything yet, we will substitute it
substitution_map[right_var] = right_side_divisions_to_new_vars[i];
STRACE("str", tout << "right side var " << right_var.get_name() << " replaced with:"; for (auto const &var : right_side_divisions_to_new_vars[i]) { tout << " " << var.get_name(); } tout << std::endl; );
// as right_var will be substituted in the inclusion graph, we do not need to remember the automaton assignment for it
new_element.aut_ass.erase(right_var);
// update the length variables
for (const BasicTerm &new_var : right_side_divisions_to_new_vars[i]) {
new_element.length_sensitive_vars.insert(new_var);
}
}
} else {
// right side is non-length concatenation "y_1...y_n" => we are adding new inclusion "new_vars ⊆ y1...y_n"
// TODO: how to decide if something is on a cycle? by the previous node being on a cycle, or when we recompute inclusion graph edges?
// TODO: do we need to add inclusion if previous node was not on cycle? because I think it is not possible to get to this new node anyway
const auto &new_inclusion = new_element.add_inclusion(right_side_divisions_to_new_vars[i], division, is_inclusion_to_process_on_cycle);
// we add this inclusion to the worklist only if the right side contains something that was on the left (i.e. it was possibly changed)
if (SolvingState::is_dependent(left_vars_set, new_inclusion.get_right_set())) {
// TODO: again, push to front or back? it is unclear where to push
new_element.push_unique(new_inclusion, is_inclusion_to_process_on_cycle);
}
STRACE("str", tout << "added new inclusion from the right side (non-length): " << new_inclusion << std::endl; );
}
}
/* Following the example from before, the following loop will create these inclusions from the left side:
* x_1 ⊆ t_1
* x_2 ⊆ t_2 t_3
* x_3 ⊆ t_4
* x_2 ⊆ t_5 t_6
* Again, we want to use the inclusions for substitutions, but we replace only those variables which were
* not substituted yet, so the first inclusion stays (x_1 was substituted from the right side) and the
* fourth inclusion stays (as we substitute x_2 using the second inclusion). So from the second and third
* inclusion we get:
* substitution_map[x_2] = t_2 t_3
* substitution_map[x_3] = t_4
*/
for (unsigned i = 0; i < left_side_vars.size(); ++i) {
// TODO maybe if !is_there_length_on_right, we should just do intersection and not create new inclusions
const BasicTerm &left_var = left_side_vars[i];
if (left_var.is_literal()) {
// we skip literals, we do not want to substitute them
continue;
}
if (substitution_map.count(left_var)) {
// left_var is already substituted, therefore we add 'left_var ⊆ left_side_vars_to_new_vars[i]' to the inclusion graph
std::vector<BasicTerm> new_inclusion_left_side{ left_var };
// TODO: how to decide if something is on a cycle? by the previous node being on a cycle, or when we recompute inclusion graph edges?
const auto &new_inclusion = new_element.add_inclusion(new_inclusion_left_side, left_side_vars_to_new_vars[i], is_inclusion_to_process_on_cycle);
// we also add this inclusion to the worklist, as it represents unification
// we push it to the front if we are processing node that is not on the cycle, because it should not get stuck in the cycle then
// TODO: is this correct? can we push to the front?
// TODO: can't we push to front even if it is on cycle??
new_element.push_unique(new_inclusion, is_inclusion_to_process_on_cycle);
STRACE("str", tout << "added new inclusion from the left side because it could not be substituted: " << new_inclusion << std::endl; );
} else {
// TODO make this function or something, we do the same thing here as for the right side when substituting
// left_var is not substituted by anything yet, we will substitute it
substitution_map[left_var] = left_side_vars_to_new_vars[i];
STRACE("str", tout << "left side var " << left_var.get_name() << " replaced with:"; for (auto const &var : left_side_vars_to_new_vars[i]) { tout << " " << var.get_name(); } tout << std::endl; );
// as left_var will be substituted in the inclusion graph, we do not need to remember the automaton assignment for it
new_element.aut_ass.erase(left_var);
// update the length variables
if (new_element.length_sensitive_vars.count(left_var) > 0) { // if left_var is length-aware => substituted vars should become length-aware
for (const BasicTerm &new_var : left_side_vars_to_new_vars[i]) {
new_element.length_sensitive_vars.insert(new_var);
}
}
}
}
// do substitution in the inclusion graph
new_element.substitute_vars(substitution_map);
// update the substitution_map of new_element by the new substitutions
new_element.substitution_map.merge(substitution_map);
// TODO should we really push to front when not on cycle?
if (!is_inclusion_to_process_on_cycle) {
worklist.push_front(new_element);
} else {
worklist.push_back(new_element);
}
}
++noodlification_no; // TODO: when to do this increment?? maybe noodlification_no should be part of SolvingState?
/********************************************************************************************************/
/*************************************** End of noodlification ******************************************/
/********************************************************************************************************/
}
// there are no solving states left, which means nothing led to solution -> it must be unsatisfiable
return l_false;
}
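// Builds the initial length formula that can be checked before computing a solution: the length formula
// from preprocessing conjoined with the length abstraction of the initial automata assignment, either for
// all variables of the formula or only for the length-sensitive ones.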
LenNode DecisionProcedure::get_initial_lengths(bool all_vars) {
if (init_length_sensitive_vars.empty()) {
// there are no length sensitive vars, so we can immediately say true
return LenNode(LenFormulaType::TRUE);
}
// start from length formula from preprocessing
std::vector<LenNode> conjuncts = {preprocessing_len_formula};
// for each initial length variable get the lengths of all its possible words for automaton in init_aut_ass
if(all_vars) {
for (const BasicTerm &var : this->formula.get_vars()) {
conjuncts.push_back(init_aut_ass.get_lengths(var));
}
} else {
for (const BasicTerm &var : this->init_length_sensitive_vars) {
conjuncts.push_back(init_aut_ass.get_lengths(var));
}
}
return LenNode(LenFormulaType::AND, conjuncts);
}
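// Builds the length (LIA) formula describing the computed solution: the disequation and preprocessing
// constraints, the length constraints of all length-sensitive variables, and the formulas obtained from
// get_formula_for_conversions, get_formula_for_ca_diseqs and get_formula_for_not_contains. The returned
// precision tells whether the formula is precise or an underapproximation.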
std::pair<LenNode, LenNodePrecision> DecisionProcedure::get_lengths() {
LenNodePrecision precision = LenNodePrecision::PRECISE; // start with precise and possibly change it later
if (solution.length_sensitive_vars.empty() && this->not_contains.get_predicates().empty()
&& this->disequations.get_predicates().empty()) {
// There is no not_contains predicate to be solved and there are no length vars (which also means no
// disequations nor conversions), so we do not need to create the lengths formula.
return {LenNode(LenFormulaType::TRUE), precision};
}
// start with formula for disequations
std::vector<LenNode> conjuncts = disequations_len_formula_conjuncts;
// add length formula from preprocessing
conjuncts.push_back(preprocessing_len_formula);
// create length constraints from the solution, we only need to look at length sensitive vars
for (const BasicTerm &len_var : solution.length_sensitive_vars) {
conjuncts.push_back(solution.get_lengths(len_var));
}
// the following functions (getting formula for conversions) assume that we have flattened substitution map
solution.flatten_substition_map();
// add formula for conversions
auto conv_form_with_precision = get_formula_for_conversions();
conjuncts.push_back(conv_form_with_precision.first);
precision = conv_form_with_precision.second;
// get the LIA formula describing solutions for special predicates
conjuncts.push_back(get_formula_for_ca_diseqs());
auto not_cont_prec = get_formula_for_not_contains();
precision = get_resulting_precision_for_conjunction(precision, not_cont_prec.second);
conjuncts.push_back(not_cont_prec.first);
LenNode result(LenFormulaType::AND, conjuncts);
STRACE("str", tout << "Final " << (precision == LenNodePrecision::PRECISE ? "precise" : "underapproximating") << " formula from get_lengths(): " << result << std::endl;);
return {result, precision};
}
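// Collects, from the (flattened) solution, the variables substituting the string variables used in
// to_code/from_code conversions (first component of the result) and in to_int/from_int conversions
// (second component).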
std::pair<std::set<BasicTerm>,std::set<BasicTerm>> DecisionProcedure::get_vars_substituted_in_conversions() {
std::set<BasicTerm> code_subst_vars, int_subst_vars;
for (const TermConversion& conv : conversions) {
switch (conv.type)
{
case ConversionType::FROM_CODE:
case ConversionType::TO_CODE:
{
for (const BasicTerm& var : solution.get_substituted_vars(conv.string_var)) {
code_subst_vars.insert(var);
}
break;
}
case ConversionType::TO_INT:
case ConversionType::FROM_INT:
{
for (const BasicTerm& var : solution.get_substituted_vars(conv.string_var)) {
int_subst_vars.insert(var);
}
break;
}
default:
UNREACHABLE();
}
}
return {code_subst_vars, int_subst_vars};
}
LenNode DecisionProcedure::get_formula_for_code_subst_vars(const std::set<BasicTerm>& code_subst_vars) {
LenNode result(LenFormulaType::AND);
// for each code substituting variable c, create the formula
// (|c| != 1 && code_version_of(c) == -1) || (|c| == 1 && code_version_of(c) is code point of one of the chars in the language of automaton for c)
for (const BasicTerm& c : code_subst_vars) {
// non_char_case = (|c| != 1 && code_version_of(c) == -1)
LenNode non_char_case(LenFormulaType::AND, { {LenFormulaType::NEQ, std::vector<LenNode>{c, 1}}, {LenFormulaType::EQ, std::vector<LenNode>{code_version_of(c),-1}} });
// char_case = (|c| == 1 && code_version_of(c) is code point of one of the chars in the language of automaton for c)
LenNode char_case(LenFormulaType::AND, { {LenFormulaType::EQ, std::vector<LenNode>{c, 1}}, /* code_version_of(c) is code point of one of the chars in the language of automaton for c */ });
// the rest is just computing 'code_version_of(c) is code point of one of the chars in the language of automaton for c'
// chars in the language of c (except dummy symbol)
std::set<mata::Symbol> real_symbols_of_code_var;
bool is_there_dummy_symbol = false;
for (mata::Symbol s : mata::strings::get_accepted_symbols(*solution.aut_ass.at(c))) { // iterate trough chars of c
if (!util::is_dummy_symbol(s)) {
real_symbols_of_code_var.insert(s);
} else {
is_there_dummy_symbol = true;
}
}
if (!is_there_dummy_symbol) {
// if there is no dummy symbol, we can just create disjunction that code_version_of(c) is equal to one of the symbols in real_symbols_of_code_var
std::vector<LenNode> equal_to_one_of_symbols;
for (mata::Symbol s : real_symbols_of_code_var) {
equal_to_one_of_symbols.emplace_back(LenFormulaType::EQ, std::vector<LenNode>{code_version_of(c), s});
}
char_case.succ.emplace_back(LenFormulaType::OR, equal_to_one_of_symbols);
} else {
// if there is dummy symbol, then code_version_of(c) can be code point of any char, except those in the alphabet but not in real_symbols_of_code_var
// code_version_of(c) is valid code_point: (0 <= code_version_of(c) <= max_char)
char_case.succ.emplace_back(LenFormulaType::LEQ, std::vector<LenNode>{0, code_version_of(c)});
char_case.succ.emplace_back(LenFormulaType::LEQ, std::vector<LenNode>{code_version_of(c), zstring::max_char()});
// code_version_of(c) is not equal to code point of some symbol in the alphabet that is not in real_symbols_of_code_var
for (mata::Symbol s : solution.aut_ass.get_alphabet()) {
if (!util::is_dummy_symbol(s) && !real_symbols_of_code_var.contains(s)) {
char_case.succ.emplace_back(LenFormulaType::NEQ, std::vector<LenNode>{code_version_of(c), s});
}
}
}
result.succ.emplace_back(LenFormulaType::OR, std::vector<LenNode>{
non_char_case,
char_case
});
}
return result;
}
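// Encodes that the integer variable var has one of the values described by the given interval words
// (words over intervals of digit symbols, e.g. [4-5][0-9] describes the numbers 40-59); the result is a
// disjunction of constraints of the form min <= var <= max (or var = min when min == max).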
LenNode DecisionProcedure::encode_interval_words(const BasicTerm& var, const std::vector<interval_word>& interval_words) {
LenNode result(LenFormulaType::OR);
for (const auto& interval_word : interval_words) {
// How this works on an example:
// interval_word = [4-5][0-9][2-5][0-9][0-9]
// We need to encode, as succinctly as possible, that var is any number from the interval word.
// It is easy to see that because the last two positions can use all digits, we can create the following inequalities:
// ..200 <= var <= ..599
// where the first two digits are any of [4-5] and [0-9] respectively, i.e., the full encoding should be the
// following disjunct:
// 40200 <= var <= 40599 ||
// 41200 <= var <= 41599 ||
// ...
// 49200 <= var <= 49599 ||
// 50200 <= var <= 50599 ||
// 51200 <= var <= 51599 ||
// ...
// 59200 <= var <= 59599
// We first compute the vector interval_cases of all the intervals [40200-40599], [41200-41599], ...
// by going backwards in the interval_word, first by computing the main interval [..200-..599] which
// ends after hitting the first digit interval that does not contain all digits (in the example it is [2-5])
// and after that point, we add all the possible cases for all digits in the following digit intervals.
std::vector<std::pair<rational,rational>> interval_cases = { {rational(0),rational(0)} }; // start with interval [0-0], with the assumption that interval word is not empty
assert(interval_word.size() > 0);
rational place_value(1); // which point in the interval_word we are now (ones, tens, hundreds, etc.)
bool need_to_split = false; // have we hit the digit interval that does not contain all digits yet?
for (auto interval_it = interval_word.crbegin(); interval_it != interval_word.crend(); ++interval_it) { // going backwards in interval_word
// we are processing interval [s-e]
rational interval_start(interval_it->first - AutAssignment::DIGIT_SYMBOL_START);
rational interval_end(interval_it->second - AutAssignment::DIGIT_SYMBOL_START);
if (!need_to_split) {
// We are in the situation that all digit intervals after the currently processed one are in the form [0-9], i.e., we have
// ...[s-e][0-9]...[0-9]
// Therefore, we should have only one interval of the form [0...0-9...9] in interval_cases
assert(interval_cases.size() == 1);
// We change this interval into [s0...0-e9...9]
interval_cases[0].first += interval_start*place_value;
interval_cases[0].second += interval_end*place_value;
if (interval_start != 0 || interval_end != 9) {
// If the currently processed interval is not of the form [0-9], we will have to add all cases for the digits in the remaining (more significant) intervals
need_to_split = true;
}
} else {
// At least one of the digit intervals after the currently processed one is not in the form [0-9],
// so for each interval [S-E] in interval_cases and each digit d, s<=d<=e, we need to add interval
// [dS-dE] to the new vector of interval_cases.
std::vector<std::pair<rational,rational>> new_interval_cases;
for (std::pair<rational,rational>& interval_case : interval_cases) {
// for each interval [S-E] in interval_cases
for (rational possible_digit = interval_start; possible_digit <= interval_end; ++possible_digit) {
// for each digit d, s<=d<=e
new_interval_cases.push_back({
// add [dS-dE] to new_interval_cases
possible_digit*place_value + interval_case.first,
possible_digit*place_value + interval_case.second
});
}
}
interval_cases = new_interval_cases;
}
// move to the following place value (from ones to tens, from tens to hundreds, etc.)
place_value *= 10;
}
// After computing the vector interval_cases, we can just now create the inequalities
for (const auto& interval_case : interval_cases) {
rational min = interval_case.first;
rational max = interval_case.second;
// we want to put
// min <= var <= max
// but for the special case where min==max, we can just put
// var = min (= max)
if (min == max) {
result.succ.emplace_back(LenFormulaType::EQ, std::vector<LenNode>{var, min});
} else {
result.succ.emplace_back(LenFormulaType::AND, std::vector<LenNode>{
LenNode(LenFormulaType::LEQ, {min, var}),
LenNode(LenFormulaType::LEQ, {var, max})
});
}
}
}
return result;
}
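// For every variable substituting a string variable used in to_int/from_int, builds a formula relating
// |int_subst_var|, int_version_of(int_subst_var) and, if the variable is also used in code conversions,
// code_version_of(int_subst_var). As a side effect, int_subst_vars_to_possible_valid_lengths is filled
// with the lengths of digit-only words accepted by the variable's automaton; if there are infinitely many
// such words, the lengths are cut off at m_params.m_underapprox_length and the result is marked as an
// underapproximation.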
std::pair<LenNode, LenNodePrecision> DecisionProcedure::get_formula_for_int_subst_vars(const std::set<BasicTerm>& int_subst_vars, const std::set<BasicTerm>& code_subst_vars, std::map<BasicTerm,std::vector<unsigned>>& int_subst_vars_to_possible_valid_lengths) {
LenNode result(LenFormulaType::AND);
LenNodePrecision res_precision = LenNodePrecision::PRECISE;
// automaton representing all valid inputs (only digits)
// - we also keep the empty word, because we will use it for substituted vars, and one of them can be empty while the others have only digits (for example s1="45", s2="", but s=s1s2 = "45" is valid)
mata::nfa::Nfa only_digits = AutAssignment::digit_automaton_with_epsilon();
STRACE("str-conversion-int", tout << "only-digit NFA:" << std::endl << only_digits << std::endl;);
// automaton representing all non-valid inputs (contain non-digit)
mata::nfa::Nfa contain_non_digit = solution.aut_ass.complement_aut(only_digits);
STRACE("str-conversion-int", tout << "contains-non-digit NFA:" << std::endl << contain_non_digit << std::endl;);
for (const BasicTerm& int_subst_var : int_subst_vars) {
int_subst_vars_to_possible_valid_lengths[int_subst_var] = {};
// formula_for_int_subst_var should encode int_version_of(int_subst_var) = to_int(int_subst_var) for all words in solution.aut_ass.at(int_subst_var) while
// keeping the correspondence between |int_subst_var|, int_version_of(int_subst_var) and possibly also code_version_of(int_subst_var)
LenNode formula_for_int_subst_var(LenFormulaType::OR);
std::shared_ptr<mata::nfa::Nfa> aut = solution.aut_ass.at(int_subst_var);
STRACE("str-conversion-int", tout << "NFA for " << int_subst_var << ":" << std::endl << *aut << std::endl;);
// part containing only digits
mata::nfa::Nfa aut_valid_part = mata::nfa::reduce(mata::nfa::intersection(*aut, only_digits).trim());
STRACE("str-conversion-int", tout << "only-digit NFA:" << std::endl << aut_valid_part << std::endl;);
// part containing some non-digit
mata::nfa::Nfa aut_non_valid_part = mata::nfa::reduce(mata::nfa::intersection(*aut, contain_non_digit).trim());
STRACE("str-conversion-int", tout << "contains-non-digit NFA:" << std::endl << aut_non_valid_part << std::endl;);
// First handle the case of all words (except empty word) from solution.aut_ass.at(int_subst_var) that do not represent numbers
if (!aut_non_valid_part.is_lang_empty()) {
// aut_non_valid_part is language of words that contain at least one non-digit
// we therefore create following formula:
// |int_subst_var| is length of some word from aut_non_valid_part && int_version_of(int_subst_var) = -1
formula_for_int_subst_var.succ.emplace_back(LenFormulaType::AND, std::vector<LenNode>{ solution.aut_ass.get_lengths(aut_non_valid_part, int_subst_var), LenNode(LenFormulaType::EQ, { int_version_of(int_subst_var), -1 }) });
if (code_subst_vars.contains(int_subst_var)) {
// int_subst_var is used in some to_code/from_code
// => we need to add to the previous formula also the fact, that int_subst_var cannot encode code point of a digit
// .. && (code_version_of(int_subst_var) < AutAssignment::DIGIT_SYMBOL_START || AutAssignment::DIGIT_SYMBOL_END < code_version_of(int_subst_var))
formula_for_int_subst_var.succ.back().succ.emplace_back(LenFormulaType::OR, std::vector<LenNode>{
LenNode(LenFormulaType::LT, { code_version_of(int_subst_var), AutAssignment::DIGIT_SYMBOL_START }),
LenNode(LenFormulaType::LT, { AutAssignment::DIGIT_SYMBOL_END, code_version_of(int_subst_var) })
});
}
}
// Now, for each length l of some word containing only digits, we create the formula
// (|int_subst_var| = l && int_version_of(int_subst_var) is number represented by some word containing only digits of length l)
// and add l to int_subst_vars_to_possible_valid_lengths[int_subst_var].
// For l=0 we need to do something else with the second conjunct, and for l=1, we also need to add something about code_version_of(int_subst_var).
if (aut_valid_part.is_lang_empty()) {
result.succ.push_back(formula_for_int_subst_var);
continue;
}
// Handle l=0 as a special case.
if (aut_valid_part.is_in_lang({})) {
// Even though it is invalid and it seems that we should set int_version_of(int_subst_var) = -1, we cannot do that
// as int_subst_var is substituting some string_var in int conversion, and the other variables in the substitution
// might not be empty, so together they could form a valid string representing integer.
// In get_formula_for_int_conversion() we will actually ignore the value of int_version_of(int_subst_var) for the case
// that |int_subst_var| = 0, we just need it to be something else than -1.
// We therefore get the formula:
// |int_subst_var| = 0 && !(int_version_of(int_subst_var) = -1)
formula_for_int_subst_var.succ.emplace_back(LenFormulaType::AND, std::vector<LenNode>{
LenNode(LenFormulaType::EQ, { int_subst_var, 0 }),
LenNode(LenFormulaType::NEQ, { int_version_of(int_subst_var), -1 })
});
// Also add the information that int_subst_var can have length 0
int_subst_vars_to_possible_valid_lengths[int_subst_var].push_back(0);
}
// maximum length of l
unsigned max_length_of_words;
if (aut_valid_part.is_acyclic()) {
// there is a finite number of words containing only digits => the longest possible word has length aut_valid_part.num_of_states()-1
max_length_of_words = aut_valid_part.num_of_states()-1;
} else {
// there is an infinite number of such words => we need to underapproximate
STRACE("str-conversion", tout << "infinite NFA for which we need to do underapproximation:" << std::endl << aut_valid_part << std::endl;);
max_length_of_words = m_params.m_underapprox_length;
res_precision = LenNodePrecision::UNDERAPPROX;
}
// for lengths l=1 to max_length_of_words
for (unsigned l = 1; l <= max_length_of_words; ++l) {
// get automaton representing all accepted words containing only digits of length l
mata::nfa::Nfa aut_valid_of_length = mata::nfa::minimize(mata::nfa::intersection(aut_valid_part, AutAssignment::digit_automaton_of_length(l)).trim());
if (aut_valid_of_length.is_lang_empty()) {
// there are no such words
continue;
}
// remember that there are some valid words of length l
int_subst_vars_to_possible_valid_lengths[int_subst_var].push_back(l);
// |int_subst_var| = l && encode that int_version_of(int_subst_var) is a numeral represented by some interval word accepted by aut_valid_of_length
formula_for_int_subst_var.succ.emplace_back(LenFormulaType::AND, std::vector<LenNode>{
LenNode(LenFormulaType::EQ, { int_subst_var, l }),
encode_interval_words(int_version_of(int_subst_var), AutAssignment::get_interval_words(aut_valid_of_length))
});
if (code_subst_vars.contains(int_subst_var) && l == 1) {
// int_subst_var is used in some to_code/from_code AND we are handling the case of l==1 (for other lengths, the formula from get_formula_for_code_subst_vars should force that code_version_of(int_subst_var) is -1)
// => we need to connect code_version_of(int_subst_var) and int_version_of(int_subst_var)
// code_version_of(int_subst_var) = int_version_of(int_subst_var) + AutAssignment::DIGIT_SYMBOL_START
formula_for_int_subst_var.succ.back().succ.emplace_back(LenFormulaType::EQ, std::vector<LenNode>{
code_version_of(int_subst_var),
LenNode(LenFormulaType::PLUS, std::vector<LenNode>{int_version_of(int_subst_var), AutAssignment::DIGIT_SYMBOL_START })
});
}
}
result.succ.push_back(formula_for_int_subst_var);
}
STRACE("str-conversion-int", tout << "int_subst_vars formula: " << result << std::endl;);
return {result, res_precision};
}
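// Builds the formula for one to_code/from_code conversion between string variable s and integer variable c:
// a disjunction of the invalid case (|s| != 1 for to_code, or c not being a valid code point for from_code)
// and the valid case where |s| == 1, c >= 0 and c equals code_version_of(s_i) for one of the variables s_i
// substituting s in the flattened solution.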
LenNode DecisionProcedure::get_formula_for_code_conversion(const TermConversion& conv) {
const BasicTerm& s = conv.string_var;
const BasicTerm& c = conv.int_var;
// First handle the invalid inputs
LenNode invalid_case(LenFormulaType::AND);
if (conv.type == ConversionType::TO_CODE) {
// For to_code, invalid input is string whose length is not 1
// (|s| != 1 && c == -1)
invalid_case.succ.emplace_back(LenFormulaType::NEQ, std::vector<LenNode>{s, 1});
invalid_case.succ.emplace_back(LenFormulaType::EQ, std::vector<LenNode>{c, -1});
} else {
// For from_code, invalid input is a number not representing a code point of some char
// (|s| == 0 && c is not a valid code point)
invalid_case.succ.emplace_back(LenFormulaType::EQ, std::vector<LenNode>{s, 0});
// non-valid code point means that 'c < 0 || c > max_char'
invalid_case.succ.emplace_back(LenFormulaType::OR, std::vector<LenNode>{
LenNode(LenFormulaType::LT, {c, 0}),
LenNode(LenFormulaType::LT, {zstring::max_char(), c})
});
}
// For s=s_1s_2...s_n substitution in the flattened solution, we now handle the valid inputs:
// (|s| == 1 && c >= 0 && c is equal to one of code_version_of(s_i))
// This is shared for both to_code and from_code.
LenNode valid_case(LenFormulaType::AND, { {LenFormulaType::EQ, std::vector<LenNode>{s, 1}}, {LenFormulaType::LEQ, std::vector<LenNode>{0, c}}, /*c is equal to one of code_version_of(s_i))*/ });
// c is equal to one of code_version_of(s_i)
LenNode equal_to_one_subst_var(LenFormulaType::OR);
for (const BasicTerm& subst_var : solution.get_substituted_vars(s)) {
// c == code_version_of(s_i)
equal_to_one_subst_var.succ.emplace_back(LenFormulaType::EQ, std::vector<LenNode>{c, code_version_of(subst_var)});
}
valid_case.succ.push_back(equal_to_one_subst_var);
return LenNode(LenFormulaType::OR, std::vector<LenNode>{
invalid_case,
valid_case
});
}
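// Builds the formula for one to_int/from_int conversion between string variable s (substituted by
// s_1 ... s_n in the flattened solution) and integer variable i, using the valid lengths computed in
// get_formula_for_int_subst_vars; the invalid inputs (strings containing a non-digit or the empty string
// for to_int, negative i for from_int) are handled first.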
LenNode DecisionProcedure::get_formula_for_int_conversion(const TermConversion& conv, const std::map<BasicTerm,std::vector<unsigned>>& int_subst_vars_to_possible_valid_lengths) {
const BasicTerm& s = conv.string_var;
const BasicTerm& i = conv.int_var;
LenNode result(LenFormulaType::OR);
// s = s_1 ... s_n, subst_vars = <s_1, ..., s_n>
const std::vector<BasicTerm>& subst_vars = solution.get_substituted_vars(s);
// first handle non-valid cases
if (conv.type == ConversionType::TO_INT) {
// for TO_INT, invalid inputs are the empty string or anything that contains a non-digit
LenNode empty_or_one_subst_contains_non_digit(LenFormulaType::OR, {LenNode(LenFormulaType::EQ, {s, 0})}); // start with empty string: |s| = 0
for (const BasicTerm& subst_var : subst_vars) {
// subst_var contains a non-digit if int_version_of(subst_var) == -1 (see get_formula_for_int_subst_vars)
empty_or_one_subst_contains_non_digit.succ.emplace_back(LenFormulaType::EQ, std::vector<LenNode>{int_version_of(subst_var), -1});
}
result.succ.emplace_back(LenFormulaType::AND, std::vector<LenNode>{
empty_or_one_subst_contains_non_digit,
LenNode(LenFormulaType::EQ, {i, -1}) // for non-valid s, to_int(s) == -1
});
} else {
// for FROM_INT, the only invalid input is the empty string (as we assume that the language of s was restricted to the possible results of from_int)
result.succ.emplace_back(LenFormulaType::AND, std::vector<LenNode>{
LenNode(LenFormulaType::LT, {i, 0}), // from_int(i) = "" only if i < 0
LenNode(LenFormulaType::EQ, {s, 0})
});
}