@@ -22,6 +22,19 @@ use regex_syntax::hir::{Capture, Hir, HirKind, Literal, Look};
22
22
/// Maximum number of regex alternations (`foo|bar|...`) that will be expanded into multiple `LIKE` expressions.
23
23
const MAX_REGEX_ALTERNATIONS_EXPANSION : usize = 4 ;
24
24
25
+ /// Tries to convert a regexp expression to a `LIKE` or `Eq`/`NotEq` expression.
26
+ ///
27
+ /// This function also validates the regex pattern and returns an error if the
28
+ /// pattern is invalid.
29
+ ///
30
+ /// Typical cases this function can simplify:
31
+ /// - empty regex pattern to `LIKE '%'`
32
+ /// - literal regex patterns to `LIKE '%foo%'`
33
+ /// - fully anchored regex patterns (e.g. `^foo$`) to `= 'foo'`
34
+ /// - partially anchored regex patterns (e.g. `^foo`) to `LIKE 'foo%'`
35
+ /// - combinations (alternatives) of the above, which are combined with `OR` or `AND`
36
+ ///
37
+ /// Dev note: unit tests of this function are in `expr_simplifier.rs`, case `test_simplify_regex`.
25
38
pub fn simplify_regex_expr (
26
39
left : Box < Expr > ,
27
40
op : Operator ,
@@ -53,13 +66,15 @@ pub fn simplify_regex_expr(
53
66
}
54
67
}
55
68
56
- // leave untouched if optimization didn't work
69
+ // Leave untouched if optimization didn't work
57
70
Ok ( Expr :: BinaryExpr ( BinaryExpr { left, op, right } ) )
58
71
}
59
72
60
73
/// Flags describing how a regex match operator is lowered: whether the match
/// is negated and whether it ignores case.
#[ derive( Debug ) ]
61
74
struct OperatorMode {
75
+ /// Negated match: when `true`, the lowered expression is `NOT LIKE` / `!=`.
62
76
not : bool ,
77
+ /// Ignore case (`true` for case-insensitive).
63
78
i : bool ,
64
79
}
65
80
@@ -80,6 +95,7 @@ impl OperatorMode {
80
95
Self { not, i }
81
96
}
82
97
98
+ /// Creates a [`LIKE`](Expr::Like) expression from the given `LIKE` pattern.
83
99
fn expr ( & self , expr : Box < Expr > , pattern : String ) -> Expr {
84
100
let like = Like {
85
101
negated : self . not ,
@@ -92,6 +108,7 @@ impl OperatorMode {
92
108
Expr :: Like ( like)
93
109
}
94
110
111
+ /// Creates an [`Expr::BinaryExpr`] of "`left` = `right`" or "`left` != `right`".
95
112
fn expr_matches_literal ( & self , left : Box < Expr > , right : Box < Expr > ) -> Expr {
96
113
let op = if self . not {
97
114
Operator :: NotEq
@@ -118,7 +135,7 @@ fn collect_concat_to_like_string(parts: &[Hir]) -> Option<String> {
118
135
Some ( s)
119
136
}
120
137
121
- /// returns a str represented by `Literal` if it contains a valid utf8
138
+ /// Returns a str represented by `Literal` if it contains a valid utf8
122
139
/// sequence and is safe for like (has no '%' and '_')
123
140
fn like_str_from_literal ( l : & Literal ) -> Option < & str > {
124
141
// if not utf8, no good
@@ -131,7 +148,7 @@ fn like_str_from_literal(l: &Literal) -> Option<&str> {
131
148
}
132
149
}
133
150
134
- /// returns a str represented by `Literal` if it contains a valid utf8
151
+ /// Returns a str represented by `Literal` if it contains a valid UTF-8 sequence
135
152
fn str_from_literal ( l : & Literal ) -> Option < & str > {
136
153
// if not utf8, no good
137
154
let s = std:: str:: from_utf8 ( & l. 0 ) . ok ( ) ?;
@@ -143,7 +160,7 @@ fn is_safe_for_like(c: char) -> bool {
143
160
( c != '%' ) && ( c != '_' )
144
161
}
145
162
146
- /// returns true if the elements in a `Concat` pattern are:
163
+ /// Returns true if the elements in a `Concat` pattern are:
147
164
/// - `[Look::Start, Look::End]`
148
165
/// - `[Look::Start, Literal(_), Look::End]`
149
166
fn is_anchored_literal ( v : & [ Hir ] ) -> bool {
@@ -157,10 +174,9 @@ fn is_anchored_literal(v: &[Hir]) -> bool {
157
174
v. last ( ) . expect ( "length checked" ) ,
158
175
) ;
159
176
if !matches ! ( first_last,
160
- ( s, e) if s. kind( ) == & HirKind :: Look ( Look :: Start )
177
+ ( s, e) if s. kind( ) == & HirKind :: Look ( Look :: Start )
161
178
&& e. kind( ) == & HirKind :: Look ( Look :: End )
162
- )
163
- {
179
+ ) {
164
180
return false ;
165
181
}
166
182
@@ -170,7 +186,7 @@ fn is_anchored_literal(v: &[Hir]) -> bool {
170
186
. all ( |h| matches ! ( h. kind( ) , HirKind :: Literal ( _) ) )
171
187
}
172
188
173
- /// returns true if the elements in a `Concat` pattern are:
189
+ /// Returns true if the elements in a `Concat` pattern are:
174
190
/// - `[Look::Start, Capture(Alternation(Literals...)), Look::End]`
175
191
fn is_anchored_capture ( v : & [ Hir ] ) -> bool {
176
192
if v. len ( ) != 3
@@ -197,7 +213,33 @@ fn is_anchored_capture(v: &[Hir]) -> bool {
197
213
true
198
214
}
199
215
200
- /// extracts a string literal expression assuming that [`is_anchored_literal`]
216
+ /// Returns the `LIKE` pattern if the `Concat` pattern is partial anchored:
217
+ /// - `[Look::Start, Literal(_)]`
218
+ /// - `[Literal(_), Look::End]`
219
+ /// Full anchored patterns are handled by [`anchored_literal_to_expr`].
220
+ fn partial_anchored_literal_to_like ( v : & [ Hir ] ) -> Option < String > {
221
+ if v. len ( ) != 2 {
222
+ return None ;
223
+ }
224
+
225
+ let ( lit, match_begin) = match ( & v[ 0 ] . kind ( ) , & v[ 1 ] . kind ( ) ) {
226
+ ( HirKind :: Look ( Look :: Start ) , HirKind :: Literal ( l) ) => {
227
+ ( like_str_from_literal ( l) ?, true )
228
+ }
229
+ ( HirKind :: Literal ( l) , HirKind :: Look ( Look :: End ) ) => {
230
+ ( like_str_from_literal ( l) ?, false )
231
+ }
232
+ _ => return None ,
233
+ } ;
234
+
235
+ if match_begin {
236
+ Some ( format ! ( "{}%" , lit) )
237
+ } else {
238
+ Some ( format ! ( "%{}" , lit) )
239
+ }
240
+ }
241
+
242
+ /// Extracts a string literal expression assuming that [`is_anchored_literal`]
201
243
/// returned true.
202
244
fn anchored_literal_to_expr ( v : & [ Hir ] ) -> Option < Expr > {
203
245
match v. len ( ) {
@@ -246,6 +288,7 @@ fn anchored_alternation_to_exprs(v: &[Hir]) -> Option<Vec<Expr>> {
246
288
None
247
289
}
248
290
291
+ /// Tries to lower (transform) a simple regex pattern to a LIKE expression.
249
292
fn lower_simple ( mode : & OperatorMode , left : & Expr , hir : & Hir ) -> Option < Expr > {
250
293
match hir. kind ( ) {
251
294
HirKind :: Empty => {
@@ -265,7 +308,9 @@ fn lower_simple(mode: &OperatorMode, left: &Expr, hir: &Hir) -> Option<Expr> {
265
308
. map ( |right| left. clone ( ) . in_list ( right, mode. not ) ) ;
266
309
}
267
310
HirKind :: Concat ( inner) => {
268
- if let Some ( pattern) = collect_concat_to_like_string ( inner) {
311
+ if let Some ( pattern) = partial_anchored_literal_to_like ( inner)
312
+ . or ( collect_concat_to_like_string ( inner) )
313
+ {
269
314
return Some ( mode. expr ( Box :: new ( left. clone ( ) ) , pattern) ) ;
270
315
}
271
316
}
@@ -274,6 +319,9 @@ fn lower_simple(mode: &OperatorMode, left: &Expr, hir: &Hir) -> Option<Expr> {
274
319
None
275
320
}
276
321
322
+ /// Calls [`lower_simple`] for each alternative and combines the results with `or` or `and`
323
+ /// based on [`OperatorMode`]. If any alternative fails to lower, this
324
+ /// function returns `None`.
277
325
fn lower_alt ( mode : & OperatorMode , left : & Expr , alts : & [ Hir ] ) -> Option < Expr > {
278
326
let mut accu: Option < Expr > = None ;
279
327
0 commit comments