You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
feat: support Utf8View type in starts_with function (#11787)
* feat: support `Utf8View` for `starts_with`
* style: clippy
* simplify string view handling
* fix: allow utf8 and largeutf8 to be cast into utf8view
* fix: fix test
* Apply suggestions from code review
Co-authored-by: Yongting You <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
* style: fix format
* feat: add additional tests
* tests: improve tests
* fix: fix null case
* tests: one more null test
* Test comments and execution tests
---------
Co-authored-by: Yongting You <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
02)--TableScan: test projection=[column1_utf8, column1_utf8view, column1_dict]
357
357
358
+
### `STARTS_WITH`
359
+
360
+
# Test STARTS_WITH with utf8view against utf8view, utf8, and largeutf8
361
+
# (the utf8view column should not be cast; utf8 and largeutf8 arguments are cast to utf8view)
362
+
query TT
363
+
EXPLAIN SELECT
364
+
STARTS_WITH(column1_utf8view, column2_utf8view) as c1,
365
+
STARTS_WITH(column1_utf8view, column2_utf8) as c2,
366
+
STARTS_WITH(column1_utf8view, column2_large_utf8) as c3
367
+
FROM test;
368
+
----
369
+
logical_plan
370
+
01)Projection: starts_with(test.column1_utf8view, test.column2_utf8view) AS c1, starts_with(test.column1_utf8view, CAST(test.column2_utf8 AS Utf8View)) AS c2, starts_with(test.column1_utf8view, CAST(test.column2_large_utf8 AS Utf8View)) AS c3
371
+
02)--TableScan: test projection=[column2_utf8, column2_large_utf8, column1_utf8view, column2_utf8view]
372
+
373
+
query BBB
374
+
SELECT
375
+
STARTS_WITH(column1_utf8view, column2_utf8view) as c1,
376
+
STARTS_WITH(column1_utf8view, column2_utf8) as c2,
377
+
STARTS_WITH(column1_utf8view, column2_large_utf8) as c3
378
+
FROM test;
379
+
----
380
+
false false false
381
+
true true true
382
+
true true true
383
+
NULL NULL NULL
384
+
385
+
# Test STARTS_WITH with utf8 against utf8view, utf8, and largeutf8
386
+
# Should work, but will have to cast to common types
387
+
# should cast utf8 -> utf8view and largeutf8 -> utf8view
388
+
query TT
389
+
EXPLAIN SELECT
390
+
STARTS_WITH(column1_utf8, column2_utf8view) as c1,
391
+
STARTS_WITH(column1_utf8, column2_utf8) as c3,
392
+
STARTS_WITH(column1_utf8, column2_large_utf8) as c4
393
+
FROM test;
394
+
----
395
+
logical_plan
396
+
01)Projection: starts_with(__common_expr_1, test.column2_utf8view) AS c1, starts_with(test.column1_utf8, test.column2_utf8) AS c3, starts_with(__common_expr_1, CAST(test.column2_large_utf8 AS Utf8View)) AS c4
397
+
02)--Projection: CAST(test.column1_utf8 AS Utf8View) AS __common_expr_1, test.column1_utf8, test.column2_utf8, test.column2_large_utf8, test.column2_utf8view
398
+
03)----TableScan: test projection=[column1_utf8, column2_utf8, column2_large_utf8, column2_utf8view]
399
+
400
+
query BBB
401
+
SELECT
402
+
STARTS_WITH(column1_utf8, column2_utf8view) as c1,
403
+
STARTS_WITH(column1_utf8, column2_utf8) as c3,
404
+
STARTS_WITH(column1_utf8, column2_large_utf8) as c4
405
+
FROM test;
406
+
----
407
+
false false false
408
+
true true true
409
+
true true true
410
+
NULL NULL NULL
411
+
412
+
413
+
# Test STARTS_WITH with utf8view against literals
414
+
# In this case, the literals should be cast to utf8view. The columns
415
+
# should not be cast to utf8.
416
+
query TT
417
+
EXPLAIN SELECT
418
+
STARTS_WITH(column1_utf8view, 'äöüß') as c1,
419
+
STARTS_WITH(column1_utf8view, '') as c2,
420
+
STARTS_WITH(column1_utf8view, NULL) as c3,
421
+
STARTS_WITH(NULL, column1_utf8view) as c4
422
+
FROM test;
423
+
----
424
+
logical_plan
425
+
01)Projection: starts_with(test.column1_utf8view, Utf8View("äöüß")) AS c1, starts_with(test.column1_utf8view, Utf8View("")) AS c2, starts_with(test.column1_utf8view, Utf8View(NULL)) AS c3, starts_with(Utf8View(NULL), test.column1_utf8view) AS c4
426
+
02)--TableScan: test projection=[column1_utf8view]
358
427
359
428
statement ok
360
429
drop table test;
@@ -376,6 +445,5 @@ select t.dt from dates t where arrow_cast('2024-01-01', 'Utf8View') < t.dt;
0 commit comments