@@ -21,7 +21,9 @@ use std::sync::Arc;
21
21
use arrow:: array:: { ArrayRef , GenericStringArray , OffsetSizeTrait } ;
22
22
use arrow:: datatypes:: DataType ;
23
23
24
- use datafusion_common:: cast:: { as_generic_string_array, as_int64_array} ;
24
+ use datafusion_common:: cast:: {
25
+ as_generic_string_array, as_int64_array, as_string_view_array,
26
+ } ;
25
27
use datafusion_common:: { exec_err, Result } ;
26
28
use datafusion_expr:: TypeSignature :: * ;
27
29
use datafusion_expr:: { ColumnarValue , Volatility } ;
@@ -46,8 +48,10 @@ impl OverlayFunc {
46
48
Self {
47
49
signature : Signature :: one_of (
48
50
vec ! [
51
+ Exact ( vec![ Utf8View , Utf8View , Int64 , Int64 ] ) ,
49
52
Exact ( vec![ Utf8 , Utf8 , Int64 , Int64 ] ) ,
50
53
Exact ( vec![ LargeUtf8 , LargeUtf8 , Int64 , Int64 ] ) ,
54
+ Exact ( vec![ Utf8View , Utf8View , Int64 ] ) ,
51
55
Exact ( vec![ Utf8 , Utf8 , Int64 ] ) ,
52
56
Exact ( vec![ LargeUtf8 , LargeUtf8 , Int64 ] ) ,
53
57
] ,
@@ -76,7 +80,9 @@ impl ScalarUDFImpl for OverlayFunc {
76
80
77
81
fn invoke ( & self , args : & [ ColumnarValue ] ) -> Result < ColumnarValue > {
78
82
match args[ 0 ] . data_type ( ) {
79
- DataType :: Utf8 => make_scalar_function ( overlay :: < i32 > , vec ! [ ] ) ( args) ,
83
+ DataType :: Utf8View | DataType :: Utf8 => {
84
+ make_scalar_function ( overlay :: < i32 > , vec ! [ ] ) ( args)
85
+ }
80
86
DataType :: LargeUtf8 => make_scalar_function ( overlay :: < i64 > , vec ! [ ] ) ( args) ,
81
87
other => exec_err ! ( "Unsupported data type {other:?} for function overlay" ) ,
82
88
}
@@ -87,7 +93,16 @@ impl ScalarUDFImpl for OverlayFunc {
87
93
/// Replaces a substring of string1 with string2, starting at the given integer position
88
94
/// pgsql overlay('Txxxxas' placing 'hom' from 2 for 4) → Thomas
89
95
/// overlay('Txxxxas' placing 'hom' from 2) -> Thomxas; without the `for` option, the length of str2 is used instead
90
- pub fn overlay < T : OffsetSizeTrait > ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
96
+ fn overlay < T : OffsetSizeTrait > ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
97
+ let use_string_view = args[ 0 ] . data_type ( ) == & DataType :: Utf8View ;
98
+ if use_string_view {
99
+ string_view_overlay :: < T > ( args)
100
+ } else {
101
+ string_overlay :: < T > ( args)
102
+ }
103
+ }
104
+
105
+ pub fn string_overlay < T : OffsetSizeTrait > ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
91
106
match args. len ( ) {
92
107
3 => {
93
108
let string_array = as_generic_string_array :: < T > ( & args[ 0 ] ) ?;
@@ -171,6 +186,90 @@ pub fn overlay<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
171
186
}
172
187
}
173
188
189
+ pub fn string_view_overlay < T : OffsetSizeTrait > ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
190
+ match args. len ( ) {
191
+ 3 => {
192
+ let string_array = as_string_view_array ( & args[ 0 ] ) ?;
193
+ let characters_array = as_string_view_array ( & args[ 1 ] ) ?;
194
+ let pos_num = as_int64_array ( & args[ 2 ] ) ?;
195
+
196
+ let result = string_array
197
+ . iter ( )
198
+ . zip ( characters_array. iter ( ) )
199
+ . zip ( pos_num. iter ( ) )
200
+ . map ( |( ( string, characters) , start_pos) | {
201
+ match ( string, characters, start_pos) {
202
+ ( Some ( string) , Some ( characters) , Some ( start_pos) ) => {
203
+ let string_len = string. chars ( ) . count ( ) ;
204
+ let characters_len = characters. chars ( ) . count ( ) ;
205
+ let replace_len = characters_len as i64 ;
206
+ let mut res =
207
+ String :: with_capacity ( string_len. max ( characters_len) ) ;
208
+
209
+ //as sql replace index start from 1 while string index start from 0
210
+ if start_pos > 1 && start_pos - 1 < string_len as i64 {
211
+ let start = ( start_pos - 1 ) as usize ;
212
+ res. push_str ( & string[ ..start] ) ;
213
+ }
214
+ res. push_str ( characters) ;
215
+ // if start + replace_len - 1 >= string_length, just to string end
216
+ if start_pos + replace_len - 1 < string_len as i64 {
217
+ let end = ( start_pos + replace_len - 1 ) as usize ;
218
+ res. push_str ( & string[ end..] ) ;
219
+ }
220
+ Ok ( Some ( res) )
221
+ }
222
+ _ => Ok ( None ) ,
223
+ }
224
+ } )
225
+ . collect :: < Result < GenericStringArray < T > > > ( ) ?;
226
+ Ok ( Arc :: new ( result) as ArrayRef )
227
+ }
228
+ 4 => {
229
+ let string_array = as_string_view_array ( & args[ 0 ] ) ?;
230
+ let characters_array = as_string_view_array ( & args[ 1 ] ) ?;
231
+ let pos_num = as_int64_array ( & args[ 2 ] ) ?;
232
+ let len_num = as_int64_array ( & args[ 3 ] ) ?;
233
+
234
+ let result = string_array
235
+ . iter ( )
236
+ . zip ( characters_array. iter ( ) )
237
+ . zip ( pos_num. iter ( ) )
238
+ . zip ( len_num. iter ( ) )
239
+ . map ( |( ( ( string, characters) , start_pos) , len) | {
240
+ match ( string, characters, start_pos, len) {
241
+ ( Some ( string) , Some ( characters) , Some ( start_pos) , Some ( len) ) => {
242
+ let string_len = string. chars ( ) . count ( ) ;
243
+ let characters_len = characters. chars ( ) . count ( ) ;
244
+ let replace_len = len. min ( string_len as i64 ) ;
245
+ let mut res =
246
+ String :: with_capacity ( string_len. max ( characters_len) ) ;
247
+
248
+ //as sql replace index start from 1 while string index start from 0
249
+ if start_pos > 1 && start_pos - 1 < string_len as i64 {
250
+ let start = ( start_pos - 1 ) as usize ;
251
+ res. push_str ( & string[ ..start] ) ;
252
+ }
253
+ res. push_str ( characters) ;
254
+ // if start + replace_len - 1 >= string_length, just to string end
255
+ if start_pos + replace_len - 1 < string_len as i64 {
256
+ let end = ( start_pos + replace_len - 1 ) as usize ;
257
+ res. push_str ( & string[ end..] ) ;
258
+ }
259
+ Ok ( Some ( res) )
260
+ }
261
+ _ => Ok ( None ) ,
262
+ }
263
+ } )
264
+ . collect :: < Result < GenericStringArray < T > > > ( ) ?;
265
+ Ok ( Arc :: new ( result) as ArrayRef )
266
+ }
267
+ other => {
268
+ exec_err ! ( "overlay was called with {other} arguments. It requires 3 or 4." )
269
+ }
270
+ }
271
+ }
272
+
174
273
#[ cfg( test) ]
175
274
mod tests {
176
275
use arrow:: array:: { Int64Array , StringArray } ;
0 commit comments