Skip to content

Commit 71e138c

Browse files
committed
Re-write impl of overlay
Signed-off-by: Chojan Shang <[email protected]>
1 parent f482212 commit 71e138c

File tree

1 file changed

+83
-122
lines changed

1 file changed

+83
-122
lines changed

datafusion/functions/src/string/overlay.rs

+83-122
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,77 @@ impl ScalarUDFImpl for OverlayFunc {
8989
}
9090
}
9191

92+
macro_rules! process_overlay {
93+
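// Three-argument form (string, characters, start position): the replaced length is the character count of `characters`; `$is_view` is accepted but not used in the expansion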
94+
($string_array:expr, $characters_array:expr, $pos_num:expr, $is_view:expr) => {{
95+
$string_array
96+
.iter()
97+
.zip($characters_array.iter())
98+
.zip($pos_num.iter())
99+
.map(|((string, characters), start_pos)| {
100+
match (string, characters, start_pos) {
101+
(Some(string), Some(characters), Some(start_pos)) => {
102+
let string_len = string.chars().count();
103+
let characters_len = characters.chars().count();
104+
let replace_len = characters_len as i64;
105+
let mut res =
106+
String::with_capacity(string_len.max(characters_len));
107+
108+
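// SQL positions are 1-based while Rust string indices are 0-based. NOTE(review): `start` is derived from a character count but is used as a byte offset in the slice below, which can panic on multi-byte UTF-8 input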
109+
if start_pos > 1 && start_pos - 1 < string_len as i64 {
110+
let start = (start_pos - 1) as usize;
111+
res.push_str(&string[..start]);
112+
}
113+
res.push_str(characters);
114+
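// Append the rest of the string after the replaced span; when start_pos + replace_len - 1 >= string_len the span runs to the end of the string and nothing is appended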
115+
if start_pos + replace_len - 1 < string_len as i64 {
116+
let end = (start_pos + replace_len - 1) as usize;
117+
res.push_str(&string[end..]);
118+
}
119+
Ok(Some(res))
120+
}
121+
_ => Ok(None),
122+
}
123+
})
124+
.collect::<Result<GenericStringArray<T>>>()
125+
}};
126+
127+
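// Four-argument form (string, characters, start position, length): the replaced length is `len` capped at the string's character count; `$is_view` is accepted but not used in the expansion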
128+
($string_array:expr, $characters_array:expr, $pos_num:expr, $len_num:expr, $is_view:expr) => {{
129+
$string_array
130+
.iter()
131+
.zip($characters_array.iter())
132+
.zip($pos_num.iter())
133+
.zip($len_num.iter())
134+
.map(|(((string, characters), start_pos), len)| {
135+
match (string, characters, start_pos, len) {
136+
(Some(string), Some(characters), Some(start_pos), Some(len)) => {
137+
let string_len = string.chars().count();
138+
let characters_len = characters.chars().count();
139+
let replace_len = len.min(string_len as i64);
140+
let mut res =
141+
String::with_capacity(string_len.max(characters_len));
142+
143+
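// SQL positions are 1-based while Rust string indices are 0-based. NOTE(review): `start` is derived from a character count but is used as a byte offset in the slice below, which can panic on multi-byte UTF-8 input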
144+
if start_pos > 1 && start_pos - 1 < string_len as i64 {
145+
let start = (start_pos - 1) as usize;
146+
res.push_str(&string[..start]);
147+
}
148+
res.push_str(characters);
149+
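// Append the rest of the string after the replaced span; when start_pos + replace_len - 1 >= string_len the span runs to the end of the string and nothing is appended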
150+
if start_pos + replace_len - 1 < string_len as i64 {
151+
let end = (start_pos + replace_len - 1) as usize;
152+
res.push_str(&string[end..]);
153+
}
154+
Ok(Some(res))
155+
}
156+
_ => Ok(None),
157+
}
158+
})
159+
.collect::<Result<GenericStringArray<T>>>()
160+
}};
161+
}
162+
92163
/// OVERLAY(string1 PLACING string2 FROM integer FOR integer2)
93164
/// Replaces a substring of string1 with string2, starting at the 1-based character position given by integer
94165
/// pgsql overlay('Txxxxas' placing 'hom' from 2 for 4) → Thomas
@@ -109,36 +180,8 @@ pub fn string_overlay<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef>
109180
let characters_array = as_generic_string_array::<T>(&args[1])?;
110181
let pos_num = as_int64_array(&args[2])?;
111182

112-
let result = string_array
113-
.iter()
114-
.zip(characters_array.iter())
115-
.zip(pos_num.iter())
116-
.map(|((string, characters), start_pos)| {
117-
match (string, characters, start_pos) {
118-
(Some(string), Some(characters), Some(start_pos)) => {
119-
let string_len = string.chars().count();
120-
let characters_len = characters.chars().count();
121-
let replace_len = characters_len as i64;
122-
let mut res =
123-
String::with_capacity(string_len.max(characters_len));
124-
125-
//as sql replace index start from 1 while string index start from 0
126-
if start_pos > 1 && start_pos - 1 < string_len as i64 {
127-
let start = (start_pos - 1) as usize;
128-
res.push_str(&string[..start]);
129-
}
130-
res.push_str(characters);
131-
// if start + replace_len - 1 >= string_length, just to string end
132-
if start_pos + replace_len - 1 < string_len as i64 {
133-
let end = (start_pos + replace_len - 1) as usize;
134-
res.push_str(&string[end..]);
135-
}
136-
Ok(Some(res))
137-
}
138-
_ => Ok(None),
139-
}
140-
})
141-
.collect::<Result<GenericStringArray<T>>>()?;
183+
let result =
184+
process_overlay!(string_array, characters_array, pos_num, false)?;
142185
Ok(Arc::new(result) as ArrayRef)
143186
}
144187
4 => {
@@ -147,37 +190,13 @@ pub fn string_overlay<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef>
147190
let pos_num = as_int64_array(&args[2])?;
148191
let len_num = as_int64_array(&args[3])?;
149192

150-
let result = string_array
151-
.iter()
152-
.zip(characters_array.iter())
153-
.zip(pos_num.iter())
154-
.zip(len_num.iter())
155-
.map(|(((string, characters), start_pos), len)| {
156-
match (string, characters, start_pos, len) {
157-
(Some(string), Some(characters), Some(start_pos), Some(len)) => {
158-
let string_len = string.chars().count();
159-
let characters_len = characters.chars().count();
160-
let replace_len = len.min(string_len as i64);
161-
let mut res =
162-
String::with_capacity(string_len.max(characters_len));
163-
164-
//as sql replace index start from 1 while string index start from 0
165-
if start_pos > 1 && start_pos - 1 < string_len as i64 {
166-
let start = (start_pos - 1) as usize;
167-
res.push_str(&string[..start]);
168-
}
169-
res.push_str(characters);
170-
// if start + replace_len - 1 >= string_length, just to string end
171-
if start_pos + replace_len - 1 < string_len as i64 {
172-
let end = (start_pos + replace_len - 1) as usize;
173-
res.push_str(&string[end..]);
174-
}
175-
Ok(Some(res))
176-
}
177-
_ => Ok(None),
178-
}
179-
})
180-
.collect::<Result<GenericStringArray<T>>>()?;
193+
let result = process_overlay!(
194+
string_array,
195+
characters_array,
196+
pos_num,
197+
len_num,
198+
false
199+
)?;
181200
Ok(Arc::new(result) as ArrayRef)
182201
}
183202
other => {
@@ -193,36 +212,7 @@ pub fn string_view_overlay<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<Arra
193212
let characters_array = as_string_view_array(&args[1])?;
194213
let pos_num = as_int64_array(&args[2])?;
195214

196-
let result = string_array
197-
.iter()
198-
.zip(characters_array.iter())
199-
.zip(pos_num.iter())
200-
.map(|((string, characters), start_pos)| {
201-
match (string, characters, start_pos) {
202-
(Some(string), Some(characters), Some(start_pos)) => {
203-
let string_len = string.chars().count();
204-
let characters_len = characters.chars().count();
205-
let replace_len = characters_len as i64;
206-
let mut res =
207-
String::with_capacity(string_len.max(characters_len));
208-
209-
//as sql replace index start from 1 while string index start from 0
210-
if start_pos > 1 && start_pos - 1 < string_len as i64 {
211-
let start = (start_pos - 1) as usize;
212-
res.push_str(&string[..start]);
213-
}
214-
res.push_str(characters);
215-
// if start + replace_len - 1 >= string_length, just to string end
216-
if start_pos + replace_len - 1 < string_len as i64 {
217-
let end = (start_pos + replace_len - 1) as usize;
218-
res.push_str(&string[end..]);
219-
}
220-
Ok(Some(res))
221-
}
222-
_ => Ok(None),
223-
}
224-
})
225-
.collect::<Result<GenericStringArray<T>>>()?;
215+
let result = process_overlay!(string_array, characters_array, pos_num, true)?;
226216
Ok(Arc::new(result) as ArrayRef)
227217
}
228218
4 => {
@@ -231,37 +221,8 @@ pub fn string_view_overlay<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<Arra
231221
let pos_num = as_int64_array(&args[2])?;
232222
let len_num = as_int64_array(&args[3])?;
233223

234-
let result = string_array
235-
.iter()
236-
.zip(characters_array.iter())
237-
.zip(pos_num.iter())
238-
.zip(len_num.iter())
239-
.map(|(((string, characters), start_pos), len)| {
240-
match (string, characters, start_pos, len) {
241-
(Some(string), Some(characters), Some(start_pos), Some(len)) => {
242-
let string_len = string.chars().count();
243-
let characters_len = characters.chars().count();
244-
let replace_len = len.min(string_len as i64);
245-
let mut res =
246-
String::with_capacity(string_len.max(characters_len));
247-
248-
//as sql replace index start from 1 while string index start from 0
249-
if start_pos > 1 && start_pos - 1 < string_len as i64 {
250-
let start = (start_pos - 1) as usize;
251-
res.push_str(&string[..start]);
252-
}
253-
res.push_str(characters);
254-
// if start + replace_len - 1 >= string_length, just to string end
255-
if start_pos + replace_len - 1 < string_len as i64 {
256-
let end = (start_pos + replace_len - 1) as usize;
257-
res.push_str(&string[end..]);
258-
}
259-
Ok(Some(res))
260-
}
261-
_ => Ok(None),
262-
}
263-
})
264-
.collect::<Result<GenericStringArray<T>>>()?;
224+
let result =
225+
process_overlay!(string_array, characters_array, pos_num, len_num, true)?;
265226
Ok(Arc::new(result) as ArrayRef)
266227
}
267228
other => {

0 commit comments

Comments
 (0)