From a7eb44851e08d4aceee4549364ed56ab2db0b7b5 Mon Sep 17 00:00:00 2001 From: ongchi Date: Sun, 7 Jan 2024 20:03:52 +0800 Subject: [PATCH 01/12] Add array_has, array_has_all and array_has_any --- datafusion/tests/test_functions.py | 16 ++++++++++++++++ src/functions.rs | 6 ++++++ 2 files changed, 22 insertions(+) diff --git a/datafusion/tests/test_functions.py b/datafusion/tests/test_functions.py index d0514f89..c19545b9 100644 --- a/datafusion/tests/test_functions.py +++ b/datafusion/tests/test_functions.py @@ -253,6 +253,22 @@ def test_array_functions(): f.list_length(col), lambda: [len(r) for r in data], ], + [ + f.array_has(col, literal(1.0)), + lambda: [1.0 in r for r in data], + ], + [ + f.array_has_all( + col, f.make_array(*[literal(v) for v in [1.0, 3.0, 5.0]]) + ), + lambda: [np.all([v in r for v in [1.0, 3.0, 5.0]]) for r in data], + ], + [ + f.array_has_any( + col, f.make_array(*[literal(v) for v in [1.0, 3.0, 5.0]]) + ), + lambda: [np.any([v in r for v in [1.0, 3.0, 5.0]]) for r in data], + ], ] for stmt, py_expr in test_items: diff --git a/src/functions.rs b/src/functions.rs index 3dc5322a..563aefd4 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -369,6 +369,9 @@ scalar_function!(list_element, ArrayElement); scalar_function!(list_extract, ArrayElement); scalar_function!(array_length, ArrayLength); scalar_function!(list_length, ArrayLength); +scalar_function!(array_has, ArrayHas); +scalar_function!(array_has_all, ArrayHasAll); +scalar_function!(array_has_any, ArrayHasAny); aggregate_function!(approx_distinct, ApproxDistinct); aggregate_function!(approx_median, ApproxMedian); @@ -572,6 +575,9 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(list_extract))?; m.add_wrapped(wrap_pyfunction!(array_length))?; m.add_wrapped(wrap_pyfunction!(list_length))?; + m.add_wrapped(wrap_pyfunction!(array_has))?; + m.add_wrapped(wrap_pyfunction!(array_has_all))?; + m.add_wrapped(wrap_pyfunction!(array_has_any))?; Ok(()) } From d48bcb27c68b478a2dfa2fefbaabbf9cd66ed925 Mon Sep 17 00:00:00 2001 From: ongchi Date: Sun, 7 Jan 2024 22:13:56 +0800 Subject: [PATCH 02/12] Add array_position, array_indexof, list_position and list_indexof --- datafusion/tests/test_functions.py | 24 +++++++++++++++++++++++- src/functions.rs | 8 ++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/datafusion/tests/test_functions.py b/datafusion/tests/test_functions.py index c19545b9..9a7dc277 100644 --- a/datafusion/tests/test_functions.py +++ b/datafusion/tests/test_functions.py @@ -207,6 +207,12 @@ def test_array_functions(): ) df = ctx.create_dataframe([[batch]]) + def py_indexof(arr, v): + try: + return arr.index(v) + 1 + except ValueError: + return np.nan + col = column("arr") test_items = [ [ @@ -269,10 +275,26 @@ def test_array_functions(): ), lambda: [np.any([v in r for v in [1.0, 3.0, 5.0]]) for r in data], ], + [ + f.array_position(col, literal(1.0)), + lambda: [py_indexof(r, 1.0) for r in data], + ], + [ + f.array_indexof(col, literal(1.0)), + lambda: [py_indexof(r, 1.0) for r in data], + ], + [ + f.list_position(col, literal(1.0)), + lambda: [py_indexof(r, 1.0) for r in data], + ], + [ + f.list_indexof(col, literal(1.0)), + lambda: [py_indexof(r, 1.0) for r in data], + ], ] for stmt, py_expr in test_items: - query_result = df.select(stmt).collect()[0].column(0).tolist() + query_result = np.array(df.select(stmt).collect()[0].column(0)) for a, b in zip(query_result, py_expr()): np.testing.assert_array_almost_equal(a, b) diff --git a/src/functions.rs b/src/functions.rs index 563aefd4..6eaf7aa9 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -372,6 +372,10 @@ scalar_function!(list_length, ArrayLength); scalar_function!(array_has, ArrayHas); scalar_function!(array_has_all, ArrayHasAll); scalar_function!(array_has_any, ArrayHasAny); +scalar_function!(array_position, ArrayPosition); +scalar_function!(array_indexof, ArrayPosition); +scalar_function!(list_position, ArrayPosition); +scalar_function!(list_indexof, ArrayPosition); aggregate_function!(approx_distinct, ApproxDistinct); aggregate_function!(approx_median, ApproxMedian); @@ -578,6 +582,10 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(array_has))?; m.add_wrapped(wrap_pyfunction!(array_has_all))?; m.add_wrapped(wrap_pyfunction!(array_has_any))?; + m.add_wrapped(wrap_pyfunction!(array_position))?; + m.add_wrapped(wrap_pyfunction!(array_indexof))?; + m.add_wrapped(wrap_pyfunction!(list_position))?; + m.add_wrapped(wrap_pyfunction!(list_indexof))?; Ok(()) } From 46d30572e602131aa46a6e36a663bfbc82892960 Mon Sep 17 00:00:00 2001 From: ongchi Date: Tue, 9 Jan 2024 09:22:41 +0800 Subject: [PATCH 03/12] Add array_to_string, array_join, list_to_string and list_join --- datafusion/tests/test_functions.py | 24 ++++++++++++++++++++++++ src/functions.rs | 8 ++++++++ 2 files changed, 32 insertions(+) diff --git a/datafusion/tests/test_functions.py b/datafusion/tests/test_functions.py index 9a7dc277..8557d668 100644 --- a/datafusion/tests/test_functions.py +++ b/datafusion/tests/test_functions.py @@ -298,6 +298,30 @@ def py_indexof(arr, v): for a, b in zip(query_result, py_expr()): np.testing.assert_array_almost_equal(a, b) + obj_test_items = [ + [ + f.array_to_string(col, literal(",")), + lambda: [",".join([str(int(v)) for v in r]) for r in data], + ], + [ + f.array_join(col, literal(",")), + lambda: [",".join([str(int(v)) for v in r]) for r in data], + ], + [ + f.list_to_string(col, literal(",")), + lambda: [",".join([str(int(v)) for v in r]) for r in data], + ], + [ + f.list_join(col, literal(",")), + lambda: [",".join([str(int(v)) for v in r]) for r in data], + ], + ] + + for stmt, py_expr in obj_test_items: + query_result = np.array(df.select(stmt).collect()[0].column(0)) + for a, b in zip(query_result, py_expr()): + assert a == b + def test_string_functions(df): df = df.select( diff --git a/src/functions.rs b/src/functions.rs index 6eaf7aa9..78da1519 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -376,6 +376,10 @@ scalar_function!(array_position, ArrayPosition); scalar_function!(array_indexof, ArrayPosition); scalar_function!(list_position, ArrayPosition); scalar_function!(list_indexof, ArrayPosition); +scalar_function!(array_to_string, ArrayToString); +scalar_function!(array_join, ArrayToString); +scalar_function!(list_to_string, ArrayToString); +scalar_function!(list_join, ArrayToString); aggregate_function!(approx_distinct, ApproxDistinct); aggregate_function!(approx_median, ApproxMedian); @@ -586,6 +590,10 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(array_indexof))?; m.add_wrapped(wrap_pyfunction!(list_position))?; m.add_wrapped(wrap_pyfunction!(list_indexof))?; + m.add_wrapped(wrap_pyfunction!(array_to_string))?; + m.add_wrapped(wrap_pyfunction!(array_join))?; + m.add_wrapped(wrap_pyfunction!(list_to_string))?; + m.add_wrapped(wrap_pyfunction!(list_join))?; Ok(()) } From 328f58f63ba9ce52c8e86a3d2645319670cedc1f Mon Sep 17 00:00:00 2001 From: ongchi Date: Tue, 9 Jan 2024 11:48:05 +0800 Subject: [PATCH 04/12] Add array_ndims and list_ndims --- datafusion/tests/test_functions.py | 8 ++++++++ src/functions.rs | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/datafusion/tests/test_functions.py b/datafusion/tests/test_functions.py index 8557d668..19569512 100644 --- a/datafusion/tests/test_functions.py +++ b/datafusion/tests/test_functions.py @@ -291,6 +291,14 @@ def py_indexof(arr, v): f.list_indexof(col, literal(1.0)), lambda: [py_indexof(r, 1.0) for r in data], ], + [ + f.array_ndims(col), + lambda: [np.array(r).ndim for r in data], + ], + [ + f.list_ndims(col), + lambda: [np.array(r).ndim for r in data], + ], ] for stmt, py_expr in test_items: diff --git a/src/functions.rs b/src/functions.rs index 78da1519..3c433d9f 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -380,6 +380,8 @@ scalar_function!(array_to_string, ArrayToString); scalar_function!(array_join, ArrayToString); scalar_function!(list_to_string, ArrayToString); scalar_function!(list_join, ArrayToString); +scalar_function!(array_ndims, ArrayNdims); +scalar_function!(list_ndims, ArrayNdims); aggregate_function!(approx_distinct, ApproxDistinct); aggregate_function!(approx_median, ApproxMedian); @@ -594,6 +596,8 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(array_join))?; m.add_wrapped(wrap_pyfunction!(list_to_string))?; m.add_wrapped(wrap_pyfunction!(list_join))?; + m.add_wrapped(wrap_pyfunction!(array_ndims))?; + m.add_wrapped(wrap_pyfunction!(list_ndims))?; Ok(()) } From 984148828d412594fc7109a447e46a16bd1ca80c Mon Sep 17 00:00:00 2001 From: ongchi Date: Tue, 9 Jan 2024 16:19:19 +0800 Subject: [PATCH 05/12] Add array_push_back, list_append and list_push_back --- datafusion/tests/test_functions.py | 12 ++++++++++++ src/functions.rs | 6 ++++++ 2 files changed, 18 insertions(+) diff --git a/datafusion/tests/test_functions.py b/datafusion/tests/test_functions.py index 19569512..6072a931 100644 --- a/datafusion/tests/test_functions.py +++ b/datafusion/tests/test_functions.py @@ -219,6 +219,18 @@ def py_indexof(arr, v): f.array_append(col, literal(99.0)), lambda: [np.append(arr, 99.0) for arr in data], ], + [ + f.array_push_back(col, literal(99.0)), + lambda: [np.append(arr, 99.0) for arr in data], + ], + [ + f.list_append(col, literal(99.0)), + lambda: [np.append(arr, 99.0) for arr in data], + ], + [ + f.list_push_back(col, literal(99.0)), + lambda: [np.append(arr, 99.0) for arr in data], + ], [ f.array_concat(col, col), lambda: [np.concatenate([arr, arr]) for arr in data], diff --git a/src/functions.rs b/src/functions.rs index 3c433d9f..7792cc90 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -359,6 +359,9 @@ scalar_function!(decode, Decode); // Array Functions scalar_function!(array_append, ArrayAppend); +scalar_function!(array_push_back, ArrayAppend); +scalar_function!(list_append, ArrayAppend); +scalar_function!(list_push_back, ArrayAppend); scalar_function!(array_concat, ArrayConcat); scalar_function!(array_cat, ArrayConcat); scalar_function!(array_dims, ArrayDims); @@ -575,6 +578,9 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { // Array Functions m.add_wrapped(wrap_pyfunction!(array_append))?; + m.add_wrapped(wrap_pyfunction!(array_push_back))?; + m.add_wrapped(wrap_pyfunction!(list_append))?; + m.add_wrapped(wrap_pyfunction!(list_push_back))?; m.add_wrapped(wrap_pyfunction!(array_concat))?; m.add_wrapped(wrap_pyfunction!(array_cat))?; m.add_wrapped(wrap_pyfunction!(array_dims))?; From dabb83a22e748242d9a91b65e32dc559c1c4b124 Mon Sep 17 00:00:00 2001 From: ongchi Date: Tue, 9 Jan 2024 16:22:10 +0800 Subject: [PATCH 06/12] Add array_prepend, array_push_front, list_prepend and list_push_front --- datafusion/tests/test_functions.py | 16 ++++++++++++++++ src/functions.rs | 8 ++++++++ 2 files changed, 24 insertions(+) diff --git a/datafusion/tests/test_functions.py b/datafusion/tests/test_functions.py index 6072a931..c0f94ce4 100644 --- a/datafusion/tests/test_functions.py +++ b/datafusion/tests/test_functions.py @@ -311,6 +311,22 @@ def py_indexof(arr, v): f.list_ndims(col), lambda: [np.array(r).ndim for r in data], ], + [ + f.array_prepend(literal(99.0), col), + lambda: [np.insert(arr, 0, 99.0) for arr in data], + ], + [ + f.array_push_front(literal(99.0), col), + lambda: [np.insert(arr, 0, 99.0) for arr in data], + ], + [ + f.list_prepend(literal(99.0), col), + lambda: [np.insert(arr, 0, 99.0) for arr in data], + ], + [ + f.list_push_front(literal(99.0), col), + lambda: [np.insert(arr, 0, 99.0) for arr in data], + ], ] for stmt, py_expr in test_items: diff --git a/src/functions.rs b/src/functions.rs index 7792cc90..872c8fb2 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -385,6 +385,10 @@ scalar_function!(list_to_string, ArrayToString); scalar_function!(list_join, ArrayToString); scalar_function!(array_ndims, ArrayNdims); scalar_function!(list_ndims, ArrayNdims); +scalar_function!(array_prepend, ArrayPrepend); +scalar_function!(array_push_front, ArrayPrepend); +scalar_function!(list_prepend, ArrayPrepend); +scalar_function!(list_push_front, ArrayPrepend); aggregate_function!(approx_distinct, ApproxDistinct); aggregate_function!(approx_median, ApproxMedian); @@ -604,6 +608,10 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(list_join))?; m.add_wrapped(wrap_pyfunction!(array_ndims))?; m.add_wrapped(wrap_pyfunction!(list_ndims))?; + m.add_wrapped(wrap_pyfunction!(array_prepend))?; + m.add_wrapped(wrap_pyfunction!(array_push_front))?; + m.add_wrapped(wrap_pyfunction!(list_prepend))?; + m.add_wrapped(wrap_pyfunction!(list_push_front))?; Ok(()) } From 3fdd015b369dc63e5e89a1af47cd0026cb86374d Mon Sep 17 00:00:00 2001 From: ongchi Date: Tue, 9 Jan 2024 16:52:20 +0800 Subject: [PATCH 07/12] Add array_pop_back and array_pop_front --- datafusion/tests/test_functions.py | 8 ++++++++ src/functions.rs | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/datafusion/tests/test_functions.py b/datafusion/tests/test_functions.py index c0f94ce4..5b83588b 100644 --- a/datafusion/tests/test_functions.py +++ b/datafusion/tests/test_functions.py @@ -327,6 +327,14 @@ def py_indexof(arr, v): f.list_push_front(literal(99.0), col), lambda: [np.insert(arr, 0, 99.0) for arr in data], ], + [ + f.array_pop_back(col), + lambda: [arr[:-1] for arr in data], + ], + [ + f.array_pop_front(col), + lambda: [arr[1:] for arr in data], + ], ] for stmt, py_expr in test_items: diff --git a/src/functions.rs b/src/functions.rs index 872c8fb2..7b5a6d5a 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -389,6 +389,8 @@ scalar_function!(array_prepend, ArrayPrepend); scalar_function!(array_push_front, ArrayPrepend); scalar_function!(list_prepend, ArrayPrepend); scalar_function!(list_push_front, ArrayPrepend); +scalar_function!(array_pop_back, ArrayPopBack); +scalar_function!(array_pop_front, ArrayPopFront); aggregate_function!(approx_distinct, ApproxDistinct); aggregate_function!(approx_median, ApproxMedian); @@ -612,6 +614,8 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(array_push_front))?; m.add_wrapped(wrap_pyfunction!(list_prepend))?; m.add_wrapped(wrap_pyfunction!(list_push_front))?; + m.add_wrapped(wrap_pyfunction!(array_pop_back))?; + m.add_wrapped(wrap_pyfunction!(array_pop_front))?; Ok(()) } From 1d3efff614fc8cceb299a1b34d8dd733ebcdd49d Mon Sep 17 00:00:00 2001 From: ongchi Date: Tue, 9 Jan 2024 17:42:46 +0800 Subject: [PATCH 08/12] Add array_positions and list_positions --- datafusion/tests/test_functions.py | 14 +++++++++++++- src/functions.rs | 4 ++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/datafusion/tests/test_functions.py b/datafusion/tests/test_functions.py index 5b83588b..72ea2b22 100644 --- a/datafusion/tests/test_functions.py +++ b/datafusion/tests/test_functions.py @@ -200,7 +200,7 @@ def test_math_functions(): def test_array_functions(): - data = [[1.0, 2.0, 3.0], [4.0, 5.0], [6.0]] + data = [[1.0, 2.0, 3.0, 3.0], [4.0, 5.0, 3.0], [6.0]] ctx = SessionContext() batch = pa.RecordBatch.from_arrays( [np.array(data, dtype=object)], names=["arr"] @@ -303,6 +303,18 @@ def py_indexof(arr, v): f.list_indexof(col, literal(1.0)), lambda: [py_indexof(r, 1.0) for r in data], ], + [ + f.array_positions(col, literal(1.0)), + lambda: [ + [i + 1 for i, _v in enumerate(r) if _v == 1.0] for r in data + ], + ], + [ + f.list_positions(col, literal(1.0)), + lambda: [ + [i + 1 for i, _v in enumerate(r) if _v == 1.0] for r in data + ], + ], [ f.array_ndims(col), lambda: [np.array(r).ndim for r in data], diff --git a/src/functions.rs b/src/functions.rs index 7b5a6d5a..db67b156 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -379,6 +379,8 @@ scalar_function!(array_position, ArrayPosition); scalar_function!(array_indexof, ArrayPosition); scalar_function!(list_position, ArrayPosition); scalar_function!(list_indexof, ArrayPosition); +scalar_function!(array_positions, ArrayPositions); +scalar_function!(list_positions, ArrayPositions); scalar_function!(array_to_string, ArrayToString); scalar_function!(array_join, ArrayToString); scalar_function!(list_to_string, ArrayToString); @@ -604,6 +606,8 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(array_indexof))?; m.add_wrapped(wrap_pyfunction!(list_position))?; m.add_wrapped(wrap_pyfunction!(list_indexof))?; + m.add_wrapped(wrap_pyfunction!(array_positions))?; + m.add_wrapped(wrap_pyfunction!(list_positions))?; m.add_wrapped(wrap_pyfunction!(array_to_string))?; m.add_wrapped(wrap_pyfunction!(array_join))?; m.add_wrapped(wrap_pyfunction!(list_to_string))?; From 4289c61c544c7627c54a5dbc34342305a079ba6a Mon Sep 17 00:00:00 2001 From: ongchi Date: Tue, 9 Jan 2024 17:44:44 +0800 Subject: [PATCH 09/12] Add array_remove, list_remove, array_remove_n, list_remove_n, array_remove_all and list_remove_all --- datafusion/tests/test_functions.py | 36 ++++++++++++++++++++++++++++++ src/functions.rs | 12 ++++++++++ 2 files changed, 48 insertions(+) diff --git a/datafusion/tests/test_functions.py b/datafusion/tests/test_functions.py index 72ea2b22..0105ffd9 100644 --- a/datafusion/tests/test_functions.py +++ b/datafusion/tests/test_functions.py @@ -213,6 +213,18 @@ def py_indexof(arr, v): except ValueError: return np.nan + def py_arr_remove(arr, v, n=None): + new_arr = arr[:] + found = 0 + while found != n: + try: + new_arr.remove(v) + found += 1 + except ValueError: + break + + return new_arr + col = column("arr") test_items = [ [ @@ -347,6 +359,30 @@ def py_indexof(arr, v): f.array_pop_front(col), lambda: [arr[1:] for arr in data], ], + [ + f.array_remove(col, literal(3.0)), + lambda: [py_arr_remove(arr, 3.0, 1) for arr in data], + ], + [ + f.list_remove(col, literal(3.0)), + lambda: [py_arr_remove(arr, 3.0, 1) for arr in data], + ], + [ + f.array_remove_n(col, literal(3.0), literal(2)), + lambda: [py_arr_remove(arr, 3.0, 2) for arr in data], + ], + [ + f.list_remove_n(col, literal(3.0), literal(2)), + lambda: [py_arr_remove(arr, 3.0, 2) for arr in data], + ], + [ + f.array_remove_all(col, literal(3.0)), + lambda: [py_arr_remove(arr, 3.0) for arr in data], + ], + [ + f.list_remove_all(col, literal(3.0)), + lambda: [py_arr_remove(arr, 3.0) for arr in data], + ], ] for stmt, py_expr in test_items: diff --git a/src/functions.rs b/src/functions.rs index db67b156..f427d316 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -393,6 +393,12 @@ scalar_function!(list_prepend, ArrayPrepend); scalar_function!(list_push_front, ArrayPrepend); scalar_function!(array_pop_back, ArrayPopBack); scalar_function!(array_pop_front, ArrayPopFront); +scalar_function!(array_remove, ArrayRemove); +scalar_function!(list_remove, ArrayRemove); +scalar_function!(array_remove_n, ArrayRemoveN); +scalar_function!(list_remove_n, ArrayRemoveN); +scalar_function!(array_remove_all, ArrayRemoveAll); +scalar_function!(list_remove_all, ArrayRemoveAll); aggregate_function!(approx_distinct, ApproxDistinct); aggregate_function!(approx_median, ApproxMedian); @@ -620,6 +626,12 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(list_push_front))?; m.add_wrapped(wrap_pyfunction!(array_pop_back))?; m.add_wrapped(wrap_pyfunction!(array_pop_front))?; + m.add_wrapped(wrap_pyfunction!(array_remove))?; + m.add_wrapped(wrap_pyfunction!(list_remove))?; + m.add_wrapped(wrap_pyfunction!(array_remove_n))?; + m.add_wrapped(wrap_pyfunction!(list_remove_n))?; + m.add_wrapped(wrap_pyfunction!(array_remove_all))?; + m.add_wrapped(wrap_pyfunction!(list_remove_all))?; Ok(()) } From c3d9e70d556074b6d7b5ce900ab488e29e3414d6 Mon Sep 17 00:00:00 2001 From: ongchi Date: Tue, 9 Jan 2024 20:29:50 +0800 Subject: [PATCH 10/12] Add array_repeat --- datafusion/tests/test_functions.py | 10 ++++++++-- src/functions.rs | 2 ++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/datafusion/tests/test_functions.py b/datafusion/tests/test_functions.py index 0105ffd9..54604684 100644 --- a/datafusion/tests/test_functions.py +++ b/datafusion/tests/test_functions.py @@ -383,12 +383,18 @@ def py_arr_remove(arr, v, n=None): f.list_remove_all(col, literal(3.0)), lambda: [py_arr_remove(arr, 3.0) for arr in data], ], + [ + f.array_repeat(col, literal(2)), + lambda: [[arr] * 2 for arr in data], + ], ] for stmt, py_expr in test_items: - query_result = np.array(df.select(stmt).collect()[0].column(0)) + query_result = df.select(stmt).collect()[0].column(0) for a, b in zip(query_result, py_expr()): - np.testing.assert_array_almost_equal(a, b) + np.testing.assert_array_almost_equal( + np.array(a.as_py(), dtype=float), np.array(b, dtype=float) + ) obj_test_items = [ [ diff --git a/src/functions.rs b/src/functions.rs index f427d316..4939bf62 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -399,6 +399,7 @@ scalar_function!(array_remove_n, ArrayRemoveN); scalar_function!(list_remove_n, ArrayRemoveN); scalar_function!(array_remove_all, ArrayRemoveAll); scalar_function!(list_remove_all, ArrayRemoveAll); +scalar_function!(array_repeat, ArrayRepeat); aggregate_function!(approx_distinct, ApproxDistinct); aggregate_function!(approx_median, ApproxMedian); @@ -632,6 +633,7 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(list_remove_n))?; m.add_wrapped(wrap_pyfunction!(array_remove_all))?; m.add_wrapped(wrap_pyfunction!(list_remove_all))?; + m.add_wrapped(wrap_pyfunction!(array_repeat))?; Ok(()) } From f0933861d5c0d59ef98c37b5457b5fd8b5e73001 Mon Sep 17 00:00:00 2001 From: ongchi Date: Tue, 9 Jan 2024 20:54:55 +0800 Subject: [PATCH 11/12] Add array_replace, list_replace, array_replace_n, list_replace_n, array_replace_all, list_replace_all --- datafusion/tests/test_functions.py | 37 ++++++++++++++++++++++++++++++ src/functions.rs | 12 ++++++++++ 2 files changed, 49 insertions(+) diff --git a/datafusion/tests/test_functions.py b/datafusion/tests/test_functions.py index 54604684..eb717dba 100644 --- a/datafusion/tests/test_functions.py +++ b/datafusion/tests/test_functions.py @@ -225,6 +225,19 @@ def py_arr_remove(arr, v, n=None): return new_arr + def py_arr_replace(arr, from_, to, n=None): + new_arr = arr[:] + found = 0 + while found != n: + try: + idx = new_arr.index(from_) + new_arr[idx] = to + found += 1 + except ValueError: + break + + return new_arr + col = column("arr") test_items = [ [ @@ -387,6 +400,30 @@ def py_arr_remove(arr, v, n=None): f.array_repeat(col, literal(2)), lambda: [[arr] * 2 for arr in data], ], + [ + f.array_replace(col, literal(3.0), literal(4.0)), + lambda: [py_arr_replace(arr, 3.0, 4.0, 1) for arr in data], + ], + [ + f.list_replace(col, literal(3.0), literal(4.0)), + lambda: [py_arr_replace(arr, 3.0, 4.0, 1) for arr in data], + ], + [ + f.array_replace_n(col, literal(3.0), literal(4.0), literal(1)), + lambda: [py_arr_replace(arr, 3.0, 4.0, 1) for arr in data], + ], + [ + f.list_replace_n(col, literal(3.0), literal(4.0), literal(2)), + lambda: [py_arr_replace(arr, 3.0, 4.0, 2) for arr in data], + ], + [ + f.array_replace_all(col, literal(3.0), literal(4.0)), + lambda: [py_arr_replace(arr, 3.0, 4.0) for arr in data], + ], + [ + f.list_replace_all(col, literal(3.0), literal(4.0)), + lambda: [py_arr_replace(arr, 3.0, 4.0) for arr in data], + ], ] for stmt, py_expr in test_items: diff --git a/src/functions.rs b/src/functions.rs index 4939bf62..c7943752 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -400,6 +400,12 @@ scalar_function!(list_remove_n, ArrayRemoveN); scalar_function!(array_remove_all, ArrayRemoveAll); scalar_function!(list_remove_all, ArrayRemoveAll); scalar_function!(array_repeat, ArrayRepeat); +scalar_function!(array_replace, ArrayReplace); +scalar_function!(list_replace, ArrayReplace); +scalar_function!(array_replace_n, ArrayReplaceN); +scalar_function!(list_replace_n, ArrayReplaceN); +scalar_function!(array_replace_all, ArrayReplaceAll); +scalar_function!(list_replace_all, ArrayReplaceAll); aggregate_function!(approx_distinct, ApproxDistinct); aggregate_function!(approx_median, ApproxMedian); @@ -634,6 +640,12 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(array_remove_all))?; m.add_wrapped(wrap_pyfunction!(list_remove_all))?; m.add_wrapped(wrap_pyfunction!(array_repeat))?; + m.add_wrapped(wrap_pyfunction!(array_replace))?; + m.add_wrapped(wrap_pyfunction!(list_replace))?; + m.add_wrapped(wrap_pyfunction!(array_replace_n))?; + m.add_wrapped(wrap_pyfunction!(list_replace_n))?; + m.add_wrapped(wrap_pyfunction!(array_replace_all))?; + m.add_wrapped(wrap_pyfunction!(list_replace_all))?; Ok(()) } From e5649c97d4bb139197f81aac1b1ed255cf76fa27 Mon Sep 17 00:00:00 2001 From: ongchi Date: Tue, 9 Jan 2024 21:08:36 +0800 Subject: [PATCH 12/12] Add array_slice and list_slice --- datafusion/tests/test_functions.py | 8 ++++++++ src/functions.rs | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/datafusion/tests/test_functions.py b/datafusion/tests/test_functions.py index eb717dba..7e772589 100644 --- a/datafusion/tests/test_functions.py +++ b/datafusion/tests/test_functions.py @@ -424,6 +424,14 @@ def py_arr_replace(arr, from_, to, n=None): f.list_replace_all(col, literal(3.0), literal(4.0)), lambda: [py_arr_replace(arr, 3.0, 4.0) for arr in data], ], + [ + f.array_slice(col, literal(2), literal(4)), + lambda: [arr[1:4] for arr in data], + ], + [ + f.list_slice(col, literal(-1), literal(2)), + lambda: [arr[-1:2] for arr in data], + ], ] for stmt, py_expr in test_items: diff --git a/src/functions.rs b/src/functions.rs index c7943752..e3c485a3 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -406,6 +406,8 @@ scalar_function!(array_replace_n, ArrayReplaceN); scalar_function!(list_replace_n, ArrayReplaceN); scalar_function!(array_replace_all, ArrayReplaceAll); scalar_function!(list_replace_all, ArrayReplaceAll); +scalar_function!(array_slice, ArraySlice); +scalar_function!(list_slice, ArraySlice); aggregate_function!(approx_distinct, ApproxDistinct); aggregate_function!(approx_median, ApproxMedian); @@ -646,6 +648,8 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(list_replace_n))?; m.add_wrapped(wrap_pyfunction!(array_replace_all))?; m.add_wrapped(wrap_pyfunction!(list_replace_all))?; + m.add_wrapped(wrap_pyfunction!(array_slice))?; + m.add_wrapped(wrap_pyfunction!(list_slice))?; Ok(()) }