From 3703527942ff9c939b542d3e44a7f8166ebd9071 Mon Sep 17 00:00:00 2001 From: zhli1142015 Date: Tue, 24 Dec 2024 11:10:28 +0800 Subject: [PATCH 1/4] feat: Support decimal type for Spark in function --- velox/docs/functions/spark/array.rst | 2 +- velox/functions/sparksql/In.cpp | 18 ++++++++ velox/functions/sparksql/tests/InTest.cpp | 51 +++++++++++++++++++++++ 3 files changed, 70 insertions(+), 1 deletion(-) diff --git a/velox/docs/functions/spark/array.rst b/velox/docs/functions/spark/array.rst index 356232e96639..c594f5a4acd1 100644 --- a/velox/docs/functions/spark/array.rst +++ b/velox/docs/functions/spark/array.rst @@ -196,7 +196,7 @@ Array Functions .. spark:function:: in(value, array(E)) -> boolean Returns true if value matches at least one of the elements of the array. - Supports BOOLEAN, REAL, DOUBLE, BIGINT, VARCHAR, TIMESTAMP, DATE input types. + Supports BOOLEAN, REAL, DOUBLE, BIGINT, VARCHAR, TIMESTAMP, DATE, DECIMAL input types. .. spark:function:: shuffle(array(E), seed) -> array(E) diff --git a/velox/functions/sparksql/In.cpp b/velox/functions/sparksql/In.cpp index cddfbaab9ed7..91b1c144d1b5 100644 --- a/velox/functions/sparksql/In.cpp +++ b/velox/functions/sparksql/In.cpp @@ -130,6 +130,22 @@ void registerInFn(const std::string& prefix) { {prefix + "in"}); } +void registerInFnForShortDecimal(const std::string& prefix) { + registerFunction< + InFunctionOuter>::template Inner, + bool, + ShortDecimal, + Array>>({prefix + "in"}); +} + +void registerInFnForLongDecimal(const std::string& prefix) { + registerFunction< + InFunctionOuter>::template Inner, + bool, + LongDecimal, + Array>>({prefix + "in"}); +} + } // namespace void registerIn(const std::string& prefix) { @@ -143,6 +159,8 @@ void registerIn(const std::string& prefix) { registerInFn(prefix); registerInFn(prefix); registerInFn(prefix); + registerInFnForShortDecimal(prefix); + registerInFnForLongDecimal(prefix); } } // namespace facebook::velox::functions::sparksql diff --git a/velox/functions/sparksql/tests/InTest.cpp b/velox/functions/sparksql/tests/InTest.cpp index 5daaecd1cc94..d26e12d9ee0e 100644 --- a/velox/functions/sparksql/tests/InTest.cpp +++ b/velox/functions/sparksql/tests/InTest.cpp @@ -194,6 +194,57 @@ TEST_F(InTest, Bool) { EXPECT_EQ(in(false, {false}), true); } +TEST_F(InTest, shortDecimal) { + EXPECT_EQ(in(1, {1, 2}, DECIMAL(2, 1)), true); + EXPECT_EQ(in(2, {1, 2}, DECIMAL(10, 5)), true); + EXPECT_EQ(in(3, {1, 2}, DECIMAL(17, 11)), false); + EXPECT_EQ(in(std::nullopt, {1, 2}, DECIMAL(3, 2)), std::nullopt); + EXPECT_EQ(in(1, {1, std::nullopt, 2}, DECIMAL(3, 2)), true); + EXPECT_EQ(in(2, {1, std::nullopt, 2}, DECIMAL(3, 2)), true); + EXPECT_EQ(in(3, {1, std::nullopt, 2}, DECIMAL(3, 2)), std::nullopt); + EXPECT_EQ( + in(std::nullopt, {1, std::nullopt, 2}, DECIMAL(3, 2)), + std::nullopt); + EXPECT_EQ( + in( + DecimalUtil::kShortDecimalMin, + {DecimalUtil::kShortDecimalMin, DecimalUtil::kShortDecimalMax}, + DECIMAL(18, 9)), + true); + EXPECT_EQ( + in( + DecimalUtil::kShortDecimalMax, + {DecimalUtil::kShortDecimalMin, DecimalUtil::kShortDecimalMax}, + DECIMAL(18, 9)), + true); +} + +TEST_F(InTest, longDecimal) { + EXPECT_EQ(in(1, {1, 2}, DECIMAL(21, 2)), true); + EXPECT_EQ(in(2, {1, 2}, DECIMAL(29, 10)), true); + EXPECT_EQ(in(3, {1, 2}, DECIMAL(35, 20)), false); + EXPECT_EQ(in(std::nullopt, {1, 2}, DECIMAL(23, 2)), std::nullopt); + EXPECT_EQ(in(1, {1, std::nullopt, 2}, DECIMAL(23, 2)), true); + EXPECT_EQ(in(2, {1, std::nullopt, 2}, DECIMAL(23, 2)), true); + EXPECT_EQ( + in(3, {1, std::nullopt, 2}, DECIMAL(23, 2)), std::nullopt); + EXPECT_EQ( + in(std::nullopt, {1, std::nullopt, 2}, DECIMAL(23, 2)), + std::nullopt); + EXPECT_EQ( + in( + DecimalUtil::kLongDecimalMin, + {DecimalUtil::kLongDecimalMin, DecimalUtil::kLongDecimalMax}, + DECIMAL(38, 19)), + true); + EXPECT_EQ( + in( + DecimalUtil::kLongDecimalMax, + {DecimalUtil::kLongDecimalMin, DecimalUtil::kLongDecimalMax}, + DECIMAL(38, 19)), + true); +} + TEST_F(InTest, Const) { const auto eval = [&](const std::string& expr) { return evaluateOnce(expr, false); From 93b98318daef19bfa7c6af7c7e1c86793d231b70 Mon Sep 17 00:00:00 2001 From: zhli1142015 Date: Tue, 24 Dec 2024 18:35:05 +0800 Subject: [PATCH 2/4] address comments --- velox/functions/sparksql/In.cpp | 8 ++------ velox/functions/sparksql/tests/InTest.cpp | 15 --------------- 2 files changed, 2 insertions(+), 21 deletions(-) diff --git a/velox/functions/sparksql/In.cpp b/velox/functions/sparksql/In.cpp index 91b1c144d1b5..8aea97fc2231 100644 --- a/velox/functions/sparksql/In.cpp +++ b/velox/functions/sparksql/In.cpp @@ -130,15 +130,12 @@ void registerInFn(const std::string& prefix) { {prefix + "in"}); } -void registerInFnForShortDecimal(const std::string& prefix) { +void registerInFnForDecimal(const std::string& prefix) { registerFunction< InFunctionOuter>::template Inner, bool, ShortDecimal, Array>>({prefix + "in"}); -} - -void registerInFnForLongDecimal(const std::string& prefix) { registerFunction< InFunctionOuter>::template Inner, bool, @@ -159,8 +156,7 @@ void registerIn(const std::string& prefix) { registerInFn(prefix); registerInFn(prefix); registerInFn(prefix); - registerInFnForShortDecimal(prefix); - registerInFnForLongDecimal(prefix); + registerInFnForDecimal(prefix); } } // namespace facebook::velox::functions::sparksql diff --git a/velox/functions/sparksql/tests/InTest.cpp b/velox/functions/sparksql/tests/InTest.cpp index d26e12d9ee0e..7d61cae7b14e 100644 --- a/velox/functions/sparksql/tests/InTest.cpp +++ b/velox/functions/sparksql/tests/InTest.cpp @@ -198,13 +198,6 @@ TEST_F(InTest, shortDecimal) { EXPECT_EQ(in(1, {1, 2}, DECIMAL(2, 1)), true); EXPECT_EQ(in(2, {1, 2}, DECIMAL(10, 5)), true); EXPECT_EQ(in(3, {1, 2}, DECIMAL(17, 11)), false); - EXPECT_EQ(in(std::nullopt, {1, 2}, DECIMAL(3, 2)), std::nullopt); - EXPECT_EQ(in(1, {1, std::nullopt, 2}, DECIMAL(3, 2)), true); - EXPECT_EQ(in(2, {1, std::nullopt, 2}, DECIMAL(3, 2)), true); - EXPECT_EQ(in(3, {1, std::nullopt, 2}, DECIMAL(3, 2)), std::nullopt); - EXPECT_EQ( - in(std::nullopt, {1, std::nullopt, 2}, DECIMAL(3, 2)), - std::nullopt); EXPECT_EQ( in( DecimalUtil::kShortDecimalMin, @@ -223,14 +216,6 @@ TEST_F(InTest, longDecimal) { EXPECT_EQ(in(1, {1, 2}, DECIMAL(21, 2)), true); EXPECT_EQ(in(2, {1, 2}, DECIMAL(29, 10)), true); EXPECT_EQ(in(3, {1, 2}, DECIMAL(35, 20)), false); - EXPECT_EQ(in(std::nullopt, {1, 2}, DECIMAL(23, 2)), std::nullopt); - EXPECT_EQ(in(1, {1, std::nullopt, 2}, DECIMAL(23, 2)), true); - EXPECT_EQ(in(2, {1, std::nullopt, 2}, DECIMAL(23, 2)), true); - EXPECT_EQ( - in(3, {1, std::nullopt, 2}, DECIMAL(23, 2)), std::nullopt); - EXPECT_EQ( - in(std::nullopt, {1, std::nullopt, 2}, DECIMAL(23, 2)), - std::nullopt); EXPECT_EQ( in( DecimalUtil::kLongDecimalMin, From ee453f272bb94d7cf744af496c58c2e0e9cd5432 Mon Sep 17 00:00:00 2001 From: zhli1142015 Date: Fri, 27 Dec 2024 10:28:35 +0800 Subject: [PATCH 3/4] address comments --- velox/functions/sparksql/In.cpp | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/velox/functions/sparksql/In.cpp b/velox/functions/sparksql/In.cpp index 8aea97fc2231..dd5c02767afb 100644 --- a/velox/functions/sparksql/In.cpp +++ b/velox/functions/sparksql/In.cpp @@ -129,20 +129,6 @@ void registerInFn(const std::string& prefix) { registerFunction::template Inner, bool, T, Array>( {prefix + "in"}); } - -void registerInFnForDecimal(const std::string& prefix) { - registerFunction< - InFunctionOuter>::template Inner, - bool, - ShortDecimal, - Array>>({prefix + "in"}); - registerFunction< - InFunctionOuter>::template Inner, - bool, - LongDecimal, - Array>>({prefix + "in"}); -} - } // namespace void registerIn(const std::string& prefix) { @@ -156,7 +142,8 @@ void registerIn(const std::string& prefix) { registerInFn(prefix); registerInFn(prefix); registerInFn(prefix); - registerInFnForDecimal(prefix); + registerInFn>(prefix); + registerInFn>(prefix); } } // namespace facebook::velox::functions::sparksql From 1b2b1d65c6fbb0516784c7f55c6bc87f843130cf Mon Sep 17 00:00:00 2001 From: zhli1142015 Date: Fri, 27 Dec 2024 13:49:36 +0800 Subject: [PATCH 4/4] address comments --- velox/docs/functions/spark/decimal.rst | 4 ++++ velox/functions/sparksql/In.cpp | 1 + 2 files changed, 5 insertions(+) diff --git a/velox/docs/functions/spark/decimal.rst b/velox/docs/functions/spark/decimal.rst index 654f057b2607..69ff40fbf422 100644 --- a/velox/docs/functions/spark/decimal.rst +++ b/velox/docs/functions/spark/decimal.rst @@ -115,6 +115,10 @@ Returns NULL when the actual result cannot be represented with the calculated de Decimal Functions ----------------- +.. spark:function:: in(x: decimal(p, s), array(decimal(p, s))) -> boolean + + Returns true if ``x`` matches at least one of the elements of the array. + .. spark:function:: unaryminus(x: decimal(p, s)) -> r: decimal(p, s) Returns negated value of x (r = -x). Corresponds to Spark's operator ``-``. diff --git a/velox/functions/sparksql/In.cpp b/velox/functions/sparksql/In.cpp index dd5c02767afb..dfd5ffe30b72 100644 --- a/velox/functions/sparksql/In.cpp +++ b/velox/functions/sparksql/In.cpp @@ -129,6 +129,7 @@ void registerInFn(const std::string& prefix) { registerFunction::template Inner, bool, T, Array>( {prefix + "in"}); } + } // namespace void registerIn(const std::string& prefix) {