diff --git a/velox/docs/functions/presto/array.rst b/velox/docs/functions/presto/array.rst index ffc691c44458..3edd19c7731e 100644 --- a/velox/docs/functions/presto/array.rst +++ b/velox/docs/functions/presto/array.rst @@ -49,6 +49,10 @@ Array Functions SELECT array_distinct(ARRAY [1, 2, 1]); -- [1, 2] SELECT array_distinct(ARRAY [1, NULL, NULL]); -- [1, NULL] +.. function:: array_dupes(array(E)) -> array(E) + + This is an alias for :func:`array_duplicates`. + .. function:: array_duplicates(array(E)) -> array(E) Returns a set of elements that occur more than once in array. @@ -77,6 +81,10 @@ Array Functions SELECT array_frequency(ARRAY ["knock", "knock", "who", "?"]); -- {"knock" -> 2, "who" -> 1, "?" -> 1} SELECT array_frequency(ARRAY []); -- {} +.. function:: array_has_dupes(array(E)) -> boolean + + This is an alias for :func:`array_has_duplicates`. + .. function:: array_has_duplicates(array(E)) -> boolean Returns a boolean: whether array has any elements that occur more than once. 
diff --git a/velox/functions/prestosql/registration/ArrayFunctionsRegistration.cpp b/velox/functions/prestosql/registration/ArrayFunctionsRegistration.cpp index a83773fa7c04..821660523997 100644 --- a/velox/functions/prestosql/registration/ArrayFunctionsRegistration.cpp +++ b/velox/functions/prestosql/registration/ArrayFunctionsRegistration.cpp @@ -70,7 +70,7 @@ inline void registerArrayHasDuplicatesFunctions(const std::string& prefix) { registerFunction< ParameterBinder, bool, - Array>({prefix + "array_has_duplicates"}); + Array>({prefix + "array_has_duplicates", prefix + "array_has_dupes"}); } template @@ -133,6 +133,7 @@ void registerArrayFunctions(const std::string& prefix) { VELOX_REGISTER_VECTOR_FUNCTION(udf_array_distinct, prefix + "array_distinct"); VELOX_REGISTER_VECTOR_FUNCTION( udf_array_duplicates, prefix + "array_duplicates"); + VELOX_REGISTER_VECTOR_FUNCTION(udf_array_duplicates, prefix + "array_dupes"); VELOX_REGISTER_VECTOR_FUNCTION( udf_array_intersect, prefix + "array_intersect"); VELOX_REGISTER_VECTOR_FUNCTION(udf_array_contains, prefix + "contains"); diff --git a/velox/functions/prestosql/tests/ArrayDuplicatesTest.cpp b/velox/functions/prestosql/tests/ArrayDuplicatesTest.cpp index 3bf1edf7cff9..0504cbad15de 100644 --- a/velox/functions/prestosql/tests/ArrayDuplicatesTest.cpp +++ b/velox/functions/prestosql/tests/ArrayDuplicatesTest.cpp @@ -36,7 +36,7 @@ class ArrayDuplicatesTest : public FunctionBaseTest { } // Execute test for bigint type. 
- void testBigint() { + void testBigint(const std::string& funcName) { auto array = makeNullableArrayVector({ {}, {1, @@ -72,7 +72,127 @@ class ArrayDuplicatesTest : public FunctionBaseTest { {std::nullopt, 0, 1, std::numeric_limits::max()}, }); - testExpr(expected, "array_duplicates(C0)", {array}); + testExpr(expected, fmt::format("{}(C0)", funcName), {array}); + } + + // Inline (short) strings. + void testInlineStringArrays(const std::string& funcName) { + using S = StringView; + + auto array = makeNullableArrayVector({ + {}, + {S("")}, + {std::nullopt}, + {S("a"), S("b")}, + {S("a"), std::nullopt, S("b")}, + {S("a"), S("a")}, + {S("b"), S("a"), S("b"), S("a"), S("a")}, + {std::nullopt, std::nullopt}, + {S("b"), std::nullopt, S("a"), S("a"), std::nullopt, S("b")}, + }); + + auto expected = makeNullableArrayVector({ + {}, + {}, + {}, + {}, + {}, + {S("a")}, + {S("a"), S("b")}, + {std::nullopt}, + {std::nullopt, S("a"), S("b")}, + }); + + testExpr(expected, fmt::format("{}(C0)", funcName), {array}); + } + + // Non-inline (> 12 character length) strings. + void testStringArrays(const std::string& funcName) { + using S = StringView; + + auto array = makeNullableArrayVector({ + {S("red shiny car ahead"), S("blue clear sky above")}, + {S("blue clear sky above"), + S("yellow rose flowers"), + std::nullopt, + S("blue clear sky above"), + S("orange beautiful sunset")}, + { + S("red shiny car ahead"), + std::nullopt, + S("purple is an elegant color"), + S("red shiny car ahead"), + S("green plants make us happy"), + S("purple is an elegant color"), + std::nullopt, + S("purple is an elegant color"), + }, + }); + + auto expected = makeNullableArrayVector({ + {}, + {S("blue clear sky above")}, + {std::nullopt, + S("purple is an elegant color"), + S("red shiny car ahead")}, + }); + + testExpr(expected, fmt::format("{}(C0)", funcName), {array}); + } + + // Evaluates funcName on c1 for even rows and on c2 for odd rows. + void testNonContiguousRows(const std::string& funcName) { + auto c0 = makeFlatVector(4, [](auto row) { return row; }); + auto c1 = makeArrayVector({ + {1, 1, 2, 3, 3}, + {1, 1, 2, 3, 4, 4}, + {1, 1, 2, 3, 
4, 5, 5}, + {1, 1, 2, 3, 3, 4, 5, 6, 6}, + }); + + auto c2 = makeArrayVector({ + {0, 0, 1, 1, 2, 3, 3}, + {0, 0, 1, 1, 2, 3, 4, 4}, + {0, 0, 1, 1, 2, 3, 4, 5, 5}, + {0, 0, 1, 1, 2, 3, 4, 5, 6, 6}, + }); + + auto expected = makeArrayVector({ + {1, 3}, + {0, 1, 4}, + {1, 5}, + {0, 1, 6}, + }); + + auto result = evaluate( + fmt::format("if(c0 % 2 = 0, {}(c1), {}(c2))", funcName, funcName), + makeRowVector({c0, c1, c2})); + assertEqualVectors(expected, result); + } + + // Evaluates funcName on single rows of data wrapped in a constant vector. + void testConstant(const std::string& funcName) { + vector_size_t size = 1'000; + auto data = + makeArrayVector({{1, 2, 3}, {4, 5, 4, 5}, {6, 6, 6, 6}}); + + auto evaluateConstant = [&](vector_size_t row, const VectorPtr& vector) { + return evaluate( + fmt::format("{}(c0)", funcName), + makeRowVector({BaseVector::wrapInConstant(size, row, vector)})); + }; + + auto result = evaluateConstant(0, data); + auto expected = makeConstantArray(size, {}); + assertEqualVectors(expected, result); + + result = evaluateConstant(1, data); + expected = makeConstantArray(size, {4, 5}); + assertEqualVectors(expected, result); + + result = evaluateConstant(2, data); + expected = makeConstantArray(size, {6}); + assertEqualVectors(expected, result); } }; @@ -80,120 +200,28 @@ class ArrayDuplicatesTest : public FunctionBaseTest { // Test integer arrays. TEST_F(ArrayDuplicatesTest, integerArrays) { - testBigint(); + testBigint("array_duplicates"); + testBigint("array_dupes"); } // Test inline (short) strings. 
TEST_F(ArrayDuplicatesTest, inlineStringArrays) { - using S = StringView; - - auto array = makeNullableArrayVector({ - {}, - {S("")}, - {std::nullopt}, - {S("a"), S("b")}, - {S("a"), std::nullopt, S("b")}, - {S("a"), S("a")}, - {S("b"), S("a"), S("b"), S("a"), S("a")}, - {std::nullopt, std::nullopt}, - {S("b"), std::nullopt, S("a"), S("a"), std::nullopt, S("b")}, - }); - - auto expected = makeNullableArrayVector({ - {}, - {}, - {}, - {}, - {}, - {S("a")}, - {S("a"), S("b")}, - {std::nullopt}, - {std::nullopt, S("a"), S("b")}, - }); - - testExpr(expected, "array_duplicates(C0)", {array}); + testInlineStringArrays("array_duplicates"); + testInlineStringArrays("array_dupes"); } // Test non-inline (> 12 character length) strings. TEST_F(ArrayDuplicatesTest, stringArrays) { - using S = StringView; - - auto array = makeNullableArrayVector({ - {S("red shiny car ahead"), S("blue clear sky above")}, - {S("blue clear sky above"), - S("yellow rose flowers"), - std::nullopt, - S("blue clear sky above"), - S("orange beautiful sunset")}, - { - S("red shiny car ahead"), - std::nullopt, - S("purple is an elegant color"), - S("red shiny car ahead"), - S("green plants make us happy"), - S("purple is an elegant color"), - std::nullopt, - S("purple is an elegant color"), - }, - }); - - auto expected = makeNullableArrayVector({ - {}, - {S("blue clear sky above")}, - {std::nullopt, S("purple is an elegant color"), S("red shiny car ahead")}, - }); - - testExpr(expected, "array_duplicates(C0)", {array}); + testStringArrays("array_duplicates"); + testStringArrays("array_dupes"); } TEST_F(ArrayDuplicatesTest, nonContiguousRows) { - auto c0 = makeFlatVector(4, [](auto row) { return row; }); - auto c1 = makeArrayVector({ - {1, 1, 2, 3, 3}, - {1, 1, 2, 3, 4, 4}, - {1, 1, 2, 3, 4, 5, 5}, - {1, 1, 2, 3, 3, 4, 5, 6, 6}, - }); - - auto c2 = makeArrayVector({ - {0, 0, 1, 1, 2, 3, 3}, - {0, 0, 1, 1, 2, 3, 4, 4}, - {0, 0, 1, 1, 2, 3, 4, 5, 5}, - {0, 0, 1, 1, 2, 3, 4, 5, 6, 6}, - }); - - auto expected 
= makeArrayVector({ - {1, 3}, - {0, 1, 4}, - {1, 5}, - {0, 1, 6}, - }); - - auto result = evaluate( - "if(c0 % 2 = 0, array_duplicates(c1), array_duplicates(c2))", - makeRowVector({c0, c1, c2})); - assertEqualVectors(expected, result); + testNonContiguousRows("array_duplicates"); + testNonContiguousRows("array_dupes"); } TEST_F(ArrayDuplicatesTest, constant) { - vector_size_t size = 1'000; - auto data = makeArrayVector({{1, 2, 3}, {4, 5, 4, 5}, {6, 6, 6, 6}}); - - auto evaluateConstant = [&](vector_size_t row, const VectorPtr& vector) { - return evaluate( - "array_duplicates(c0)", - makeRowVector({BaseVector::wrapInConstant(size, row, vector)})); - }; - - auto result = evaluateConstant(0, data); - auto expected = makeConstantArray(size, {}); - assertEqualVectors(expected, result); - - result = evaluateConstant(1, data); - expected = makeConstantArray(size, {4, 5}); - assertEqualVectors(expected, result); - - result = evaluateConstant(2, data); - expected = makeConstantArray(size, {6}); - assertEqualVectors(expected, result); + testConstant("array_duplicates"); + testConstant("array_dupes"); } diff --git a/velox/functions/prestosql/tests/ArrayHasDuplicatesTest.cpp b/velox/functions/prestosql/tests/ArrayHasDuplicatesTest.cpp index 4cc8199c92d3..0f85f27c8952 100644 --- a/velox/functions/prestosql/tests/ArrayHasDuplicatesTest.cpp +++ b/velox/functions/prestosql/tests/ArrayHasDuplicatesTest.cpp @@ -34,114 +34,144 @@ class ArrayHasDuplicatesTest : public FunctionBaseTest { evaluate>(expression, makeRowVector(input)); assertEqualVectors(expected, result); } + + // Bigint arrays, including empty arrays, null elements and numeric limits. + void testBigints(const std::string& funcName) { + auto array = makeNullableArrayVector({ + {}, + {1, + std::numeric_limits::min(), + std::numeric_limits::max()}, + {std::nullopt}, + {1, 2, 3}, + {2, 1, 1, -2}, + {1, 1, 1}, + {-1, std::nullopt, -1, -1}, + {std::nullopt, std::nullopt, std::nullopt}, + {1, -2, -2, 8, -2, 4, 8, 1}, + {std::numeric_limits::max(), + std::numeric_limits::max(), + 1, + std::nullopt, + 0, + 
1, + std::nullopt, + 0}, + }); + + auto expected = makeNullableFlatVector( + {false, false, false, false, true, true, true, true, true, true}); + + testExpr(expected, fmt::format("{}(C0)", funcName), {array}); + } + + // Inline (short) strings. + void testInlineStrings(const std::string& funcName) { + using S = StringView; + + auto array = makeNullableArrayVector({ + {}, + {""_sv}, + {std::nullopt}, + {S("a"), S("b")}, + {S("a"), std::nullopt, S("b")}, + {S("a"), S("a")}, + {S("b"), S("a"), S("b"), S("a"), S("a")}, + {std::nullopt, std::nullopt}, + {S("b"), std::nullopt, S("a"), S("a"), std::nullopt, S("b")}, + }); + + auto expected = makeFlatVector( + {false, false, false, false, false, true, true, true, true}); + + testExpr(expected, fmt::format("{}(C0)", funcName), {array}); + } + + // Non-inline (> 12 character length) strings. + void testLongStrings(const std::string& funcName) { + using S = StringView; + + auto array = makeNullableArrayVector({ + {S("red shiny car ahead"), S("blue clear sky above")}, + {S("blue clear sky above"), + S("yellow rose flowers"), + std::nullopt, + S("blue clear sky above"), + S("orange beautiful sunset")}, + { + S("red shiny car ahead"), + std::nullopt, + S("purple is an elegant color"), + S("red shiny car ahead"), + S("green plants make us happy"), + S("purple is an elegant color"), + std::nullopt, + S("purple is an elegant color"), + }, + }); + auto expected = makeFlatVector({false, true, true}); + testExpr(expected, fmt::format("{}(C0)", funcName), {array}); + } + + // Bigint arrays without null elements. + void testNullFreeBigints(const std::string& funcName) { + auto array = makeArrayVector({ + {1, + std::numeric_limits::min(), + std::numeric_limits::max()}, + {2, 1, 1, -2}, + {1, 1, 1}, + }); + + auto expected = makeNullableFlatVector({false, true, true}); + + testExpr(expected, fmt::format("{}(C0)", funcName), {array}); + } + + // String arrays without null elements. + void testNullFreeStrings(const std::string& funcName) { + using S = StringView; + + auto array = makeArrayVector( + {{S("red shiny car ahead"), S("blue clear sky above")}, + {S("red shiny car ahead"), + S("blue clear 
sky above"), + S("blue clear sky above")}, + {S("a"), S("b")}, + {S("a"), S("b"), S("b")} + + }); + auto expected = makeFlatVector({false, true, false, true}); + testExpr(expected, fmt::format("{}(C0)", funcName), {array}); + } }; } // namespace // Test bigint arrays. TEST_F(ArrayHasDuplicatesTest, bigints) { - auto array = makeNullableArrayVector({ - {}, - {1, - std::numeric_limits::min(), - std::numeric_limits::max()}, - {std::nullopt}, - {1, 2, 3}, - {2, 1, 1, -2}, - {1, 1, 1}, - {-1, std::nullopt, -1, -1}, - {std::nullopt, std::nullopt, std::nullopt}, - {1, -2, -2, 8, -2, 4, 8, 1}, - {std::numeric_limits::max(), - std::numeric_limits::max(), - 1, - std::nullopt, - 0, - 1, - std::nullopt, - 0}, - }); - - auto expected = makeNullableFlatVector( - {false, false, false, false, true, true, true, true, true, true}); - - testExpr(expected, "array_has_duplicates(C0)", {array}); + testBigints("array_has_duplicates"); + testBigints("array_has_dupes"); } // Test inline (short) strings. TEST_F(ArrayHasDuplicatesTest, inlineStrings) { - using S = StringView; - - auto array = makeNullableArrayVector({ - {}, - {""_sv}, - {std::nullopt}, - {S("a"), S("b")}, - {S("a"), std::nullopt, S("b")}, - {S("a"), S("a")}, - {S("b"), S("a"), S("b"), S("a"), S("a")}, - {std::nullopt, std::nullopt}, - {S("b"), std::nullopt, S("a"), S("a"), std::nullopt, S("b")}, - }); - - auto expected = makeFlatVector( - {false, false, false, false, false, true, true, true, true}); - - testExpr(expected, "array_has_duplicates(C0)", {array}); + testInlineStrings("array_has_duplicates"); + testInlineStrings("array_has_dupes"); } // Test non-inline (> 12 character length) strings. 
TEST_F(ArrayHasDuplicatesTest, longStrings) { - using S = StringView; - - auto array = makeNullableArrayVector({ - {S("red shiny car ahead"), S("blue clear sky above")}, - {S("blue clear sky above"), - S("yellow rose flowers"), - std::nullopt, - S("blue clear sky above"), - S("orange beautiful sunset")}, - { - S("red shiny car ahead"), - std::nullopt, - S("purple is an elegant color"), - S("red shiny car ahead"), - S("green plants make us happy"), - S("purple is an elegant color"), - std::nullopt, - S("purple is an elegant color"), - }, - }); - auto expected = makeFlatVector({false, true, true}); - testExpr(expected, "array_has_duplicates(C0)", {array}); + testLongStrings("array_has_duplicates"); + testLongStrings("array_has_dupes"); } TEST_F(ArrayHasDuplicatesTest, nullFreeBigints) { - auto array = makeArrayVector({ - {1, - std::numeric_limits::min(), - std::numeric_limits::max()}, - {2, 1, 1, -2}, - {1, 1, 1}, - }); - - auto expected = makeNullableFlatVector({false, true, true}); - - testExpr(expected, "array_has_duplicates(C0)", {array}); + testNullFreeBigints("array_has_duplicates"); + testNullFreeBigints("array_has_dupes"); } TEST_F(ArrayHasDuplicatesTest, nullFreeStrings) { - using S = StringView; - - auto array = makeArrayVector( - {{S("red shiny car ahead"), S("blue clear sky above")}, - {S("red shiny car ahead"), - S("blue clear sky above"), - S("blue clear sky above")}, - {S("a"), S("b")}, - {S("a"), S("b"), S("b")} - - }); - auto expected = makeFlatVector({false, true, false, true}); - testExpr(expected, "array_has_duplicates(C0)", {array}); + testNullFreeStrings("array_has_duplicates"); + testNullFreeStrings("array_has_dupes"); }