Skip to content

Commit

Permalink
Add alias for array duplicate functions. (facebookincubator#10425)
Browse files Browse the repository at this point in the history
  • Loading branch information
amitkdutta authored and facebook-github-bot committed Jul 9, 2024
1 parent 836bb30 commit e8362c0
Show file tree
Hide file tree
Showing 4 changed files with 253 additions and 195 deletions.
8 changes: 8 additions & 0 deletions velox/docs/functions/presto/array.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ Array Functions
SELECT array_distinct(ARRAY [1, 2, 1]); -- [1, 2]
SELECT array_distinct(ARRAY [1, NULL, NULL]); -- [1, NULL]

.. function:: array_dupes(array(E)) -> array(E)

This is an alias for :func:`array_duplicates(array(E))`.

.. function:: array_duplicates(array(E)) -> array(E)

Returns a set of elements that occur more than once in array.
Expand Down Expand Up @@ -77,6 +81,10 @@ Array Functions
SELECT array_frequency(ARRAY ["knock", "knock", "who", "?"]); -- {"knock" -> 2, "who" -> 1, "?" -> 1}
SELECT array_frequency(ARRAY []); -- {}

.. function:: array_has_dupes(array(E)) -> boolean

This is an alias for :func:`array_has_duplicates(array(E))`.

.. function:: array_has_duplicates(array(E)) -> boolean

Returns a boolean: whether array has any elements that occur more than once.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ inline void registerArrayHasDuplicatesFunctions(const std::string& prefix) {
registerFunction<
ParameterBinder<ArrayHasDuplicatesFunction, T>,
bool,
Array<T>>({prefix + "array_has_duplicates"});
Array<T>>({prefix + "array_has_duplicates", prefix + "array_has_dupes"});
}

template <typename T>
Expand Down Expand Up @@ -133,6 +133,7 @@ void registerArrayFunctions(const std::string& prefix) {
VELOX_REGISTER_VECTOR_FUNCTION(udf_array_distinct, prefix + "array_distinct");
VELOX_REGISTER_VECTOR_FUNCTION(
udf_array_duplicates, prefix + "array_duplicates");
VELOX_REGISTER_VECTOR_FUNCTION(udf_array_duplicates, prefix + "array_dupes");
VELOX_REGISTER_VECTOR_FUNCTION(
udf_array_intersect, prefix + "array_intersect");
VELOX_REGISTER_VECTOR_FUNCTION(udf_array_contains, prefix + "contains");
Expand Down
232 changes: 128 additions & 104 deletions velox/functions/prestosql/tests/ArrayDuplicatesTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class ArrayDuplicatesTest : public FunctionBaseTest {
}

// Execute test for bigint type.
void testBigint() {
void testBigint(const std::string& funcName) {
auto array = makeNullableArrayVector<int64_t>({
{},
{1,
Expand Down Expand Up @@ -72,128 +72,152 @@ class ArrayDuplicatesTest : public FunctionBaseTest {
{std::nullopt, 0, 1, std::numeric_limits<int64_t>::max()},
});

testExpr(expected, "array_duplicates(C0)", {array});
testExpr(expected, fmt::format("{}(C0)", funcName), {array});
}

void testinlineStringArrays(const std::string& funcName) {
using S = StringView;

auto array = makeNullableArrayVector<StringView>({
{},
{S("")},
{std::nullopt},
{S("a"), S("b")},
{S("a"), std::nullopt, S("b")},
{S("a"), S("a")},
{S("b"), S("a"), S("b"), S("a"), S("a")},
{std::nullopt, std::nullopt},
{S("b"), std::nullopt, S("a"), S("a"), std::nullopt, S("b")},
});

auto expected = makeNullableArrayVector<StringView>({
{},
{},
{},
{},
{},
{S("a")},
{S("a"), S("b")},
{std::nullopt},
{std::nullopt, S("a"), S("b")},
});

testExpr(expected, fmt::format("{}(C0)", funcName), {array});
}

// Runs `funcName` over arrays of non-inline strings (longer than 12
// characters) and checks the result. Null elements are mixed in with the
// repeated values.
void teststringArrays(const std::string& funcName) {
  using S = StringView;

  // Row i of `input` is paired with row i of `want`.
  auto input = makeNullableArrayVector<S>({
      {S("red shiny car ahead"), S("blue clear sky above")},
      {S("blue clear sky above"),
       S("yellow rose flowers"),
       std::nullopt,
       S("blue clear sky above"),
       S("orange beautiful sunset")},
      {
          S("red shiny car ahead"),
          std::nullopt,
          S("purple is an elegant color"),
          S("red shiny car ahead"),
          S("green plants make us happy"),
          S("purple is an elegant color"),
          std::nullopt,
          S("purple is an elegant color"),
      },
  });

  // Only entries that appear more than once in the matching input row.
  auto want = makeNullableArrayVector<S>({
      {},
      {S("blue clear sky above")},
      {std::nullopt,
       S("purple is an elegant color"),
       S("red shiny car ahead")},
  });

  testExpr(want, fmt::format("{}(C0)", funcName), {input});
}

// Evaluates `funcName` inside an if-expression so that each call only sees a
// subset of the rows: rows where c0 is even read from c1, the remaining rows
// read from c2.
void testNonContiguousRows(const std::string& funcName) {
  // c0 holds the row number (0..3) and drives the branch selection.
  auto c0 = makeFlatVector<int64_t>(4, [](auto row) { return row; });

  auto c1 = makeArrayVector<int64_t>({
      {1, 1, 2, 3, 3},
      {1, 1, 2, 3, 4, 4},
      {1, 1, 2, 3, 4, 5, 5},
      {1, 1, 2, 3, 3, 4, 5, 6, 6},
  });

  auto c2 = makeArrayVector<int64_t>({
      {0, 0, 1, 1, 2, 3, 3},
      {0, 0, 1, 1, 2, 3, 4, 4},
      {0, 0, 1, 1, 2, 3, 4, 5, 5},
      {0, 0, 1, 1, 2, 3, 4, 5, 6, 6},
  });

  // Rows 0 and 2 come from c1, rows 1 and 3 come from c2.
  auto want = makeArrayVector<int64_t>({
      {1, 3},
      {0, 1, 4},
      {1, 5},
      {0, 1, 6},
  });

  const std::string expression =
      fmt::format("if(c0 % 2 = 0, {}(c1), {}(c2))", funcName, funcName);
  auto actual = evaluate<ArrayVector>(expression, makeRowVector({c0, c1, c2}));
  assertEqualVectors(want, actual);
}

void testConstant(const std::string& funcName) {
vector_size_t size = 1'000;
auto data =
makeArrayVector<int64_t>({{1, 2, 3}, {4, 5, 4, 5}, {6, 6, 6, 6}});

auto evaluateConstant = [&](vector_size_t row, const VectorPtr& vector) {
return evaluate(
fmt::format("{}(c0)", funcName),
makeRowVector({BaseVector::wrapInConstant(size, row, vector)}));
};

auto result = evaluateConstant(0, data);
auto expected = makeConstantArray<int64_t>(size, {});
assertEqualVectors(expected, result);

result = evaluateConstant(1, data);
expected = makeConstantArray<int64_t>(size, {4, 5});
assertEqualVectors(expected, result);

result = evaluateConstant(2, data);
expected = makeConstantArray<int64_t>(size, {6});
assertEqualVectors(expected, result);
}
};

} // namespace

// Test integer arrays.
TEST_F(ArrayDuplicatesTest, integerArrays) {
testBigint();
testBigint("array_duplicates");
testBigint("array_dupes");
}

// Test inline (short) strings.
TEST_F(ArrayDuplicatesTest, inlineStringArrays) {
using S = StringView;

auto array = makeNullableArrayVector<StringView>({
{},
{S("")},
{std::nullopt},
{S("a"), S("b")},
{S("a"), std::nullopt, S("b")},
{S("a"), S("a")},
{S("b"), S("a"), S("b"), S("a"), S("a")},
{std::nullopt, std::nullopt},
{S("b"), std::nullopt, S("a"), S("a"), std::nullopt, S("b")},
});

auto expected = makeNullableArrayVector<StringView>({
{},
{},
{},
{},
{},
{S("a")},
{S("a"), S("b")},
{std::nullopt},
{std::nullopt, S("a"), S("b")},
});

testExpr(expected, "array_duplicates(C0)", {array});
testinlineStringArrays("array_duplicates");
testinlineStringArrays("array_dupes");
}

// Test non-inline (> 12 character length) strings.
TEST_F(ArrayDuplicatesTest, stringArrays) {
using S = StringView;

auto array = makeNullableArrayVector<StringView>({
{S("red shiny car ahead"), S("blue clear sky above")},
{S("blue clear sky above"),
S("yellow rose flowers"),
std::nullopt,
S("blue clear sky above"),
S("orange beautiful sunset")},
{
S("red shiny car ahead"),
std::nullopt,
S("purple is an elegant color"),
S("red shiny car ahead"),
S("green plants make us happy"),
S("purple is an elegant color"),
std::nullopt,
S("purple is an elegant color"),
},
});

auto expected = makeNullableArrayVector<StringView>({
{},
{S("blue clear sky above")},
{std::nullopt, S("purple is an elegant color"), S("red shiny car ahead")},
});

testExpr(expected, "array_duplicates(C0)", {array});
teststringArrays("array_duplicates");
teststringArrays("array_dupes");
}

TEST_F(ArrayDuplicatesTest, nonContiguousRows) {
auto c0 = makeFlatVector<int64_t>(4, [](auto row) { return row; });
auto c1 = makeArrayVector<int64_t>({
{1, 1, 2, 3, 3},
{1, 1, 2, 3, 4, 4},
{1, 1, 2, 3, 4, 5, 5},
{1, 1, 2, 3, 3, 4, 5, 6, 6},
});

auto c2 = makeArrayVector<int64_t>({
{0, 0, 1, 1, 2, 3, 3},
{0, 0, 1, 1, 2, 3, 4, 4},
{0, 0, 1, 1, 2, 3, 4, 5, 5},
{0, 0, 1, 1, 2, 3, 4, 5, 6, 6},
});

auto expected = makeArrayVector<int64_t>({
{1, 3},
{0, 1, 4},
{1, 5},
{0, 1, 6},
});

auto result = evaluate<ArrayVector>(
"if(c0 % 2 = 0, array_duplicates(c1), array_duplicates(c2))",
makeRowVector({c0, c1, c2}));
assertEqualVectors(expected, result);
testNonContiguousRows("array_duplicates");
testNonContiguousRows("array_dupes");
}

TEST_F(ArrayDuplicatesTest, constant) {
vector_size_t size = 1'000;
auto data = makeArrayVector<int64_t>({{1, 2, 3}, {4, 5, 4, 5}, {6, 6, 6, 6}});

auto evaluateConstant = [&](vector_size_t row, const VectorPtr& vector) {
return evaluate(
"array_duplicates(c0)",
makeRowVector({BaseVector::wrapInConstant(size, row, vector)}));
};

auto result = evaluateConstant(0, data);
auto expected = makeConstantArray<int64_t>(size, {});
assertEqualVectors(expected, result);

result = evaluateConstant(1, data);
expected = makeConstantArray<int64_t>(size, {4, 5});
assertEqualVectors(expected, result);

result = evaluateConstant(2, data);
expected = makeConstantArray<int64_t>(size, {6});
assertEqualVectors(expected, result);
testConstant("array_duplicates");
testConstant("array_dupes");
}
Loading

0 comments on commit e8362c0

Please sign in to comment.