Skip to content

Commit

Permalink
[feature](window_function) support to secondary argument to ignore nu…
Browse files Browse the repository at this point in the history
…ll values in first_value/last_value (#27623)
  • Loading branch information
mrhhsg authored Nov 30, 2023
1 parent e9debca commit 5739167
Show file tree
Hide file tree
Showing 18 changed files with 891 additions and 26 deletions.
27 changes: 21 additions & 6 deletions be/src/vec/aggregate_functions/aggregate_function_window.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,32 @@
namespace doris::vectorized {

template <template <typename> class AggregateFunctionTemplate,
template <typename ColVecType, bool, bool> class Data, template <typename> class Impl,
bool result_is_nullable, bool arg_is_nullable>
template <typename ColVecType, bool, bool> class Data,
template <typename, bool> class Impl, bool result_is_nullable, bool arg_is_nullable>
AggregateFunctionPtr create_function_lead_lag_first_last(const String& name,
const DataTypes& argument_types) {
auto type = remove_nullable(argument_types[0]);
WhichDataType which(*type);

#define DISPATCH(TYPE, COLUMN_TYPE) \
if (which.idx == TypeIndex::TYPE) \
return std::make_shared<AggregateFunctionTemplate< \
Impl<Data<COLUMN_TYPE, result_is_nullable, arg_is_nullable>>>>(argument_types);
bool arg_ignore_null_value = false;
if (argument_types.size() == 2) {
DCHECK(name == "first_value" || name == "last_value") << "invalid function name: " << name;
arg_ignore_null_value = true;
}

#define DISPATCH(TYPE, COLUMN_TYPE) \
if (which.idx == TypeIndex::TYPE) { \
if (arg_ignore_null_value) { \
return std::make_shared<AggregateFunctionTemplate< \
Impl<Data<COLUMN_TYPE, result_is_nullable, arg_is_nullable>, true>>>( \
argument_types); \
} else { \
return std::make_shared<AggregateFunctionTemplate< \
Impl<Data<COLUMN_TYPE, result_is_nullable, arg_is_nullable>, false>>>( \
argument_types); \
} \
}

TYPE_TO_COLUMN_TYPE(DISPATCH)
#undef DISPATCH

Expand Down
37 changes: 33 additions & 4 deletions be/src/vec/aggregate_functions/aggregate_function_window.h
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ struct LeadLagData {
bool _is_inited = false;
};

template <typename Data>
template <typename Data, bool = false>
struct WindowFunctionLeadImpl : Data {
void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start,
int64_t frame_end, const IColumn** columns) {
Expand All @@ -328,7 +328,7 @@ struct WindowFunctionLeadImpl : Data {
static const char* name() { return "lead"; }
};

template <typename Data>
template <typename Data, bool = false>
struct WindowFunctionLagImpl : Data {
void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start,
int64_t frame_end, const IColumn** columns) {
Expand All @@ -350,7 +350,7 @@ struct WindowFunctionLagImpl : Data {
// TODO: first_value && last_value in some corner case will be core,
// if need to simply change it, should set them to always nullable insert into null value, and register in cpp maybe be change
// But it's may be another better way to handle it
template <typename Data>
template <typename Data, bool arg_ignore_null = false>
struct WindowFunctionFirstImpl : Data {
void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start,
int64_t frame_end, const IColumn** columns) {
Expand All @@ -363,13 +363,27 @@ struct WindowFunctionFirstImpl : Data {
return;
}
frame_start = std::max<int64_t>(frame_start, partition_start);

if constexpr (arg_ignore_null) {
frame_end = std::min<int64_t>(frame_end, partition_end);

auto& second_arg = assert_cast<const ColumnVector<UInt8>&>(*columns[1]);
auto ignore_null_value = second_arg.get_data()[0];

if (ignore_null_value && columns[0]->is_nullable()) {
auto& arg_nullable = assert_cast<const ColumnNullable&>(*columns[0]);
while (frame_start < frame_end - 1 && arg_nullable.is_null_at(frame_start)) {
frame_start++;
}
}
}
this->set_value(columns, frame_start);
}

static const char* name() { return "first_value"; }
};

template <typename Data>
template <typename Data, bool arg_ignore_null = false>
struct WindowFunctionLastImpl : Data {
void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start,
int64_t frame_end, const IColumn** columns) {
Expand All @@ -380,6 +394,21 @@ struct WindowFunctionLastImpl : Data {
return;
}
frame_end = std::min<int64_t>(frame_end, partition_end);

if constexpr (arg_ignore_null) {
frame_start = std::max<int64_t>(frame_start, partition_start);

auto& second_arg = assert_cast<const ColumnVector<UInt8>&>(*columns[1]);
auto ignore_null_value = second_arg.get_data()[0];

if (ignore_null_value && columns[0]->is_nullable()) {
auto& arg_nullable = assert_cast<const ColumnNullable&>(*columns[0]);
while (frame_start < (frame_end - 1) && arg_nullable.is_null_at(frame_end - 1)) {
frame_end--;
}
}
}

this->set_value(columns, frame_end - 1);
}

Expand Down
20 changes: 18 additions & 2 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -1426,6 +1426,22 @@ private void initAggregateBuiltins() {
null,
"",
"", true));

addBuiltin(AggregateFunction.createAnalyticBuiltin("first_value",
Lists.newArrayList(new ArrayType(t), Type.BOOLEAN), new ArrayType(t), Type.ARRAY,
"",
"",
null,
"",
"", true));

addBuiltin(AggregateFunction.createAnalyticBuiltin("last_value",
Lists.newArrayList(new ArrayType(t), Type.BOOLEAN), new ArrayType(t), Type.ARRAY,
"",
"",
null,
"",
"", true));
}

// Avg
Expand Down Expand Up @@ -1584,7 +1600,7 @@ private void initAggregateBuiltins() {

//vec first_value
addBuiltin(AggregateFunction.createAnalyticBuiltin(
"first_value", Lists.newArrayList(t), t, t,
"first_value", Lists.newArrayList(t, Type.BOOLEAN), t, t,
"",
"",
null,
Expand All @@ -1601,7 +1617,7 @@ private void initAggregateBuiltins() {
false, false));
//vec last_value
addBuiltin(AggregateFunction.createAnalyticBuiltin(
"last_value", Lists.newArrayList(t), t, t,
"last_value", Lists.newArrayList(t, Type.BOOLEAN), t, t,
"",
"",
"",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ public FirstOrLastValue visitFirstValue(FirstValue firstValue, Void ctx) {
&& wf.getLeftBoundary().isNot(FrameBoundType.PRECEDING)) {
windowExpression = windowExpression.withWindowFrame(
wf.withFrameUnits(FrameUnitsType.ROWS).withRightBoundary(wf.getLeftBoundary()));
LastValue lastValue = new LastValue(firstValue.child());
LastValue lastValue = new LastValue(firstValue.children());
windowExpression = windowExpression.withFunction(lastValue);
return lastValue;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,30 +21,41 @@
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
import org.apache.doris.nereids.types.BooleanType;
import org.apache.doris.nereids.types.coercion.AnyDataType;

import com.google.common.collect.ImmutableList;

import java.util.List;

/** parent class for first_value() and last_value() */
/**
* parent class for first_value() and last_value()
*/
public abstract class FirstOrLastValue extends WindowFunction
implements UnaryExpression, AlwaysNullable, ExplicitlyCastableSignature {
implements AlwaysNullable, ExplicitlyCastableSignature {

private static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.retArgType(0).args(AnyDataType.INSTANCE_WITHOUT_INDEX)
FunctionSignature.retArgType(0).args(AnyDataType.INSTANCE_WITHOUT_INDEX),
FunctionSignature.retArgType(0).args(AnyDataType.INSTANCE_WITHOUT_INDEX, BooleanType.INSTANCE)
);

public FirstOrLastValue(String name, Expression child, Expression ignoreNullValue) {
super(name, child, ignoreNullValue);
}

public FirstOrLastValue(String name, Expression child) {
super(name, child);
}

public FirstOrLastValue(String name, List<Expression> children) {
super(name, children);
}

public FirstOrLastValue reverse() {
if (this instanceof FirstValue) {
return new LastValue(child());
return new LastValue(children);
} else {
return new FirstValue(child());
return new FirstValue(children);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,18 @@ public FirstValue(Expression child) {
super("first_value", child);
}

public FirstValue(Expression child, Expression ignoreNullValue) {
super("first_value", child, ignoreNullValue);
}

public FirstValue(List<Expression> children) {
super("first_value", children);
}

@Override
public FirstValue withChildren(List<Expression> children) {
Preconditions.checkArgument(children.size() == 1);
return new FirstValue(children.get(0));
Preconditions.checkArgument(children.size() == 1 || children.size() == 2);
return new FirstValue(children);
}

@Override
Expand All @@ -47,6 +55,6 @@ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {

@Override
public DataType getDataType() {
return child().getDataType();
return child(0).getDataType();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,18 @@ public LastValue(Expression child) {
super("last_value", child);
}

public LastValue(Expression child, Expression ignoreNullValue) {
super("last_value", child, ignoreNullValue);
}

public LastValue(List<Expression> children) {
super("last_value", children);
}

@Override
public LastValue withChildren(List<Expression> children) {
Preconditions.checkArgument(children.size() == 1);
return new LastValue(children.get(0));
Preconditions.checkArgument(children.size() == 1 || children.size() == 2);
return new LastValue(children);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_default --
21 04-21-11 1 1 1 2 1.0 1 1 \N \N
21 04-21-11 1 1 1 2 1.0 1 1 \N \N
22 04-22-10-21 0 0 1 1 0.5 1 0 \N \N
22 04-22-10-21 0 1 1 2 0.6666666666666666 1 0 \N \N
22 04-22-10-21 1 0 0 1 0.3333333333333333 1 0 \N \N
22 04-22-10-21 1 0 1 1 0.5 1 0 \N \N
23 04-23-10 1 1 1 2 1.0 1 1 \N \N
23 04-23-10 1 1 1 2 1.0 1 1 \N \N
24 02-24-10-21 1 1 1 2 1.0 1 1 \N \N
24 02-24-10-21 1 1 1 2 1.0 1 1 \N \N

-- !select_empty_window --
21 04-21-11 1 \N \N \N \N \N \N \N \N
21 04-21-11 1 1 1 1 1.0 1 1 \N \N
22 04-22-10-21 0 \N \N \N \N \N \N \N 0
22 04-22-10-21 0 1 1 1 1.0 1 1 0 \N
22 04-22-10-21 1 0 0 0 0.0 0 0 \N 1
22 04-22-10-21 1 0 0 0 0.0 0 0 1 \N
23 04-23-10 1 \N \N \N \N \N \N \N \N
23 04-23-10 1 1 1 1 1.0 1 1 \N \N
24 02-24-10-21 1 \N \N \N \N \N \N \N \N
24 02-24-10-21 1 1 1 1 1.0 1 1 \N \N

-- !select_default_nullable --
21 04-21-11 1 1 1 2 1.0 1 1 \N \N
21 04-21-11 1 1 1 2 1.0 1 1 \N \N
22 04-22-10-21 0 0 1 1 0.5 1 0 \N \N
22 04-22-10-21 0 1 1 2 0.6666666666666666 1 0 \N \N
22 04-22-10-21 1 0 0 1 0.3333333333333333 1 0 \N \N
22 04-22-10-21 1 0 1 1 0.5 1 0 \N \N
23 04-23-10 1 1 1 2 1.0 1 1 \N \N
23 04-23-10 1 1 1 2 1.0 1 1 \N \N
24 02-24-10-21 1 1 1 2 1.0 1 1 \N \N
24 02-24-10-21 1 1 1 2 1.0 1 1 \N \N

-- !select_empty_window_nullable --
21 04-21-11 1 \N \N \N \N \N \N \N \N
21 04-21-11 1 1 1 1 1.0 1 1 \N \N
22 04-22-10-21 0 \N \N \N \N \N \N \N 0
22 04-22-10-21 0 1 1 1 1.0 1 1 0 \N
22 04-22-10-21 1 0 0 0 0.0 0 0 \N 1
22 04-22-10-21 1 0 0 0 0.0 0 0 1 \N
23 04-23-10 1 \N \N \N \N \N \N \N \N
23 04-23-10 1 1 1 1 1.0 1 1 \N \N
24 02-24-10-21 1 \N \N \N \N \N \N \N \N
24 02-24-10-21 1 1 1 1 1.0 1 1 \N \N

-- !select_default_old_planner --
21 04-21-11 1 1 1 2 1.0 1 1 \N 1
21 04-21-11 1 1 1 2 1.0 1 1 1 \N
22 04-22-10-21 0 0 1 1 0.5 1 0 \N 1
22 04-22-10-21 0 1 1 2 0.6666666666666666 1 0 1 1
22 04-22-10-21 1 0 0 1 0.3333333333333333 1 0 0 0
22 04-22-10-21 1 0 1 1 0.5 1 0 0 \N
23 04-23-10 1 1 1 2 1.0 1 1 \N 1
23 04-23-10 1 1 1 2 1.0 1 1 1 \N
24 02-24-10-21 1 1 1 2 1.0 1 1 \N 1
24 02-24-10-21 1 1 1 2 1.0 1 1 1 \N

-- !select_empty_window_old_planner --
21 04-21-11 1 \N \N \N \N \N \N \N \N
21 04-21-11 1 1 1 1 1.0 1 1 \N \N
22 04-22-10-21 0 \N \N \N \N \N \N \N 0
22 04-22-10-21 0 1 1 1 1.0 1 1 0 \N
22 04-22-10-21 1 0 0 0 0.0 0 0 \N 1
22 04-22-10-21 1 0 0 0 0.0 0 0 1 \N
23 04-23-10 1 \N \N \N \N \N \N \N \N
23 04-23-10 1 1 1 1 1.0 1 1 \N \N
24 02-24-10-21 1 \N \N \N \N \N \N \N \N
24 02-24-10-21 1 1 1 1 1.0 1 1 \N \N

-- !select_default_nullable_old_planner --
21 04-21-11 1 1 1 2 1.0 1 1 \N 1
21 04-21-11 1 1 1 2 1.0 1 1 1 \N
22 04-22-10-21 0 0 1 1 0.5 1 0 \N 1
22 04-22-10-21 0 1 1 2 0.6666666666666666 1 0 1 1
22 04-22-10-21 1 0 0 1 0.3333333333333333 1 0 0 0
22 04-22-10-21 1 0 1 1 0.5 1 0 0 \N
23 04-23-10 1 1 1 2 1.0 1 1 \N 1
23 04-23-10 1 1 1 2 1.0 1 1 1 \N
24 02-24-10-21 1 1 1 2 1.0 1 1 \N 1
24 02-24-10-21 1 1 1 2 1.0 1 1 1 \N

-- !select_empty_window_nullable_old_planner --
21 04-21-11 1 \N \N \N \N \N \N \N \N
21 04-21-11 1 1 1 1 1.0 1 1 \N \N
22 04-22-10-21 0 \N \N \N \N \N \N \N 0
22 04-22-10-21 0 1 1 1 1.0 1 1 0 \N
22 04-22-10-21 1 0 0 0 0.0 0 0 \N 1
22 04-22-10-21 1 0 0 0 0.0 0 0 1 \N
23 04-23-10 1 \N \N \N \N \N \N \N \N
23 04-23-10 1 1 1 1 1.0 1 1 \N \N
24 02-24-10-21 1 \N \N \N \N \N \N \N \N
24 02-24-10-21 1 1 1 1 1.0 1 1 \N \N

21 changes: 21 additions & 0 deletions regression-test/data/correctness_p0/test_first_value_window.out
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,24 @@
23 04-23-10 ["p7", "year4"] \N \N
24 02-24-10-21 [""] \N \N

-- !select_default2 --
21 04-21-11 1 1 1 1
22 04-22-10-21 0 0 0 0
22 04-22-10-21 1 0 0 0
23 04-23-10 1 1 1 1
24 02-24-10-21 1 1 1 1

-- !select_default3 --
1 21 04-21-11 \N \N \N 2
2 21 04-21-12 2 \N \N 2
3 21 04-21-13 3 2 2 2
4 22 04-22-10-21 \N \N \N \N
5 22 04-22-10-22 \N \N \N 5
6 22 04-22-10-23 5 \N \N 5
7 22 04-22-10-24 \N 5 5 5
8 22 04-22-10-25 9 \N \N 9
9 23 04-23-11 \N \N \N 10
10 23 04-23-12 10 \N \N 10
11 23 04-23-13 \N 10 10 10
12 24 02-24-10-21 \N \N \N \N

Loading

0 comments on commit 5739167

Please sign in to comment.