Skip to content

Commit

Permalink
Don't use hash grouping w/o keys in chunkwise aggregation (#7552)
Browse files Browse the repository at this point in the history
In chunkwise aggregation paths, we cannot use hash aggregation in
FinalizeAggregate without grouping, because it leads to a wrong result
in case when there are no input rows. Use the strategy type flags set by
the Postgres planner.
  • Loading branch information
akuzm authored Jan 6, 2025
1 parent 11e866e commit 487898c
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 29 deletions.
28 changes: 7 additions & 21 deletions tsl/src/chunkwise_agg.c
Original file line number Diff line number Diff line change
Expand Up @@ -273,8 +273,8 @@ create_hashed_partial_agg_path(PlannerInfo *root, Path *path, PathTarget *target
static void
add_partially_aggregated_subpaths(PlannerInfo *root, PathTarget *input_target,
PathTarget *partial_grouping_target, double d_num_groups,
GroupPathExtraData *extra_data, bool can_sort, bool can_hash,
Path *subpath, List **sorted_paths, List **hashed_paths)
GroupPathExtraData *extra_data, Path *subpath,
List **sorted_paths, List **hashed_paths)
{
/* Translate targetlist for partition */
AppendRelInfo *appinfo = ts_get_appendrelinfo(root, subpath->parent->relid, false);
Expand Down Expand Up @@ -321,7 +321,7 @@ add_partially_aggregated_subpaths(PlannerInfo *root, PathTarget *input_target,
create_projection_path(root, subpath->parent, subpath, chunk_target_before_grouping);
}

if (can_sort)
if (extra_data->flags & GROUPING_CAN_USE_SORT)
{
AggPath *agg_path = create_sorted_partial_agg_path(root,
subpath,
Expand All @@ -332,7 +332,7 @@ add_partially_aggregated_subpaths(PlannerInfo *root, PathTarget *input_target,
*sorted_paths = lappend(*sorted_paths, (Path *) agg_path);
}

if (can_hash)
if (extra_data->flags & GROUPING_CAN_USE_HASH)
{
AggPath *agg_path = create_hashed_partial_agg_path(root,
subpath,
Expand All @@ -358,8 +358,7 @@ static void
generate_agg_pushdown_path(PlannerInfo *root, Path *cheapest_total_path, RelOptInfo *input_rel,
RelOptInfo *output_rel, RelOptInfo *partially_grouped_rel,
PathTarget *grouping_target, PathTarget *partial_grouping_target,
bool can_sort, bool can_hash, double d_num_groups,
GroupPathExtraData *extra_data)
double d_num_groups, GroupPathExtraData *extra_data)
{
/* Get subpaths */
List *subpaths = NIL;
Expand Down Expand Up @@ -424,14 +423,12 @@ generate_agg_pushdown_path(PlannerInfo *root, Path *cheapest_total_path, RelOptI
partial_grouping_target,
d_num_groups,
extra_data,
can_sort,
can_hash,
partially_compressed_path,
&partially_compressed_sorted /* Result path */,
&partially_compressed_hashed /* Result path */);
}

if (can_sort)
if (extra_data->flags & GROUPING_CAN_USE_SORT)
{
sorted_subpaths = lappend(sorted_subpaths,
copy_append_like_path(root,
Expand All @@ -440,7 +437,7 @@ generate_agg_pushdown_path(PlannerInfo *root, Path *cheapest_total_path, RelOptI
subpath->pathtarget));
}

if (can_hash)
if (extra_data->flags & GROUPING_CAN_USE_HASH)
{
hashed_subpaths = lappend(hashed_subpaths,
copy_append_like_path(root,
Expand All @@ -456,8 +453,6 @@ generate_agg_pushdown_path(PlannerInfo *root, Path *cheapest_total_path, RelOptI
partial_grouping_target,
d_num_groups,
extra_data,
can_sort,
can_hash,
subpath,
&sorted_subpaths /* Result paths */,
&hashed_subpaths /* Result paths */);
Expand Down Expand Up @@ -631,13 +626,6 @@ tsl_pushdown_partial_agg(PlannerInfo *root, Hypertable *ht, RelOptInfo *input_re
if (has_min_max_agg_path(output_rel))
return;

/* Is sorting possible ? */
bool can_sort = grouping_is_sortable(parse->groupClause);

/* Is hashing possible ? */
bool can_hash = grouping_is_hashable(parse->groupClause) &&
!ts_is_gapfill_path(linitial(output_rel->pathlist)) && enable_hashagg;

Assert(extra != NULL);
GroupPathExtraData *extra_data = (GroupPathExtraData *) extra;

Expand Down Expand Up @@ -717,8 +705,6 @@ tsl_pushdown_partial_agg(PlannerInfo *root, Hypertable *ht, RelOptInfo *input_re
partially_grouped_rel,
grouping_target,
partial_grouping_target,
can_sort,
can_hash,
d_num_groups,
extra_data);
}
Expand Down
20 changes: 13 additions & 7 deletions tsl/test/expected/vector_agg_functions.out
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ limit 1
set timescaledb.debug_require_vector_agg = :'guc_value';
---- Uncomment to generate reference. Note that there are minor discrepancies
---- on float4 due to different numeric stability in our and PG implementations.
--set timescaledb.enable_vectorized_aggregation to off; set timescaledb.debug_require_vector_agg = 'forbid';
--set timescaledb.enable_chunkwise_aggregation to off; set timescaledb.enable_vectorized_aggregation to off; set timescaledb.debug_require_vector_agg = 'forbid';
set max_parallel_workers_per_gather = 0;
select
format('%sselect %s%s(%s) from aggfns%s%s%s;',
Expand Down Expand Up @@ -6881,7 +6881,8 @@ select ss, min(cint2) from aggfns where cfloat8 > 1000 group by ss order by min(
select stddev(cint2) from aggfns where cfloat8 > 1000;
stddev
--------
(0 rows)

(1 row)

select s, stddev(cint2) from aggfns where cfloat8 > 1000 group by s order by stddev(cint2), s limit 10;
s | stddev
Expand Down Expand Up @@ -6960,7 +6961,8 @@ select ss, min(cint4) from aggfns where cfloat8 > 1000 group by ss order by min(
select stddev(cint4) from aggfns where cfloat8 > 1000;
stddev
--------
(0 rows)

(1 row)

select s, stddev(cint4) from aggfns where cfloat8 > 1000 group by s order by stddev(cint4), s limit 10;
s | stddev
Expand Down Expand Up @@ -6991,7 +6993,8 @@ select ss, sum(cint4) from aggfns where cfloat8 > 1000 group by ss order by sum(
select avg(cint8) from aggfns where cfloat8 > 1000;
avg
-----
(0 rows)

(1 row)

select s, avg(cint8) from aggfns where cfloat8 > 1000 group by s order by avg(cint8), s limit 10;
s | avg
Expand Down Expand Up @@ -7038,7 +7041,8 @@ select ss, min(cint8) from aggfns where cfloat8 > 1000 group by ss order by min(
select sum(cint8) from aggfns where cfloat8 > 1000;
sum
-----
(0 rows)

(1 row)

select s, sum(cint8) from aggfns where cfloat8 > 1000 group by s order by sum(cint8), s limit 10;
s | sum
Expand Down Expand Up @@ -7181,7 +7185,8 @@ select ss, min(s) from aggfns where cfloat8 > 1000 group by ss order by min(s),
select stddev(s) from aggfns where cfloat8 > 1000;
stddev
--------
(0 rows)

(1 row)

select s, stddev(s) from aggfns where cfloat8 > 1000 group by s order by stddev(s), s limit 10;
s | stddev
Expand Down Expand Up @@ -7260,7 +7265,8 @@ select ss, min(ss) from aggfns where cfloat8 > 1000 group by ss order by min(ss)
select stddev(ss) from aggfns where cfloat8 > 1000;
stddev
--------
(0 rows)

(1 row)

select s, stddev(ss) from aggfns where cfloat8 > 1000 group by s order by stddev(ss), s limit 10;
s | stddev
Expand Down
2 changes: 1 addition & 1 deletion tsl/test/sql/vector_agg_functions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ limit 1
set timescaledb.debug_require_vector_agg = :'guc_value';
---- Uncomment to generate reference. Note that there are minor discrepancies
---- on float4 due to different numeric stability in our and PG implementations.
--set timescaledb.enable_vectorized_aggregation to off; set timescaledb.debug_require_vector_agg = 'forbid';
--set timescaledb.enable_chunkwise_aggregation to off; set timescaledb.enable_vectorized_aggregation to off; set timescaledb.debug_require_vector_agg = 'forbid';

set max_parallel_workers_per_gather = 0;

Expand Down

0 comments on commit 487898c

Please sign in to comment.