From b92e622396bd6262c3a3ab9f8d52b255d593a2ff Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 2 Oct 2024 10:32:23 +0200 Subject: [PATCH 01/58] Vectorized hash grouping on one column some experiments --- tsl/src/nodes/vector_agg/CMakeLists.txt | 1 + tsl/src/nodes/vector_agg/exec.c | 26 +- .../vector_agg/function/agg_const_helper.c | 4 +- .../vector_agg/function/agg_many_helper.c | 31 ++ .../function/float48_accum_single.c | 55 ++- tsl/src/nodes/vector_agg/function/functions.c | 24 ++ tsl/src/nodes/vector_agg/function/functions.h | 11 +- .../vector_agg/function/int128_accum_single.c | 13 + .../function/int24_avg_accum_single.c | 12 + .../vector_agg/function/int24_sum_single.c | 12 + .../function/minmax_arithmetic_single.c | 31 +- .../vector_agg/function/sum_float_single.c | 12 + tsl/src/nodes/vector_agg/grouping_policy.h | 5 +- .../nodes/vector_agg/grouping_policy_batch.c | 11 +- .../nodes/vector_agg/grouping_policy_hash.c | 340 ++++++++++++++++++ tsl/src/nodes/vector_agg/plan.c | 29 +- 16 files changed, 578 insertions(+), 39 deletions(-) create mode 100644 tsl/src/nodes/vector_agg/function/agg_many_helper.c create mode 100644 tsl/src/nodes/vector_agg/grouping_policy_hash.c diff --git a/tsl/src/nodes/vector_agg/CMakeLists.txt b/tsl/src/nodes/vector_agg/CMakeLists.txt index e621571d5f5..c3a85bbd30f 100644 --- a/tsl/src/nodes/vector_agg/CMakeLists.txt +++ b/tsl/src/nodes/vector_agg/CMakeLists.txt @@ -2,5 +2,6 @@ add_subdirectory(function) set(SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/exec.c ${CMAKE_CURRENT_SOURCE_DIR}/grouping_policy_batch.c + ${CMAKE_CURRENT_SOURCE_DIR}/grouping_policy_hash.c ${CMAKE_CURRENT_SOURCE_DIR}/plan.c) target_sources(${TSL_LIBRARY_NAME} PRIVATE ${SOURCES}) diff --git a/tsl/src/nodes/vector_agg/exec.c b/tsl/src/nodes/vector_agg/exec.c index 381944f5172..eae2babe0ac 100644 --- a/tsl/src/nodes/vector_agg/exec.c +++ b/tsl/src/nodes/vector_agg/exec.c @@ -122,11 +122,27 @@ 
vector_agg_begin(CustomScanState *node, EState *estate, int eflags) } } - List *grouping_column_offsets = linitial(cscan->custom_private); - vector_agg_state->grouping = - create_grouping_policy_batch(vector_agg_state->agg_defs, - vector_agg_state->output_grouping_columns, - /* partial_per_batch = */ grouping_column_offsets != NIL); + /// List *grouping_child_output_offsets = linitial(cscan->custom_private); + if (list_length(vector_agg_state->output_grouping_columns) == 1) + { + GroupingColumn *col = + (GroupingColumn *) linitial(vector_agg_state->output_grouping_columns); + DecompressContext *dcontext = &decompress_state->decompress_context; + CompressionColumnDescription *desc = &dcontext->compressed_chunk_columns[col->input_offset]; + if (desc->type == COMPRESSED_COLUMN) + { + vector_agg_state->grouping = + create_grouping_policy_hash(vector_agg_state->agg_defs, + vector_agg_state->output_grouping_columns); + } + } + + if (vector_agg_state->grouping == NULL) + { + vector_agg_state->grouping = + create_grouping_policy_batch(vector_agg_state->agg_defs, + vector_agg_state->output_grouping_columns); + } } static void diff --git a/tsl/src/nodes/vector_agg/function/agg_const_helper.c b/tsl/src/nodes/vector_agg/function/agg_const_helper.c index a1abe481aec..c83d38526be 100644 --- a/tsl/src/nodes/vector_agg/function/agg_const_helper.c +++ b/tsl/src/nodes/vector_agg/function/agg_const_helper.c @@ -11,13 +11,13 @@ * implementation otherwise. */ static void -FUNCTION_NAME(const)(void *agg_state, Datum constvalue, bool constisnull, int n, +FUNCTION_NAME(const)(void *agg_state, Datum constvalue, bool constisnull, int nn, MemoryContext agg_extra_mctx) { const uint64 valid = constisnull ? 0 : 1; const CTYPE value = valid ? 
DATUM_TO_CTYPE(constvalue) : 0; - for (int i = 0; i < n; i++) + for (int i = 0; i < nn; i++) { FUNCTION_NAME(vector_impl)(agg_state, 1, &value, &valid, NULL, agg_extra_mctx); } diff --git a/tsl/src/nodes/vector_agg/function/agg_many_helper.c b/tsl/src/nodes/vector_agg/function/agg_many_helper.c new file mode 100644 index 00000000000..a0cc7f3513f --- /dev/null +++ b/tsl/src/nodes/vector_agg/function/agg_many_helper.c @@ -0,0 +1,31 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. + */ + +static void +FUNCTION_NAME(many)(void *restrict agg_states, int32 *restrict offsets, const ArrowArray *vector, + MemoryContext agg_extra_mctx) +{ + MemoryContext old = MemoryContextSwitchTo(agg_extra_mctx); + const int n = vector->length; + const CTYPE *values = vector->buffers[1]; + const uint64 *valid = vector->buffers[0]; + for (int row = 0; row < n; row++) + { + if (offsets[row] == 0) + { + continue; + } + + if (!arrow_row_is_valid(valid, row)) + { + continue; + } + + FUNCTION_NAME(state) *state = (offsets[row] + (FUNCTION_NAME(state) *) agg_states); + FUNCTION_NAME(one)(state, values[row]); + } + MemoryContextSwitchTo(old); +} diff --git a/tsl/src/nodes/vector_agg/function/float48_accum_single.c b/tsl/src/nodes/vector_agg/function/float48_accum_single.c index 13075b29807..c54c0405384 100644 --- a/tsl/src/nodes/vector_agg/function/float48_accum_single.c +++ b/tsl/src/nodes/vector_agg/function/float48_accum_single.c @@ -194,13 +194,13 @@ FUNCTION_NAME(combine)(double *inout_N, double *inout_Sx, #endif static pg_attribute_always_inline void -FUNCTION_NAME(vector_impl)(void *agg_state, int n, const CTYPE *values, const uint64 *valid1, +FUNCTION_NAME(vector_impl)(void *agg_state, size_t n, const CTYPE *values, const uint64 *valid1, const uint64 *valid2, MemoryContext agg_extra_mctx) { /* * Vector registers can be up to 512 bits wide. 
*/ -#define UNROLL_SIZE ((int) (512 / 8 / sizeof(CTYPE))) +#define UNROLL_SIZE ((int) ((512 / 8) / sizeof(CTYPE))) /* * Each inner iteration works with its own accumulators to avoid data @@ -212,7 +212,7 @@ FUNCTION_NAME(vector_impl)(void *agg_state, int n, const CTYPE *values, const ui double Sxxarray[UNROLL_SIZE] = { 0 }; #endif - int row = 0; + size_t row = 0; #ifdef NEED_SXX /* @@ -220,7 +220,7 @@ FUNCTION_NAME(vector_impl)(void *agg_state, int n, const CTYPE *values, const ui * to make the actual update function branchless, namely the computation of * Sxx which works differently for the first row. */ - for (int inner = 0; inner < UNROLL_SIZE; inner++) + for (size_t inner = 0; inner < UNROLL_SIZE; inner++) { for (; row < n; row++) { @@ -240,7 +240,8 @@ FUNCTION_NAME(vector_impl)(void *agg_state, int n, const CTYPE *values, const ui * Scroll to the row that is a multiple of UNROLL_SIZE. This is the correct * row at which to enter the unrolled loop below. */ - for (int inner = row % UNROLL_SIZE; inner > 0 && inner < UNROLL_SIZE && row < n; inner++, row++) + for (size_t inner = row % UNROLL_SIZE; inner > 0 && inner < UNROLL_SIZE && row < n; + inner++, row++) { UPDATE(valid1, valid2, values, row, &Narray[inner], &Sxarray[inner], &Sxxarray[inner]); } @@ -252,7 +253,7 @@ FUNCTION_NAME(vector_impl)(void *agg_state, int n, const CTYPE *values, const ui Assert(row % UNROLL_SIZE == 0 || row == n); for (; row < UNROLL_SIZE * (n / UNROLL_SIZE); row += UNROLL_SIZE) { - for (int inner = 0; inner < UNROLL_SIZE; inner++) + for (size_t inner = 0; inner < UNROLL_SIZE; inner++) { UPDATE(valid1, valid2, @@ -269,7 +270,7 @@ FUNCTION_NAME(vector_impl)(void *agg_state, int n, const CTYPE *values, const ui */ for (; row < n; row++) { - const int inner = row % UNROLL_SIZE; + const size_t inner = row % UNROLL_SIZE; UPDATE(valid1, valid2, values, row, &Narray[inner], &Sxarray[inner], &Sxxarray[inner]); } @@ -289,14 +290,44 @@ FUNCTION_NAME(vector_impl)(void *agg_state, int n, const CTYPE 
*values, const ui COMBINE(&state->N, &state->Sx, &state->Sxx, Narray[0], Sxarray[0], Sxxarray[0]); } +static pg_attribute_always_inline void +FUNCTION_NAME(one)(void *restrict agg_state, const CTYPE value) +{ + FUNCTION_NAME(state) *state = (FUNCTION_NAME(state) *) agg_state; + /* + * This code follows the Postgres float8_accum() transition function, see + * the comments there. + */ + const double newN = state->N + 1.0; + const double newSx = state->Sx + value; +#ifdef NEED_SXX + if (state->N > 0.0) + { + const double tmp = value * newN - newSx; + state->Sxx += tmp * tmp / (state->N * newN); + } + else + { + state->Sxx = 0 * value; + } +#endif + + state->N = newN; + state->Sx = newSx; +} + #include "agg_const_helper.c" +#include "agg_many_helper.c" #include "agg_vector_validity_helper.c" -VectorAggFunctions FUNCTION_NAME(argdef) = { .state_bytes = sizeof(FUNCTION_NAME(state)), - .agg_init = FUNCTION_NAME(init), - .agg_emit = FUNCTION_NAME(emit), - .agg_const = FUNCTION_NAME(const), - .agg_vector = FUNCTION_NAME(vector) }; +VectorAggFunctions FUNCTION_NAME(argdef) = { + .state_bytes = sizeof(FUNCTION_NAME(state)), + .agg_init = FUNCTION_NAME(init), + .agg_emit = FUNCTION_NAME(emit), + .agg_const = FUNCTION_NAME(const), + .agg_vector = FUNCTION_NAME(vector), + .agg_many = FUNCTION_NAME(many), +}; #undef UPDATE #undef COMBINE diff --git a/tsl/src/nodes/vector_agg/function/functions.c b/tsl/src/nodes/vector_agg/function/functions.c index 6cbb7dccf0d..a5bb1226929 100644 --- a/tsl/src/nodes/vector_agg/function/functions.c +++ b/tsl/src/nodes/vector_agg/function/functions.c @@ -110,12 +110,36 @@ count_any_vector(void *agg_state, const ArrowArray *vector, const uint64 *filter } } +static void +count_any_many(void *restrict agg_states, int32 *restrict offsets, const ArrowArray *vector, + MemoryContext agg_extra_mctx) +{ + const int n = vector->length; + const uint64 *valid = vector->buffers[0]; + for (int row = 0; row < n; row++) + { + if (offsets[row] == 0) + { + continue; 
+ } + + if (!arrow_row_is_valid(valid, row)) + { + continue; + } + + CountState *state = (offsets[row] + (CountState *) agg_states); + state->count++; + } +} + VectorAggFunctions count_any_agg = { .state_bytes = sizeof(CountState), .agg_init = count_init, .agg_emit = count_emit, .agg_const = count_any_const, .agg_vector = count_any_vector, + .agg_many = count_any_many, }; /* diff --git a/tsl/src/nodes/vector_agg/function/functions.h b/tsl/src/nodes/vector_agg/function/functions.h index 773bffe584c..70ec3d5a2c7 100644 --- a/tsl/src/nodes/vector_agg/function/functions.h +++ b/tsl/src/nodes/vector_agg/function/functions.h @@ -17,18 +17,21 @@ typedef struct size_t state_bytes; /* Initialize the aggregate function state pointed to by agg_value and agg_isnull. */ - void (*agg_init)(void *agg_state); + void (*agg_init)(void *restrict agg_state); /* Aggregate a given arrow array. */ - void (*agg_vector)(void *agg_state, const ArrowArray *vector, const uint64 *filter, + void (*agg_vector)(void *restrict agg_state, const ArrowArray *vector, const uint64 *filter, MemoryContext agg_extra_mctx); /* Aggregate a constant (like segmentby or column with default value). */ - void (*agg_const)(void *agg_state, Datum constvalue, bool constisnull, int n, + void (*agg_const)(void *restrict agg_state, Datum constvalue, bool constisnull, int n, MemoryContext agg_extra_mctx); + void (*agg_many)(void *restrict agg_states, int32 *restrict offsets, const ArrowArray *vector, + MemoryContext agg_extra_mctx); + /* Emit a partial result. 
*/ - void (*agg_emit)(void *agg_state, Datum *out_result, bool *out_isnull); + void (*agg_emit)(void *restrict agg_state, Datum *out_result, bool *out_isnull); } VectorAggFunctions; VectorAggFunctions *get_vector_aggregate(Oid aggfnoid); diff --git a/tsl/src/nodes/vector_agg/function/int128_accum_single.c b/tsl/src/nodes/vector_agg/function/int128_accum_single.c index e50a743c901..39bd665f3c9 100644 --- a/tsl/src/nodes/vector_agg/function/int128_accum_single.c +++ b/tsl/src/nodes/vector_agg/function/int128_accum_single.c @@ -96,7 +96,19 @@ FUNCTION_NAME(vector_impl)(void *agg_state, int n, const CTYPE *values, const ui #endif } +static pg_attribute_always_inline void +FUNCTION_NAME(one)(void *restrict agg_state, const CTYPE value) +{ + FUNCTION_NAME(state) *state = (FUNCTION_NAME(state) *) agg_state; + state->N++; + state->sumX += value; +#ifdef NEED_SUMX2 + state->sumX2 += ((int128) value) * ((int128) value); +#endif +} + #include "agg_const_helper.c" +#include "agg_many_helper.c" #include "agg_vector_validity_helper.c" VectorAggFunctions FUNCTION_NAME(argdef) = { @@ -105,6 +117,7 @@ VectorAggFunctions FUNCTION_NAME(argdef) = { .agg_emit = FUNCTION_NAME(emit), .agg_const = FUNCTION_NAME(const), .agg_vector = FUNCTION_NAME(vector), + .agg_many = FUNCTION_NAME(many), }; #endif diff --git a/tsl/src/nodes/vector_agg/function/int24_avg_accum_single.c b/tsl/src/nodes/vector_agg/function/int24_avg_accum_single.c index 551730c1912..2051fe006d5 100644 --- a/tsl/src/nodes/vector_agg/function/int24_avg_accum_single.c +++ b/tsl/src/nodes/vector_agg/function/int24_avg_accum_single.c @@ -28,7 +28,18 @@ FUNCTION_NAME(vector_impl)(void *agg_state, int n, const CTYPE *values, const ui state->sum += batch_sum; } +typedef Int24AvgAccumState FUNCTION_NAME(state); + +static pg_attribute_always_inline void +FUNCTION_NAME(one)(void *restrict agg_state, const CTYPE value) +{ + FUNCTION_NAME(state) *state = (FUNCTION_NAME(state) *) agg_state; + state->count++; + state->sum += value; +} + 
#include "agg_const_helper.c" +#include "agg_many_helper.c" #include "agg_vector_validity_helper.c" VectorAggFunctions FUNCTION_NAME(argdef) = { @@ -37,6 +48,7 @@ VectorAggFunctions FUNCTION_NAME(argdef) = { .agg_emit = int24_avg_accum_emit, .agg_const = FUNCTION_NAME(const), .agg_vector = FUNCTION_NAME(vector), + .agg_many = FUNCTION_NAME(many), }; #endif diff --git a/tsl/src/nodes/vector_agg/function/int24_sum_single.c b/tsl/src/nodes/vector_agg/function/int24_sum_single.c index e84c873aed2..c96a45fc96f 100644 --- a/tsl/src/nodes/vector_agg/function/int24_sum_single.c +++ b/tsl/src/nodes/vector_agg/function/int24_sum_single.c @@ -51,7 +51,18 @@ FUNCTION_NAME(vector_impl)(void *agg_state, int n, const CTYPE *values, const ui state->isnull &= !have_result; } +static pg_attribute_always_inline void +FUNCTION_NAME(one)(void *restrict agg_state, const CTYPE value) +{ + Int24SumState *state = (Int24SumState *) agg_state; + state->result += value; + state->isnull = false; +} + +typedef Int24SumState FUNCTION_NAME(state); + #include "agg_const_helper.c" +#include "agg_many_helper.c" #include "agg_vector_validity_helper.c" VectorAggFunctions FUNCTION_NAME(argdef) = { @@ -60,6 +71,7 @@ VectorAggFunctions FUNCTION_NAME(argdef) = { .agg_emit = int_sum_emit, .agg_const = FUNCTION_NAME(const), .agg_vector = FUNCTION_NAME(vector), + .agg_many = FUNCTION_NAME(many), }; #endif diff --git a/tsl/src/nodes/vector_agg/function/minmax_arithmetic_single.c b/tsl/src/nodes/vector_agg/function/minmax_arithmetic_single.c index f21521f12bd..127b5176291 100644 --- a/tsl/src/nodes/vector_agg/function/minmax_arithmetic_single.c +++ b/tsl/src/nodes/vector_agg/function/minmax_arithmetic_single.c @@ -41,14 +41,35 @@ FUNCTION_NAME(vector_impl)(void *agg_state, int n, const CTYPE *values, const ui MemoryContextSwitchTo(old); } +typedef MinMaxState FUNCTION_NAME(state); + +static pg_attribute_always_inline void +FUNCTION_NAME(one)(void *restrict agg_state, const CTYPE value) +{ + 
FUNCTION_NAME(state) *state = (FUNCTION_NAME(state) *) agg_state; + if (!state->isvalid || PREDICATE(DATUM_TO_CTYPE(state->value), value) || isnan((double) value)) + { + /* + * Note that float8 Datum is by-reference on 32-bit systems, and this + * function is called in the extra aggregate data memory context. + */ + state->value = CTYPE_TO_DATUM(value); + state->isvalid = true; + } +} + #include "agg_const_helper.c" +#include "agg_many_helper.c" #include "agg_vector_validity_helper.c" -VectorAggFunctions FUNCTION_NAME(argdef) = { .state_bytes = sizeof(MinMaxState), - .agg_init = minmax_init, - .agg_emit = minmax_emit, - .agg_const = FUNCTION_NAME(const), - .agg_vector = FUNCTION_NAME(vector) }; +VectorAggFunctions FUNCTION_NAME(argdef) = { + .state_bytes = sizeof(MinMaxState), + .agg_init = minmax_init, + .agg_emit = minmax_emit, + .agg_const = FUNCTION_NAME(const), + .agg_vector = FUNCTION_NAME(vector), + .agg_many = FUNCTION_NAME(many), +}; #endif #undef PG_TYPE diff --git a/tsl/src/nodes/vector_agg/function/sum_float_single.c b/tsl/src/nodes/vector_agg/function/sum_float_single.c index 59a5cfe1fc7..41565feabfc 100644 --- a/tsl/src/nodes/vector_agg/function/sum_float_single.c +++ b/tsl/src/nodes/vector_agg/function/sum_float_single.c @@ -80,7 +80,18 @@ FUNCTION_NAME(vector_impl)(void *agg_state, int n, const CTYPE *values, const ui state->result += sum_accu[0]; } +typedef FloatSumState FUNCTION_NAME(state); + +static pg_attribute_always_inline void +FUNCTION_NAME(one)(void *restrict agg_state, const CTYPE value) +{ + FUNCTION_NAME(state) *state = (FUNCTION_NAME(state) *) agg_state; + state->isnull = false; + state->result += value; +} + #include "agg_const_helper.c" +#include "agg_many_helper.c" #include "agg_vector_validity_helper.c" VectorAggFunctions FUNCTION_NAME(argdef) = { @@ -89,6 +100,7 @@ VectorAggFunctions FUNCTION_NAME(argdef) = { .agg_emit = FUNCTION_NAME(emit), .agg_const = FUNCTION_NAME(const), .agg_vector = FUNCTION_NAME(vector), + .agg_many = 
FUNCTION_NAME(many), }; #endif diff --git a/tsl/src/nodes/vector_agg/grouping_policy.h b/tsl/src/nodes/vector_agg/grouping_policy.h index 108584f19ef..8aaf9c99f47 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy.h +++ b/tsl/src/nodes/vector_agg/grouping_policy.h @@ -37,5 +37,6 @@ typedef struct GroupingPolicy void (*gp_destroy)(GroupingPolicy *gp); } GroupingPolicy; -extern GroupingPolicy *create_grouping_policy_batch(List *agg_defs, List *grouping_columns, - bool partial_per_batch); +extern GroupingPolicy *create_grouping_policy_batch(List *agg_defs, List *grouping_columns); + +extern GroupingPolicy *create_grouping_policy_hash(List *agg_defs, List *grouping_columns); diff --git a/tsl/src/nodes/vector_agg/grouping_policy_batch.c b/tsl/src/nodes/vector_agg/grouping_policy_batch.c index ee42a5b6e1e..a37747f5166 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_batch.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_batch.c @@ -28,7 +28,6 @@ typedef struct List *output_grouping_columns; Datum *output_grouping_values; bool *output_grouping_isnull; - bool partial_per_batch; bool have_results; /* @@ -42,10 +41,9 @@ typedef struct static const GroupingPolicy grouping_policy_batch_functions; GroupingPolicy * -create_grouping_policy_batch(List *agg_defs, List *output_grouping_columns, bool partial_per_batch) +create_grouping_policy_batch(List *agg_defs, List *output_grouping_columns) { GroupingPolicyBatch *policy = palloc0(sizeof(GroupingPolicyBatch)); - policy->partial_per_batch = partial_per_batch; policy->funcs = grouping_policy_batch_functions; policy->output_grouping_columns = output_grouping_columns; policy->agg_defs = agg_defs; @@ -180,7 +178,6 @@ gp_batch_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) * means we're grouping by segmentby, and these values will be valid * until the next call to the vector agg node. 
*/ - Assert(policy->partial_per_batch); policy->output_grouping_values[i] = *values->output_value; policy->output_grouping_isnull[i] = *values->output_isnull; } @@ -192,7 +189,11 @@ static bool gp_batch_should_emit(GroupingPolicy *gp) { GroupingPolicyBatch *policy = (GroupingPolicyBatch *) gp; - return policy->partial_per_batch && policy->have_results; + /* + * If we're grouping by segmentby columns, we have to output partials for + * every batch. + */ + return policy->output_grouping_columns != NIL && policy->have_results; } static bool diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.c b/tsl/src/nodes/vector_agg/grouping_policy_hash.c new file mode 100644 index 00000000000..bffb77912f7 --- /dev/null +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.c @@ -0,0 +1,340 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. + */ + +/* + * This grouping policy aggregates entire compressed batches. It can be used to + * aggregate with no grouping, or to produce partial aggregates per each batch + * to group by segmentby columns. 
+ */ + +#include + +#include +#include + +#include "grouping_policy.h" + +#include "nodes/decompress_chunk/compressed_batch.h" +#include "nodes/vector_agg/exec.h" + +typedef struct +{ + Datum key; + uint32 status; + uint32 agg_state_index; +} HashEntry; + +static uint64_t +hash64(uint64_t x) +{ + x ^= x >> 30; + x *= 0xbf58476d1ce4e5b9U; + x ^= x >> 27; + x *= 0x94d049bb133111ebU; + x ^= x >> 31; + return x; +} + +#define SH_PREFIX h +#define SH_ELEMENT_TYPE HashEntry +#define SH_KEY_TYPE Datum +#define SH_KEY key +#define SH_HASH_KEY(tb, key) hash64(key) +#define SH_EQUAL(tb, a, b) a == b +#define SH_SCOPE static inline +#define SH_DECLARE +#define SH_DEFINE +#include "lib/simplehash.h" + +struct h_hash; + +typedef struct +{ + GroupingPolicy funcs; + List *agg_defs; + List *output_grouping_columns; + bool partial_per_batch; + struct h_hash *table; + bool have_null_key; + struct h_iterator iter; + bool returning_results; + + /* + * A memory context for aggregate functions to allocate additional data, + * i.e. if they store strings or float8 datum on 32-bit systems. Valid until + * the grouping policy is reset. 
+ */ + MemoryContext agg_extra_mctx; + + uint64 aggstate_bytes_per_key; + uint64 allocated_aggstate_rows; + List *per_agg_states; +} GroupingPolicyHash; + +static const GroupingPolicy grouping_policy_hash_functions; + +GroupingPolicy * +create_grouping_policy_hash(List *agg_defs, List *output_grouping_columns) +{ + GroupingPolicyHash *policy = palloc0(sizeof(GroupingPolicyHash)); + policy->funcs = grouping_policy_hash_functions; + policy->output_grouping_columns = output_grouping_columns; + policy->agg_defs = agg_defs; + policy->agg_extra_mctx = + AllocSetContextCreate(CurrentMemoryContext, "agg extra", ALLOCSET_DEFAULT_SIZES); + policy->allocated_aggstate_rows = 1000; + ListCell *lc; + foreach (lc, agg_defs) + { + VectorAggDef *def = lfirst(lc); + policy->aggstate_bytes_per_key += def->func->state_bytes; + + policy->per_agg_states = + lappend(policy->per_agg_states, + palloc0(def->func->state_bytes * policy->allocated_aggstate_rows)); + } + + policy->table = h_create(CurrentMemoryContext, 1000, NULL); + policy->have_null_key = false; + + policy->returning_results = false; + + return &policy->funcs; +} + +static void +gp_hash_reset(GroupingPolicy *obj) +{ + GroupingPolicyHash *policy = (GroupingPolicyHash *) obj; + + MemoryContextReset(policy->agg_extra_mctx); + + policy->returning_results = false; + + h_reset(policy->table); + policy->have_null_key = false; +} + +static void +compute_single_aggregate(DecompressBatchState *batch_state, VectorAggDef *agg_def, void *agg_states, + int32 *offsets, MemoryContext agg_extra_mctx) +{ + ArrowArray *arg_arrow = NULL; + Datum arg_datum = 0; + bool arg_isnull = true; + + /* + * We have functions with one argument, and one function with no arguments + * (count(*)). Collect the arguments. 
+ */ + if (agg_def->input_offset >= 0) + { + CompressedColumnValues *values = &batch_state->compressed_columns[agg_def->input_offset]; + Assert(values->decompression_type != DT_Invalid); + Assert(values->decompression_type != DT_Iterator); + + if (values->arrow != NULL) + { + arg_arrow = values->arrow; + } + else + { + Assert(values->decompression_type == DT_Scalar); + arg_datum = *values->output_value; + arg_isnull = *values->output_isnull; + } + } + + /* + * Now call the function. + */ + if (arg_arrow != NULL) + { + /* Arrow argument. */ + agg_def->func->agg_many(agg_states, offsets, arg_arrow, agg_extra_mctx); + } + else + { + /* + * Scalar argument, or count(*). + */ + for (int i = 0; i < batch_state->total_batch_rows; i++) + { + if (!arrow_row_is_valid(batch_state->vector_qual_result, i)) + { + continue; + } + + if (offsets[i] == 0) + { + continue; + } + + void *state = (offsets[i] * agg_def->func->state_bytes + (char *) agg_states); + agg_def->func->agg_const(state, arg_datum, arg_isnull, 1, agg_extra_mctx); + } + } +} + +static void +gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) +{ + GroupingPolicyHash *policy = (GroupingPolicyHash *) gp; + + Assert(!policy->returning_results); + + /* + * State index zero is invalid, and state index one is for null key. We have + * to initialize it at the first run. + */ + const uint64 last_initialized_state_index = + policy->table->members ? policy->table->members + 2 : 1; + uint64 next_unused_state_index = policy->table->members + 2; + + int32 offsets[1000] = { 0 }; + Assert(batch_state->total_batch_rows <= 1000); + + /* + * For the partial aggregation node, the grouping columns are always in the + * output, so we don't have to separately look at the list of the grouping + * columns. 
+ */ + Assert(list_length(policy->output_grouping_columns) == 1); + GroupingColumn *g = linitial(policy->output_grouping_columns); + CompressedColumnValues *gv = &batch_state->compressed_columns[g->input_offset]; + // Assert(gv->decompression_type == 8 /* lolwut */); + Assert(gv->decompression_type > 0); + const void *vv = gv->arrow->buffers[1]; + const uint64 *key_validity = gv->arrow->buffers[0]; + const uint64 *filter = batch_state->vector_qual_result; + for (int row = 0; row < batch_state->total_batch_rows; row++) + { + Datum key = { 0 }; + memcpy(&key, gv->decompression_type * row + (char *) vv, gv->decompression_type); + if (!arrow_row_is_valid(filter, row)) + { + continue; + } + + if (arrow_row_is_valid(key_validity, row)) + { + bool found = false; + HashEntry *entry = h_insert(policy->table, key, &found); + if (!found) + { + entry->agg_state_index = next_unused_state_index++; + } + offsets[row] = entry->agg_state_index; + } + else + { + policy->have_null_key = true; + offsets[row] = 1; + } + } + + ListCell *aggdeflc; + ListCell *aggstatelc; + + if (next_unused_state_index > last_initialized_state_index) + { + if (next_unused_state_index > policy->allocated_aggstate_rows) + { + policy->allocated_aggstate_rows = policy->allocated_aggstate_rows * 2 + 1; + forboth (aggdeflc, policy->agg_defs, aggstatelc, policy->per_agg_states) + { + VectorAggDef *def = lfirst(aggdeflc); + lfirst(aggstatelc) = + repalloc(lfirst(aggstatelc), + policy->allocated_aggstate_rows * def->func->state_bytes); + } + } + + forboth (aggdeflc, policy->agg_defs, aggstatelc, policy->per_agg_states) + { + VectorAggDef *def = lfirst(aggdeflc); + for (uint64 i = last_initialized_state_index; i < next_unused_state_index; i++) + { + void *aggstate = def->func->state_bytes * i + (char *) lfirst(aggstatelc); + def->func->agg_init(aggstate); + } + } + } + + forboth (aggdeflc, policy->agg_defs, aggstatelc, policy->per_agg_states) + { + compute_single_aggregate(batch_state, + lfirst(aggdeflc), + 
lfirst(aggstatelc), + offsets, + policy->agg_extra_mctx); + } +} + +static bool +gp_hash_should_emit(GroupingPolicy *gp) +{ + GroupingPolicyHash *policy = (GroupingPolicyHash *) gp; + if (policy->table->members + policy->have_null_key > 0) + { + return true; + } + return false; +} + +static bool +gp_hash_do_emit(GroupingPolicy *gp, TupleTableSlot *aggregated_slot) +{ + GroupingPolicyHash *policy = (GroupingPolicyHash *) gp; + + if (!policy->returning_results) + { + /* FIXME doesn't work on final result emission w/o should_emit. */ + policy->returning_results = true; + h_start_iterate(policy->table, &policy->iter); + } + + HashEntry null_key_entry = { .agg_state_index = 1 }; + HashEntry *entry = h_iterate(policy->table, &policy->iter); + bool key_is_null = false; + if (entry == NULL && policy->have_null_key) + { + policy->have_null_key = false; + entry = &null_key_entry; + key_is_null = true; + } + + if (entry == NULL) + { + policy->returning_results = false; + return false; + } + + const int naggs = list_length(policy->agg_defs); + for (int i = 0; i < naggs; i++) + { + VectorAggDef *agg_def = (VectorAggDef *) list_nth(policy->agg_defs, i); + void *agg_states = list_nth(policy->per_agg_states, i); + void *agg_state = entry->agg_state_index * agg_def->func->state_bytes + (char *) agg_states; + agg_def->func->agg_emit(agg_state, + &aggregated_slot->tts_values[agg_def->output_offset], + &aggregated_slot->tts_isnull[agg_def->output_offset]); + } + + Assert(list_length(policy->output_grouping_columns) == 1); + GroupingColumn *col = linitial(policy->output_grouping_columns); + aggregated_slot->tts_values[col->output_offset] = entry->key; + aggregated_slot->tts_isnull[col->output_offset] = key_is_null; + + return true; +} + +static const GroupingPolicy grouping_policy_hash_functions = { + .gp_reset = gp_hash_reset, + .gp_add_batch = gp_hash_add_batch, + .gp_should_emit = gp_hash_should_emit, + .gp_do_emit = gp_hash_do_emit, +}; diff --git a/tsl/src/nodes/vector_agg/plan.c 
b/tsl/src/nodes/vector_agg/plan.c index ac150b7ea99..c1ff9eac806 100644 --- a/tsl/src/nodes/vector_agg/plan.c +++ b/tsl/src/nodes/vector_agg/plan.c @@ -154,13 +154,13 @@ vector_agg_plan_create(Agg *agg, CustomScan *decompress_chunk) custom->scan.plan.extParam = bms_copy(agg->plan.extParam); custom->scan.plan.allParam = bms_copy(agg->plan.allParam); - List *grouping_col_offsets = NIL; + List *grouping_child_output_offsets = NIL; for (int i = 0; i < agg->numCols; i++) { - grouping_col_offsets = - lappend_int(grouping_col_offsets, AttrNumberGetAttrOffset(agg->grpColIdx[i])); + grouping_child_output_offsets = + lappend_int(grouping_child_output_offsets, AttrNumberGetAttrOffset(agg->grpColIdx[i])); } - custom->custom_private = list_make1(grouping_col_offsets); + custom->custom_private = list_make1(grouping_child_output_offsets); return (Plan *) custom; } @@ -323,6 +323,27 @@ can_vectorize_grouping(Agg *agg, CustomScan *custom) return true; } + if (agg->numCols == 1) + { + int offset = AttrNumberGetAttrOffset(agg->grpColIdx[0]); + TargetEntry *entry = list_nth(agg->plan.targetlist, offset); + fprintf(stderr, "target entry:\n"); + my_print(entry); + + bool is_segmentby = false; + if (is_vector_var(custom, entry->expr, &is_segmentby)) + { + Var *var = castNode(Var, entry->expr); + int16 typlen; + bool typbyval; + get_typlenbyval(var->vartype, &typlen, &typbyval); + if (typbyval && typlen > 0 && (size_t) typlen <= sizeof(Datum)) + { + return true; + } + } + } + for (int i = 0; i < agg->numCols; i++) { int offset = AttrNumberGetAttrOffset(agg->grpColIdx[i]); From 74d4419dea4d686ef46913d751f84342e67a7acf Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 2 Oct 2024 10:39:46 +0200 Subject: [PATCH 02/58] benchmark vectorized grouping (2024-10-02 no. 
6) From baedf7f09fe95c9532a3cf0bfbb434d8970d3010 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 2 Oct 2024 11:10:25 +0200 Subject: [PATCH 03/58] fixes --- tsl/src/compression/arrow_c_data_interface.h | 2 +- .../nodes/vector_agg/grouping_policy_hash.c | 18 ++++++++++++++---- tsl/src/nodes/vector_agg/plan.c | 2 -- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/tsl/src/compression/arrow_c_data_interface.h b/tsl/src/compression/arrow_c_data_interface.h index a0362f85328..6a1d7ae114d 100644 --- a/tsl/src/compression/arrow_c_data_interface.h +++ b/tsl/src/compression/arrow_c_data_interface.h @@ -190,7 +190,7 @@ pad_to_multiple(uint64 pad_to, uint64 source_value) } static inline size_t -arrow_num_valid(uint64 *bitmap, size_t total_rows) +arrow_num_valid(const uint64 *bitmap, size_t total_rows) { if (bitmap == NULL) { diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.c b/tsl/src/nodes/vector_agg/grouping_policy_hash.c index bffb77912f7..e11cdbd3995 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.c @@ -72,6 +72,8 @@ typedef struct uint64 aggstate_bytes_per_key; uint64 allocated_aggstate_rows; List *per_agg_states; + + uint64 stat_input_valid_rows; } GroupingPolicyHash; static const GroupingPolicy grouping_policy_hash_functions; @@ -116,6 +118,8 @@ gp_hash_reset(GroupingPolicy *obj) h_reset(policy->table); policy->have_null_key = false; + + policy->stat_input_valid_rows = 0; } static void @@ -236,6 +240,8 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) } } + policy->stat_input_valid_rows += arrow_num_valid(key_validity, batch_state->total_batch_rows); + ListCell *aggdeflc; ListCell *aggstatelc; @@ -278,10 +284,11 @@ static bool gp_hash_should_emit(GroupingPolicy *gp) { GroupingPolicyHash *policy = (GroupingPolicyHash *) gp; - if (policy->table->members + policy->have_null_key > 0) - { - return true; - } 
+ (void) policy; +// if (policy->table->members + policy->have_null_key > 0) +// { +// return true; +// } return false; } @@ -295,6 +302,9 @@ gp_hash_do_emit(GroupingPolicy *gp, TupleTableSlot *aggregated_slot) /* FIXME doesn't work on final result emission w/o should_emit. */ policy->returning_results = true; h_start_iterate(policy->table, &policy->iter); +// fprintf(stderr, "spill after %ld input rows, %d keys, %f ratio\n", +// policy->stat_input_valid_rows, policy->table->members + policy->have_null_key, +// policy->stat_input_valid_rows / (float) (policy->table->members + policy->have_null_key)); } HashEntry null_key_entry = { .agg_state_index = 1 }; diff --git a/tsl/src/nodes/vector_agg/plan.c b/tsl/src/nodes/vector_agg/plan.c index c1ff9eac806..f358d7a5a28 100644 --- a/tsl/src/nodes/vector_agg/plan.c +++ b/tsl/src/nodes/vector_agg/plan.c @@ -327,8 +327,6 @@ can_vectorize_grouping(Agg *agg, CustomScan *custom) { int offset = AttrNumberGetAttrOffset(agg->grpColIdx[0]); TargetEntry *entry = list_nth(agg->plan.targetlist, offset); - fprintf(stderr, "target entry:\n"); - my_print(entry); bool is_segmentby = false; if (is_vector_var(custom, entry->expr, &is_segmentby)) From 35dbd361a428b0b572140606999d97abe1ee83a0 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 2 Oct 2024 11:10:30 +0200 Subject: [PATCH 04/58] benchmark vectorized grouping (2024-10-02 no. 
7) From 74fffd3f4cbf94f1a6e376d31180dc1411f41bd1 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 2 Oct 2024 13:07:58 +0200 Subject: [PATCH 05/58] some ugly stuff --- tsl/src/nodes/vector_agg/exec.c | 3 +- .../vector_agg/function/agg_many_helper.c | 8 +- tsl/src/nodes/vector_agg/function/functions.c | 19 +- tsl/src/nodes/vector_agg/function/functions.h | 5 +- .../nodes/vector_agg/grouping_policy_hash.c | 257 ++++++++++++++---- tsl/test/expected/vector_agg_functions.out | 26 +- 6 files changed, 248 insertions(+), 70 deletions(-) diff --git a/tsl/src/nodes/vector_agg/exec.c b/tsl/src/nodes/vector_agg/exec.c index eae2babe0ac..548b6ca623f 100644 --- a/tsl/src/nodes/vector_agg/exec.c +++ b/tsl/src/nodes/vector_agg/exec.c @@ -129,7 +129,8 @@ vector_agg_begin(CustomScanState *node, EState *estate, int eflags) (GroupingColumn *) linitial(vector_agg_state->output_grouping_columns); DecompressContext *dcontext = &decompress_state->decompress_context; CompressionColumnDescription *desc = &dcontext->compressed_chunk_columns[col->input_offset]; - if (desc->type == COMPRESSED_COLUMN) + if (desc->type == COMPRESSED_COLUMN && desc->by_value && desc->value_bytes > 0 && + (size_t) desc->value_bytes <= sizeof(Datum)) { vector_agg_state->grouping = create_grouping_policy_hash(vector_agg_state->agg_defs, diff --git a/tsl/src/nodes/vector_agg/function/agg_many_helper.c b/tsl/src/nodes/vector_agg/function/agg_many_helper.c index a0cc7f3513f..ef8acd28181 100644 --- a/tsl/src/nodes/vector_agg/function/agg_many_helper.c +++ b/tsl/src/nodes/vector_agg/function/agg_many_helper.c @@ -5,7 +5,7 @@ */ static void -FUNCTION_NAME(many)(void *restrict agg_states, int32 *restrict offsets, const ArrowArray *vector, +FUNCTION_NAME(many)(void *restrict agg_states, uint32 *restrict offsets, const ArrowArray *vector, MemoryContext agg_extra_mctx) { MemoryContext old = MemoryContextSwitchTo(agg_extra_mctx); @@ -14,6 +14,9 @@ FUNCTION_NAME(many)(void 
*restrict agg_states, int32 *restrict offsets, const Ar const uint64 *valid = vector->buffers[0]; for (int row = 0; row < n; row++) { + FUNCTION_NAME(state) *state = (offsets[row] + (FUNCTION_NAME(state) *) agg_states); + CTYPE value = values[row]; + if (offsets[row] == 0) { continue; @@ -24,8 +27,7 @@ FUNCTION_NAME(many)(void *restrict agg_states, int32 *restrict offsets, const Ar continue; } - FUNCTION_NAME(state) *state = (offsets[row] + (FUNCTION_NAME(state) *) agg_states); - FUNCTION_NAME(one)(state, values[row]); + FUNCTION_NAME(one)(state, value); } MemoryContextSwitchTo(old); } diff --git a/tsl/src/nodes/vector_agg/function/functions.c b/tsl/src/nodes/vector_agg/function/functions.c index a5bb1226929..7c4e9af4e3b 100644 --- a/tsl/src/nodes/vector_agg/function/functions.c +++ b/tsl/src/nodes/vector_agg/function/functions.c @@ -49,11 +49,28 @@ count_star_const(void *agg_state, Datum constvalue, bool constisnull, int n, state->count += n; } +static void +count_star_many_scalar(void *restrict agg_states, uint32 *restrict offsets, int n, Datum constvalue, + bool constisnull, MemoryContext agg_extra_mctx) +{ + CountState *states = (CountState *) agg_states; + for (int row = 0; row < n; row++) + { + if (offsets[row] == 0) + { + continue; + } + + states[offsets[row]].count++; + } +} + VectorAggFunctions count_star_agg = { .state_bytes = sizeof(CountState), .agg_init = count_init, .agg_const = count_star_const, .agg_emit = count_emit, + .agg_many_scalar = count_star_many_scalar, }; /* @@ -111,7 +128,7 @@ count_any_vector(void *agg_state, const ArrowArray *vector, const uint64 *filter } static void -count_any_many(void *restrict agg_states, int32 *restrict offsets, const ArrowArray *vector, +count_any_many(void *restrict agg_states, uint32 *restrict offsets, const ArrowArray *vector, MemoryContext agg_extra_mctx) { const int n = vector->length; diff --git a/tsl/src/nodes/vector_agg/function/functions.h b/tsl/src/nodes/vector_agg/function/functions.h index 
70ec3d5a2c7..9f080a8b823 100644 --- a/tsl/src/nodes/vector_agg/function/functions.h +++ b/tsl/src/nodes/vector_agg/function/functions.h @@ -27,9 +27,12 @@ typedef struct void (*agg_const)(void *restrict agg_state, Datum constvalue, bool constisnull, int n, MemoryContext agg_extra_mctx); - void (*agg_many)(void *restrict agg_states, int32 *restrict offsets, const ArrowArray *vector, + void (*agg_many)(void *restrict agg_states, uint32 *restrict offsets, const ArrowArray *vector, MemoryContext agg_extra_mctx); + void (*agg_many_scalar)(void *restrict agg_states, uint32 *restrict offsets, int n, + Datum constvalue, bool constisnull, MemoryContext agg_extra_mctx); + /* Emit a partial result. */ void (*agg_emit)(void *restrict agg_state, Datum *out_result, bool *out_isnull); } VectorAggFunctions; diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.c b/tsl/src/nodes/vector_agg/grouping_policy_hash.c index e11cdbd3995..aab2ba2b280 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.c @@ -124,7 +124,7 @@ gp_hash_reset(GroupingPolicy *obj) static void compute_single_aggregate(DecompressBatchState *batch_state, VectorAggDef *agg_def, void *agg_states, - int32 *offsets, MemoryContext agg_extra_mctx) + uint32 *offsets, MemoryContext agg_extra_mctx) { ArrowArray *arg_arrow = NULL; Datum arg_datum = 0; @@ -163,67 +163,54 @@ compute_single_aggregate(DecompressBatchState *batch_state, VectorAggDef *agg_de else { /* - * Scalar argument, or count(*). + * Scalar argument, or count(*). The latter has an optimized + * implementation for this case. 
*/ - for (int i = 0; i < batch_state->total_batch_rows; i++) + if (agg_def->func->agg_many_scalar != NULL) { - if (!arrow_row_is_valid(batch_state->vector_qual_result, i)) + agg_def->func->agg_many_scalar(agg_states, + offsets, + batch_state->total_batch_rows, + arg_datum, + arg_isnull, + agg_extra_mctx); + } + else + { + for (int i = 0; i < batch_state->total_batch_rows; i++) { - continue; - } + if (offsets[i] == 0) + { + continue; + } - if (offsets[i] == 0) - { - continue; + void *state = (offsets[i] * agg_def->func->state_bytes + (char *) agg_states); + agg_def->func->agg_const(state, arg_datum, arg_isnull, 1, agg_extra_mctx); } - - void *state = (offsets[i] * agg_def->func->state_bytes + (char *) agg_states); - agg_def->func->agg_const(state, arg_datum, arg_isnull, 1, agg_extra_mctx); } } } -static void -gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) +static pg_attribute_always_inline uint32 +fill_offsets_impl(GroupingPolicyHash *policy, DecompressBatchState *batch_state, + int key_column_index, uint32 next_unused_state_index, uint32 *restrict offsets, + void (*get_key)(CompressedColumnValues column, int row, Datum *key, bool *valid)) { - GroupingPolicyHash *policy = (GroupingPolicyHash *) gp; - - Assert(!policy->returning_results); - - /* - * State index zero is invalid, and state index one is for null key. We have - * to initialize it at the first run. - */ - const uint64 last_initialized_state_index = - policy->table->members ? policy->table->members + 2 : 1; - uint64 next_unused_state_index = policy->table->members + 2; - - int32 offsets[1000] = { 0 }; - Assert(batch_state->total_batch_rows <= 1000); - - /* - * For the partial aggregation node, the grouping columns are always in the - * output, so we don't have to separately look at the list of the grouping - * columns. 
- */ - Assert(list_length(policy->output_grouping_columns) == 1); - GroupingColumn *g = linitial(policy->output_grouping_columns); - CompressedColumnValues *gv = &batch_state->compressed_columns[g->input_offset]; + CompressedColumnValues column = batch_state->compressed_columns[key_column_index]; // Assert(gv->decompression_type == 8 /* lolwut */); - Assert(gv->decompression_type > 0); - const void *vv = gv->arrow->buffers[1]; - const uint64 *key_validity = gv->arrow->buffers[0]; - const uint64 *filter = batch_state->vector_qual_result; + const uint64 *restrict filter = batch_state->vector_qual_result; for (int row = 0; row < batch_state->total_batch_rows; row++) { + bool key_valid = false; Datum key = { 0 }; - memcpy(&key, gv->decompression_type * row + (char *) vv, gv->decompression_type); + get_key(column, row, &key, &key_valid); + if (!arrow_row_is_valid(filter, row)) { continue; } - if (arrow_row_is_valid(key_validity, row)) + if (key_valid) { bool found = false; HashEntry *entry = h_insert(policy->table, key, &found); @@ -240,7 +227,174 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) } } - policy->stat_input_valid_rows += arrow_num_valid(key_validity, batch_state->total_batch_rows); + policy->stat_input_valid_rows += + arrow_num_valid(column.buffers[0], batch_state->total_batch_rows); + return next_unused_state_index; +} + +// static pg_attribute_always_inline +// void get_key_generic(CompressedColumnValues *column, int row, Datum *key, bool *valid) +//{ +// Assert(column->decompression_type > 0); +// const void *values = column->arrow->buffers[1]; +// const uint64 *key_validity = column->arrow->buffers[0]; +// *valid = arrow_row_is_valid(key_validity, row); +// memcpy(key, column->decompression_type * row + (char *) values, column->decompression_type); +// } + +static pg_attribute_always_inline void +get_key_arrow_fixed(CompressedColumnValues column, int row, int key_bytes, Datum *restrict key, + bool *restrict valid) +{ + 
Assert(column.decompression_type == key_bytes); + const void *values = column.buffers[1]; + const uint64 *key_validity = column.buffers[0]; + *valid = arrow_row_is_valid(key_validity, row); + memcpy(key, key_bytes * row + (char *) values, key_bytes); +} + +static pg_attribute_always_inline void +get_key_arrow_fixed_2(CompressedColumnValues column, int row, Datum *restrict key, + bool *restrict valid) +{ + get_key_arrow_fixed(column, row, 2, key, valid); +} + +static pg_attribute_always_inline void +get_key_arrow_fixed_4(CompressedColumnValues column, int row, Datum *restrict key, + bool *restrict valid) +{ + get_key_arrow_fixed(column, row, 4, key, valid); +} + +static pg_attribute_always_inline void +get_key_arrow_fixed_8(CompressedColumnValues column, int row, Datum *restrict key, + bool *restrict valid) +{ + /* FIXME for float8 not by value */ + get_key_arrow_fixed(column, row, 8, key, valid); +} + +static pg_attribute_always_inline void +get_key_scalar(CompressedColumnValues column, int row, Datum *restrict key, bool *restrict valid) +{ + Assert(column.decompression_type == DT_Scalar); + *key = *column.output_value; + *valid = !*column.output_isnull; +} + +static pg_noinline uint32 +fill_offsets_arrow_fixed_8(GroupingPolicyHash *policy, DecompressBatchState *batch_state, + int key_column_index, uint32 next_unused_state_index, + uint32 *restrict offsets) +{ + return fill_offsets_impl(policy, + batch_state, + key_column_index, + next_unused_state_index, + offsets, + get_key_arrow_fixed_8); +} + +static pg_noinline uint32 +fill_offsets_arrow_fixed_4(GroupingPolicyHash *policy, DecompressBatchState *batch_state, + int key_column_index, uint32 next_unused_state_index, + uint32 *restrict offsets) +{ + return fill_offsets_impl(policy, + batch_state, + key_column_index, + next_unused_state_index, + offsets, + get_key_arrow_fixed_4); +} + +static pg_noinline uint32 +fill_offsets_arrow_fixed_2(GroupingPolicyHash *policy, DecompressBatchState *batch_state, + int 
key_column_index, uint32 next_unused_state_index, + uint32 *restrict offsets) +{ + return fill_offsets_impl(policy, + batch_state, + key_column_index, + next_unused_state_index, + offsets, + get_key_arrow_fixed_2); +} + +static pg_noinline uint32 +fill_offsets_scalar(GroupingPolicyHash *policy, DecompressBatchState *batch_state, + int key_column_index, uint32 next_unused_state_index, uint32 *restrict offsets) +{ + return fill_offsets_impl(policy, + batch_state, + key_column_index, + next_unused_state_index, + offsets, + get_key_scalar); +} + +static void +gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) +{ + GroupingPolicyHash *policy = (GroupingPolicyHash *) gp; + + Assert(!policy->returning_results); + + /* + * State index zero is invalid, and state index one is for null key. We have + * to initialize it at the first run. + */ + const uint32 last_initialized_state_index = + policy->table->members ? policy->table->members + 2 : 1; + uint32 next_unused_state_index = policy->table->members + 2; + + uint32 offsets[1000] = { 0 }; + Assert(batch_state->total_batch_rows <= 1000); + + /* + * For the partial aggregation node, the grouping columns are always in the + * output, so we don't have to separately look at the list of the grouping + * columns. 
+ */ + Assert(list_length(policy->output_grouping_columns) == 1); + GroupingColumn *g = linitial(policy->output_grouping_columns); + CompressedColumnValues *key_column = &batch_state->compressed_columns[g->input_offset]; + + switch ((int) key_column->decompression_type) + { + case DT_Scalar: + next_unused_state_index = fill_offsets_scalar(policy, + batch_state, + g->input_offset, + next_unused_state_index, + offsets); + break; + case 8: + next_unused_state_index = fill_offsets_arrow_fixed_8(policy, + batch_state, + g->input_offset, + next_unused_state_index, + offsets); + break; + case 4: + next_unused_state_index = fill_offsets_arrow_fixed_4(policy, + batch_state, + g->input_offset, + next_unused_state_index, + offsets); + break; + case 2: + next_unused_state_index = fill_offsets_arrow_fixed_2(policy, + batch_state, + g->input_offset, + next_unused_state_index, + offsets); + break; + default: + Assert(false); + break; + } ListCell *aggdeflc; ListCell *aggstatelc; @@ -285,10 +439,10 @@ gp_hash_should_emit(GroupingPolicy *gp) { GroupingPolicyHash *policy = (GroupingPolicyHash *) gp; (void) policy; -// if (policy->table->members + policy->have_null_key > 0) -// { -// return true; -// } + // if (policy->table->members + policy->have_null_key > 0) + // { + // return true; + // } return false; } @@ -302,9 +456,10 @@ gp_hash_do_emit(GroupingPolicy *gp, TupleTableSlot *aggregated_slot) /* FIXME doesn't work on final result emission w/o should_emit. 
*/ policy->returning_results = true; h_start_iterate(policy->table, &policy->iter); -// fprintf(stderr, "spill after %ld input rows, %d keys, %f ratio\n", -// policy->stat_input_valid_rows, policy->table->members + policy->have_null_key, -// policy->stat_input_valid_rows / (float) (policy->table->members + policy->have_null_key)); + // fprintf(stderr, "spill after %ld input rows, %d keys, %f ratio\n", + // policy->stat_input_valid_rows, policy->table->members + policy->have_null_key, + // policy->stat_input_valid_rows / (float) (policy->table->members + + // policy->have_null_key)); } HashEntry null_key_entry = { .agg_state_index = 1 }; diff --git a/tsl/test/expected/vector_agg_functions.out b/tsl/test/expected/vector_agg_functions.out index b600fa6018b..9e9f8115e57 100644 --- a/tsl/test/expected/vector_agg_functions.out +++ b/tsl/test/expected/vector_agg_functions.out @@ -314,7 +314,7 @@ select ss, min(cfloat4) from aggfns group by ss order by 1; ss | min ----+----------- 0 | -49.9756 - 1 | -49.9756 + 1 | NaN 2 | -49.9756 3 | -Infinity 4 | -49.9756 @@ -2006,13 +2006,13 @@ select ss, sum(cfloat4) from aggfns where cfloat8 > 0 group by ss order by 1; ss | sum ----+----------- 0 | -5395.24 - 1 | -5377.92 + 1 | -5377.93 2 | -5382.66 3 | -Infinity 4 | -5415.12 5 | -5415.12 - 6 | -5447.29 - 7 | -5447.29 + 6 | -5447.3 + 7 | -5447.3 8 | -5410.38 9 | -5439.41 (10 rows) @@ -3554,7 +3554,7 @@ select ss, min(cfloat4) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | min ----+---------- 0 | -49.9756 - 1 | -49.9756 + 1 | NaN 2 | -49.9756 3 | -49.9756 4 | -49.9756 @@ -3625,16 +3625,16 @@ select s, sum(cfloat4) from aggfns where cfloat8 <= 0 group by s order by 1; select ss, sum(cfloat4) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | sum ----+---------- - 0 | 2752.7 + 0 | 2752.71 1 | NaN 2 | Infinity - 3 | 2727.98 - 4 | 2764.68 - 5 | 2802.89 - 6 | 2807.59 + 3 | 2727.99 + 4 | 2764.69 + 5 | 2802.9 + 6 | 2807.6 7 | 2826.29 - 8 | 2784.1 - 9 | 2784.1 + 8 | 
2784.11 + 9 | 2784.11 (10 rows) select avg(cfloat8) from aggfns where cfloat8 <= 0 order by 1; @@ -5174,7 +5174,7 @@ select ss, min(cfloat4) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | min ----+----------- 0 | -49.9756 - 1 | -49.9756 + 1 | NaN 2 | -49.9756 3 | -Infinity 4 | -49.9756 From f8db4545b5a7274714bee351c2e8bc2cf0108c97 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 2 Oct 2024 13:15:08 +0200 Subject: [PATCH 06/58] benchmark vectorized grouping (2024-10-02 no. 9) From 00a9d11659154af6e7a2b38f0dc691fcb156ae99 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 4 Oct 2024 14:26:57 +0200 Subject: [PATCH 07/58] someething --- tsl/src/import/ts_simplehash.h | 1047 +++++++++++++++++ .../function/float48_accum_single.c | 9 +- tsl/src/nodes/vector_agg/function/functions.c | 9 +- tsl/src/nodes/vector_agg/function/functions.h | 4 +- .../vector_agg/function/int128_accum_single.c | 13 +- .../function/int24_avg_accum_templates.c | 11 +- .../vector_agg/function/int24_sum_templates.c | 11 +- .../vector_agg/function/minmax_templates.c | 11 +- .../vector_agg/function/sum_float_templates.c | 11 +- .../nodes/vector_agg/grouping_policy_batch.c | 2 +- .../nodes/vector_agg/grouping_policy_hash.c | 68 +- tsl/test/expected/vectorized_aggregation.out | 34 +- 12 files changed, 1159 insertions(+), 71 deletions(-) create mode 100644 tsl/src/import/ts_simplehash.h diff --git a/tsl/src/import/ts_simplehash.h b/tsl/src/import/ts_simplehash.h new file mode 100644 index 00000000000..2a630f24017 --- /dev/null +++ b/tsl/src/import/ts_simplehash.h @@ -0,0 +1,1047 @@ +/* + * simplehash.h + * + * When included this file generates a "templated" (by way of macros) + * open-addressing hash table implementation specialized to user-defined + * types. 
+ * + * It's probably not worthwhile to generate such a specialized implementation + * for hash tables that aren't performance or space sensitive. + * + * Compared to dynahash, simplehash has the following benefits: + * + * - Due to the "templated" code generation has known structure sizes and no + * indirect function calls (which show up substantially in dynahash + * profiles). These features considerably increase speed for small + * entries. + * - Open addressing has better CPU cache behavior than dynahash's chained + * hashtables. + * - The generated interface is type-safe and easier to use than dynahash, + * though at the cost of more complex setup. + * - Allocates memory in a MemoryContext or another allocator with a + * malloc/free style interface (which isn't easily usable in a shared + * memory context) + * - Does not require the overhead of a separate memory context. + * + * Usage notes: + * + * To generate a hash-table and associated functions for a use case several + * macros have to be #define'ed before this file is included. Including + * the file #undef's all those, so a new hash table can be generated + * afterwards. + * The relevant parameters are: + * - SH_PREFIX - prefix for all symbol names generated. A prefix of 'foo' + * will result in hash table type 'foo_hash' and functions like + * 'foo_insert'/'foo_lookup' and so forth. + * - SH_ELEMENT_TYPE - type of the contained elements + * - SH_KEY_TYPE - type of the hashtable's key + * - SH_DECLARE - if defined function prototypes and type declarations are + * generated + * - SH_DEFINE - if defined function definitions are generated + * - SH_SCOPE - in which scope (e.g. extern, static inline) do function + * declarations reside + * - SH_RAW_ALLOCATOR - if defined, memory contexts are not used; instead, + * use this to allocate bytes. The allocator must zero the returned space. 
+ * - SH_USE_NONDEFAULT_ALLOCATOR - if defined no element allocator functions + * are defined, so you can supply your own + * The following parameters are only relevant when SH_DEFINE is defined: + * - SH_KEY - name of the element in SH_ELEMENT_TYPE containing the hash key + * - SH_EQUAL(table, a, b) - compare two table keys + * - SH_HASH_KEY(table, key) - generate hash for the key + * - SH_STORE_HASH - if defined the hash is stored in the elements + * - SH_GET_HASH(tb, a) - return the field to store the hash in + * + * While SH_STORE_HASH (and subsequently SH_GET_HASH) are optional, because + * the hash table implementation needs to compare hashes to move elements + * (particularly when growing the hash), it's preferable, if possible, to + * store the element's hash in the element's data type. If the hash is so + * stored, the hash table will also compare hashes before calling SH_EQUAL + * when comparing two keys. + * + * For convenience the hash table create functions accept a void pointer + * that will be stored in the hash table type's member private_data. This + * allows callbacks to reference caller provided data. + * + * For examples of usage look at tidbitmap.c (file local definition) and + * execnodes.h/execGrouping.c (exposed declaration, file local + * implementation). + * + * Hash table design: + * + * The hash table design chosen is a variant of linear open-addressing. The + * reason for doing so is that linear addressing is CPU cache & pipeline + * friendly. The biggest disadvantage of simple linear addressing schemes + * are highly variable lookup times due to clustering, and deletions + * leaving a lot of tombstones around. To address these issues a variant + * of "robin hood" hashing is employed. Robin hood hashing optimizes + * chaining lengths by moving elements close to their optimal bucket + * ("rich" elements), out of the way if a to-be-inserted element is further + * away from its optimal position (i.e. it's "poor"). 
While that can make + * insertions slower, the average lookup performance is a lot better, and + * higher fill factors can be used in a still performant manner. To avoid + * tombstones - which normally solve the issue that a deleted node's + * presence is relevant to determine whether a lookup needs to continue + * looking or is done - buckets following a deleted element are shifted + * backwards, unless they're empty or already at their optimal position. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/lib/simplehash.h + */ + +#include "port/pg_bitutils.h" + +/* helpers */ +#define SH_MAKE_PREFIX(a) CppConcat(a, _) +#define SH_MAKE_NAME(name) SH_MAKE_NAME_(SH_MAKE_PREFIX(SH_PREFIX), name) +#define SH_MAKE_NAME_(a, b) CppConcat(a, b) + +/* name macros for: */ + +/* type declarations */ +#define SH_TYPE SH_MAKE_NAME(hash) +#define SH_ITERATOR SH_MAKE_NAME(iterator) + +/* function declarations */ +#define SH_CREATE SH_MAKE_NAME(create) +#define SH_DESTROY SH_MAKE_NAME(destroy) +#define SH_RESET SH_MAKE_NAME(reset) +#define SH_INSERT SH_MAKE_NAME(insert) +#define SH_INSERT_HASH SH_MAKE_NAME(insert_hash) +#define SH_DELETE_ITEM SH_MAKE_NAME(delete_item) +#define SH_DELETE SH_MAKE_NAME(delete) +#define SH_LOOKUP SH_MAKE_NAME(lookup) +#define SH_LOOKUP_HASH SH_MAKE_NAME(lookup_hash) +#define SH_GROW SH_MAKE_NAME(grow) +#define SH_START_ITERATE SH_MAKE_NAME(start_iterate) +#define SH_START_ITERATE_AT SH_MAKE_NAME(start_iterate_at) +#define SH_ITERATE SH_MAKE_NAME(iterate) +#define SH_ALLOCATE SH_MAKE_NAME(allocate) +#define SH_FREE SH_MAKE_NAME(free) +#define SH_STAT SH_MAKE_NAME(stat) + +/* internal helper functions (no externally visible prototypes) */ +#define SH_COMPUTE_PARAMETERS SH_MAKE_NAME(compute_parameters) +#define SH_NEXT SH_MAKE_NAME(next) +#define SH_PREV SH_MAKE_NAME(prev) +#define SH_DISTANCE_FROM_OPTIMAL SH_MAKE_NAME(distance) +#define 
SH_INITIAL_BUCKET SH_MAKE_NAME(initial_bucket) +#define SH_ENTRY_HASH SH_MAKE_NAME(entry_hash) +#define SH_INSERT_HASH_INTERNAL SH_MAKE_NAME(insert_hash_internal) +#define SH_LOOKUP_HASH_INTERNAL SH_MAKE_NAME(lookup_hash_internal) + +/* generate forward declarations necessary to use the hash table */ +#ifdef SH_DECLARE + +/* type definitions */ +typedef struct SH_TYPE +{ + /* + * Size of data / bucket array, 64 bits to handle UINT32_MAX sized hash + * tables. Note that the maximum number of elements is lower + * (SH_MAX_FILLFACTOR) + */ + uint64 size; + + /* how many elements have valid contents */ + uint32 members; + + /* mask for bucket and size calculations, based on size */ + uint32 sizemask; + + /* boundary after which to grow hashtable */ + uint32 grow_threshold; + + /* hash buckets */ + SH_ELEMENT_TYPE *restrict data; + +#ifndef SH_RAW_ALLOCATOR + /* memory context to use for allocations */ + MemoryContext ctx; +#endif + + /* user defined data, useful for callbacks */ + void *private_data; +} SH_TYPE; + +typedef struct SH_ITERATOR +{ + uint32 cur; /* current element */ + uint32 end; + bool done; /* iterator exhausted? 
*/ +} SH_ITERATOR; + +/* externally visible function prototypes */ +#ifdef SH_RAW_ALLOCATOR +/* _hash _create(uint32 nelements, void *private_data) */ +SH_SCOPE SH_TYPE *SH_CREATE(uint32 nelements, void *private_data); +#else +/* + * _hash _create(MemoryContext ctx, uint32 nelements, + * void *private_data) + */ +SH_SCOPE SH_TYPE *SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data); +#endif + +/* void _destroy(_hash *tb) */ +SH_SCOPE void SH_DESTROY(SH_TYPE *tb); + +/* void _reset(_hash *tb) */ +SH_SCOPE void SH_RESET(SH_TYPE *tb); + +/* void _grow(_hash *tb, uint64 newsize) */ +SH_SCOPE void SH_GROW(SH_TYPE *tb, uint64 newsize); + +/* *_insert(_hash *tb, key, bool *found) */ +SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE *tb, SH_KEY_TYPE key, bool *found); + +/* + * *_insert_hash(_hash *tb, key, uint32 hash, + * bool *found) + */ +SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT_HASH(SH_TYPE *tb, SH_KEY_TYPE key, uint32 hash, bool *found); + +/* *_lookup(_hash *tb, key) */ +SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE *tb, SH_KEY_TYPE key); + +/* *_lookup_hash(_hash *tb, key, uint32 hash) */ +SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP_HASH(SH_TYPE *tb, SH_KEY_TYPE key, uint32 hash); + +/* void _delete_item(_hash *tb, *entry) */ +SH_SCOPE void SH_DELETE_ITEM(SH_TYPE *tb, SH_ELEMENT_TYPE *entry); + +/* bool _delete(_hash *tb, key) */ +SH_SCOPE bool SH_DELETE(SH_TYPE *tb, SH_KEY_TYPE key); + +/* void _start_iterate(_hash *tb, _iterator *iter) */ +SH_SCOPE void SH_START_ITERATE(SH_TYPE *tb, SH_ITERATOR *iter); + +/* + * void _start_iterate_at(_hash *tb, _iterator *iter, + * uint32 at) + */ +SH_SCOPE void SH_START_ITERATE_AT(SH_TYPE *tb, SH_ITERATOR *iter, uint32 at); + +/* *_iterate(_hash *tb, _iterator *iter) */ +SH_SCOPE SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE *tb, SH_ITERATOR *iter); + +/* void _stat(_hash *tb */ +SH_SCOPE void SH_STAT(SH_TYPE *tb); + +#endif /* SH_DECLARE */ + +/* generate implementation of the hash table */ +#ifdef SH_DEFINE + +#ifndef SH_RAW_ALLOCATOR 
+#include "utils/memutils.h" +#endif + +/* max data array size,we allow up to PG_UINT32_MAX buckets, including 0 */ +#define SH_MAX_SIZE (((uint64) PG_UINT32_MAX) + 1) + +/* normal fillfactor, unless already close to maximum */ +#ifndef SH_FILLFACTOR +#define SH_FILLFACTOR (0.9) +#endif +/* increase fillfactor if we otherwise would error out */ +#define SH_MAX_FILLFACTOR (0.98) +/* grow if actual and optimal location bigger than */ +#ifndef SH_GROW_MAX_DIB +#define SH_GROW_MAX_DIB 25 +#endif +/* grow if more than elements to move when inserting */ +#ifndef SH_GROW_MAX_MOVE +#define SH_GROW_MAX_MOVE 150 +#endif +#ifndef SH_GROW_MIN_FILLFACTOR +/* but do not grow due to SH_GROW_MAX_* if below */ +#define SH_GROW_MIN_FILLFACTOR 0.1 +#endif + +#ifdef SH_STORE_HASH +#define SH_COMPARE_KEYS(tb, ahash, akey, b) \ + (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey)) +#else +#define SH_COMPARE_KEYS(tb, ahash, akey, b) (SH_EQUAL(tb, b->SH_KEY, akey)) +#endif + +/* + * Wrap the following definitions in include guards, to avoid multiple + * definition errors if this header is included more than once. The rest of + * the file deliberately has no include guards, because it can be included + * with different parameters to define functions and types with non-colliding + * names. + */ +#ifndef SIMPLEHASH_H +#define SIMPLEHASH_H + +#ifdef FRONTEND +#define sh_error(...) pg_fatal(__VA_ARGS__) +#define sh_log(...) pg_log_info(__VA_ARGS__) +#else +#define sh_error(...) elog(ERROR, __VA_ARGS__) +#define sh_log(...) elog(LOG, __VA_ARGS__) +#endif + +#endif + +/* + * Compute sizing parameters for hashtable. Called when creating and growing + * the hashtable. 
+ */ +static inline void +SH_COMPUTE_PARAMETERS(SH_TYPE *tb, uint64 newsize) +{ + uint64 size; + + /* supporting zero sized hashes would complicate matters */ + size = Max(newsize, 2); + + /* round up size to the next power of 2, that's how bucketing works */ + size = pg_nextpower2_64(size); + Assert(size <= SH_MAX_SIZE); + + /* + * Verify that allocation of ->data is possible on this platform, without + * overflowing Size. + */ + if (unlikely((((uint64) sizeof(SH_ELEMENT_TYPE)) * size) >= SIZE_MAX / 2)) + sh_error("hash table too large"); + + /* now set size */ + tb->size = size; + tb->sizemask = (uint32) (size - 1); + + /* + * Compute the next threshold at which we need to grow the hash table + * again. + */ + if (tb->size == SH_MAX_SIZE) + tb->grow_threshold = ((double) tb->size) * SH_MAX_FILLFACTOR; + else + tb->grow_threshold = ((double) tb->size) * SH_FILLFACTOR; +} + +/* return the optimal bucket for the hash */ +static pg_attribute_always_inline uint32 +SH_INITIAL_BUCKET(SH_TYPE *tb, uint32 hash) +{ + return hash & tb->sizemask; +} + +/* return next bucket after the current, handling wraparound */ +static inline uint32 +SH_NEXT(SH_TYPE *tb, uint32 curelem, uint32 startelem) +{ + curelem = (curelem + 1) & tb->sizemask; + + Assert(curelem != startelem); + + return curelem; +} + +/* return bucket before the current, handling wraparound */ +static inline uint32 +SH_PREV(SH_TYPE *tb, uint32 curelem, uint32 startelem) +{ + curelem = (curelem - 1) & tb->sizemask; + + Assert(curelem != startelem); + + return curelem; +} + +/* return distance between bucket and its optimal position */ +static inline uint32 +SH_DISTANCE_FROM_OPTIMAL(SH_TYPE *tb, uint32 optimal, uint32 bucket) +{ + if (optimal <= bucket) + return bucket - optimal; + else + return (tb->size + bucket) - optimal; +} + +static inline uint32 +SH_ENTRY_HASH(SH_TYPE *tb, SH_ELEMENT_TYPE *entry) +{ +#ifdef SH_STORE_HASH + return SH_GET_HASH(tb, entry); +#else + return SH_HASH_KEY(tb, entry->SH_KEY); +#endif 
+} + +/* default memory allocator function */ +static inline void *SH_ALLOCATE(SH_TYPE *type, Size size); +static inline void SH_FREE(SH_TYPE *type, void *pointer); + +#ifndef SH_USE_NONDEFAULT_ALLOCATOR + +/* default memory allocator function */ +static inline void * +SH_ALLOCATE(SH_TYPE *type, Size size) +{ +#ifdef SH_RAW_ALLOCATOR + return SH_RAW_ALLOCATOR(size); +#else + return MemoryContextAllocExtended(type->ctx, size, MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO); +#endif +} + +/* default memory free function */ +static inline void +SH_FREE(SH_TYPE *type, void *pointer) +{ + pfree(pointer); +} + +#endif + +/* + * Create a hash table with enough space for `nelements` distinct members. + * Memory for the hash table is allocated from the passed-in context. If + * desired, the array of elements can be allocated using a passed-in allocator; + * this could be useful in order to place the array of elements in a shared + * memory, or in a context that will outlive the rest of the hash table. + * Memory other than for the array of elements will still be allocated from + * the passed-in context. 
+ */ +#ifdef SH_RAW_ALLOCATOR +SH_SCOPE SH_TYPE * +SH_CREATE(uint32 nelements, void *private_data) +#else +SH_SCOPE SH_TYPE * +SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data) +#endif +{ + SH_TYPE *tb; + uint64 size; + +#ifdef SH_RAW_ALLOCATOR + tb = (SH_TYPE *) SH_RAW_ALLOCATOR(sizeof(SH_TYPE)); +#else + tb = (SH_TYPE *) MemoryContextAllocZero(ctx, sizeof(SH_TYPE)); + tb->ctx = ctx; +#endif + tb->private_data = private_data; + + /* increase nelements by fillfactor, want to store nelements elements */ + size = Min((double) SH_MAX_SIZE, ((double) nelements) / SH_FILLFACTOR); + + SH_COMPUTE_PARAMETERS(tb, size); + + tb->data = (SH_ELEMENT_TYPE *) SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size); + + return tb; +} + +/* destroy a previously created hash table */ +SH_SCOPE void +SH_DESTROY(SH_TYPE *tb) +{ + SH_FREE(tb, tb->data); + pfree(tb); +} + +/* reset the contents of a previously created hash table */ +SH_SCOPE void +SH_RESET(SH_TYPE *tb) +{ + memset(tb->data, 0, sizeof(SH_ELEMENT_TYPE) * tb->size); + tb->members = 0; +} + +/* + * Grow a hash table to at least `newsize` buckets. + * + * Usually this will automatically be called by insertions/deletions, when + * necessary. But resizing to the exact input size can be advantageous + * performance-wise, when known at some point. + */ +SH_SCOPE void +SH_GROW(SH_TYPE *tb, uint64 newsize) +{ + uint64 oldsize = tb->size; + SH_ELEMENT_TYPE *olddata = tb->data; + SH_ELEMENT_TYPE *newdata; + uint32 i; + uint32 startelem = 0; + uint32 copyelem; + + Assert(oldsize == pg_nextpower2_64(oldsize)); + Assert(oldsize != SH_MAX_SIZE); + Assert(oldsize < newsize); + + /* compute parameters for new table */ + SH_COMPUTE_PARAMETERS(tb, newsize); + + tb->data = (SH_ELEMENT_TYPE *) SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size); + + newdata = tb->data; + + /* + * Copy entries from the old data to newdata. 
We theoretically could use
+ * SH_INSERT here, to avoid code duplication, but that's more general than
+ * we need. We neither want tb->members increased, nor do we need to
+ * deal with deleted elements, nor do we need to compare keys. So a
+ * special-cased implementation is a lot faster. As resizing can be time
+ * consuming and frequent, that's worthwhile to optimize.
+ *
+ * To be able to simply move entries over, we have to start not at the
+ * first bucket (i.e. olddata[0]), but find the first bucket that's either
+ * empty, or is occupied by an entry at its optimal position. Such a
+ * bucket has to exist in any table with a load factor under 1, as not all
+ * buckets are occupied, i.e. there always has to be an empty bucket. By
+ * starting at such a bucket we can move the entries to the larger table,
+ * without having to deal with conflicts.
+ */
+
+ /* search for the first element in the hash that's not wrapped around */
+ for (i = 0; i < oldsize; i++)
+ {
+ SH_ELEMENT_TYPE *oldentry = &olddata[i];
+ uint32 hash;
+ uint32 optimal;
+
+ if (SH_ENTRY_EMPTY(oldentry))
+ {
+ startelem = i;
+ break;
+ }
+
+ hash = SH_ENTRY_HASH(tb, oldentry);
+ optimal = SH_INITIAL_BUCKET(tb, hash);
+
+ if (optimal == i)
+ {
+ startelem = i;
+ break;
+ }
+ }
+
+ /* and copy all elements in the old table */
+ copyelem = startelem;
+ for (i = 0; i < oldsize; i++)
+ {
+ SH_ELEMENT_TYPE *oldentry = &olddata[copyelem];
+
+ if (!SH_ENTRY_EMPTY(oldentry))
+ {
+ uint32 hash;
+ uint32 startelem;
+ uint32 curelem;
+ SH_ELEMENT_TYPE *newentry;
+
+ hash = SH_ENTRY_HASH(tb, oldentry);
+ startelem = SH_INITIAL_BUCKET(tb, hash);
+ curelem = startelem;
+
+ /* find empty element to put data into */
+ while (true)
+ {
+ newentry = &newdata[curelem];
+
+ if (SH_ENTRY_EMPTY(newentry))
+ {
+ break;
+ }
+
+ curelem = SH_NEXT(tb, curelem, startelem);
+ }
+
+ /* copy entry to new slot */
+ memcpy(newentry, oldentry, sizeof(SH_ELEMENT_TYPE));
+ }
+
+ /* can't use SH_NEXT here, would use new size */
+ copyelem++;
+ if (copyelem >= oldsize)
+ {
+ copyelem = 0;
+ }
+ }
+
+ SH_FREE(tb, olddata);
+}
+
+/*
+ * This is a separate static inline function, so it can reliably be inlined
+ * into its wrapper functions even if SH_SCOPE is extern.
+ */
+static pg_attribute_always_inline SH_ELEMENT_TYPE *
+SH_INSERT_HASH_INTERNAL(SH_TYPE *restrict tb, SH_KEY_TYPE key, uint32 hash, bool *found)
+{
+ uint32 startelem;
+ uint32 curelem;
+ SH_ELEMENT_TYPE *restrict data;
+ uint32 insertdist;
+
+restart:
+ insertdist = 0;
+
+ /*
+ * We do the grow check even if the key is actually present, to avoid
+ * doing the check inside the loop. This also lets us avoid having to
+ * re-find our position in the hashtable after resizing.
+ *
+ * Note that this is also reached when resizing the table due to
+ * SH_GROW_MAX_DIB / SH_GROW_MAX_MOVE.
+ */
+ if (unlikely(tb->members >= tb->grow_threshold))
+ {
+ if (unlikely(tb->size == SH_MAX_SIZE))
+ sh_error("hash table size exceeded");
+
+ /*
+ * When optimizing, it can be very useful to print these out.
+ */
+ /* SH_STAT(tb); */
+ SH_GROW(tb, tb->size * 2);
+ /* SH_STAT(tb); */
+ }
+
+ /* perform insert, start bucket search at optimal location */
+ data = tb->data;
+ startelem = SH_INITIAL_BUCKET(tb, hash);
+ curelem = startelem;
+ while (true)
+ {
+ uint32 curdist;
+ uint32 curhash;
+ uint32 curoptimal;
+ SH_ELEMENT_TYPE *entry = &data[curelem];
+
+ /* any empty bucket can directly be used */
+ if (SH_ENTRY_EMPTY(entry))
+ {
+ tb->members++;
+ entry->SH_KEY = key;
+#ifdef SH_STORE_HASH
+ SH_GET_HASH(tb, entry) = hash;
+#endif
+ *found = false;
+ return entry;
+ }
+
+ /*
+ * If the bucket is not empty, we either found a match (in which case
+ * we're done), or we have to decide whether to skip over or move the
+ * colliding entry. When the colliding element's distance to its
+ * optimal position is smaller than the to-be-inserted entry's, we
+ * shift the colliding entry (and its followers) forward by one.
+ */ + + if (SH_COMPARE_KEYS(tb, hash, key, entry)) + { + Assert(!SH_ENTRY_EMPTY(entry)); + *found = true; + return entry; + } + + curhash = SH_ENTRY_HASH(tb, entry); + curoptimal = SH_INITIAL_BUCKET(tb, curhash); + curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem); + + if (insertdist > curdist) + { + SH_ELEMENT_TYPE *lastentry = entry; + uint32 emptyelem = curelem; + uint32 moveelem; + int32 emptydist = 0; + + /* find next empty bucket */ + while (true) + { + SH_ELEMENT_TYPE *emptyentry; + + emptyelem = SH_NEXT(tb, emptyelem, startelem); + emptyentry = &data[emptyelem]; + + if (SH_ENTRY_EMPTY(emptyentry)) + { + lastentry = emptyentry; + break; + } + + /* + * To avoid negative consequences from overly imbalanced + * hashtables, grow the hashtable if collisions would require + * us to move a lot of entries. The most likely cause of such + * imbalance is filling a (currently) small table, from a + * currently big one, in hash-table order. Don't grow if the + * hashtable would be too empty, to prevent quick space + * explosion for some weird edge cases. + */ + if (unlikely(++emptydist > SH_GROW_MAX_MOVE) && + ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR) + { + tb->grow_threshold = 0; + goto restart; + } + } + + /* shift forward, starting at last occupied element */ + + /* + * TODO: This could be optimized to be one memcpy in many cases, + * excepting wrapping around at the end of ->data. Hasn't shown up + * in profiles so far though. 
+ */ + moveelem = emptyelem; + while (moveelem != curelem) + { + SH_ELEMENT_TYPE *moveentry; + + moveelem = SH_PREV(tb, moveelem, startelem); + moveentry = &data[moveelem]; + + memcpy(lastentry, moveentry, sizeof(SH_ELEMENT_TYPE)); + lastentry = moveentry; + } + + /* and fill the now empty spot */ + tb->members++; + + entry->SH_KEY = key; +#ifdef SH_STORE_HASH + SH_GET_HASH(tb, entry) = hash; +#endif + *found = false; + return entry; + } + + curelem = SH_NEXT(tb, curelem, startelem); + insertdist++; + + /* + * To avoid negative consequences from overly imbalanced hashtables, + * grow the hashtable if collisions lead to large runs. The most + * likely cause of such imbalance is filling a (currently) small + * table, from a currently big one, in hash-table order. Don't grow + * if the hashtable would be too empty, to prevent quick space + * explosion for some weird edge cases. + */ + if (unlikely(insertdist > SH_GROW_MAX_DIB) && + ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR) + { + tb->grow_threshold = 0; + goto restart; + } + } +} + +/* + * Insert the key key into the hash-table, set *found to true if the key + * already exists, false otherwise. Returns the hash-table entry in either + * case. + */ +static pg_attribute_always_inline SH_ELEMENT_TYPE * +SH_INSERT(SH_TYPE *tb, SH_KEY_TYPE key, bool *found) +{ + uint32 hash = SH_HASH_KEY(tb, key); + + return SH_INSERT_HASH_INTERNAL(tb, key, hash, found); +} + +/* + * Insert the key key into the hash-table using an already-calculated + * hash. Set *found to true if the key already exists, false + * otherwise. Returns the hash-table entry in either case. + */ +SH_SCOPE SH_ELEMENT_TYPE * +SH_INSERT_HASH(SH_TYPE *tb, SH_KEY_TYPE key, uint32 hash, bool *found) +{ + return SH_INSERT_HASH_INTERNAL(tb, key, hash, found); +} + +/* + * This is a separate static inline function, so it can be reliably be inlined + * into its wrapper functions even if SH_SCOPE is extern. 
+ */
+static inline SH_ELEMENT_TYPE *
+SH_LOOKUP_HASH_INTERNAL(SH_TYPE *tb, SH_KEY_TYPE key, uint32 hash)
+{
+ const uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
+ uint32 curelem = startelem;
+
+ while (true)
+ {
+ SH_ELEMENT_TYPE *entry = &tb->data[curelem];
+
+ if (SH_ENTRY_EMPTY(entry))
+ {
+ return NULL;
+ }
+
+ Assert(!SH_ENTRY_EMPTY(entry));
+
+ if (SH_COMPARE_KEYS(tb, hash, key, entry))
+ return entry;
+
+ /*
+ * TODO: we could stop search based on distance. If the current
+ * bucket's distance-from-optimal is smaller than what we've skipped
+ * already, the entry doesn't exist. Probably only do so if
+ * SH_STORE_HASH is defined, to avoid re-computing hashes?
+ */
+
+ curelem = SH_NEXT(tb, curelem, startelem);
+ }
+}
+
+/*
+ * Look up entry in hash table. Returns NULL if key not present.
+ */
+SH_SCOPE SH_ELEMENT_TYPE *
+SH_LOOKUP(SH_TYPE *tb, SH_KEY_TYPE key)
+{
+ uint32 hash = SH_HASH_KEY(tb, key);
+
+ return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
+}
+
+/*
+ * Look up entry in hash table using an already-calculated hash.
+ *
+ * Returns NULL if key not present.
+ */
+SH_SCOPE SH_ELEMENT_TYPE *
+SH_LOOKUP_HASH(SH_TYPE *tb, SH_KEY_TYPE key, uint32 hash)
+{
+ return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
+}
+
+/*
+ * Initialize iterator.
+ */
+SH_SCOPE void
+SH_START_ITERATE(SH_TYPE *tb, SH_ITERATOR *iter)
+{
+ uint64 startelem = PG_UINT64_MAX;
+
+ /*
+ * Search for the first empty element. As deletions during iterations are
+ * supported, we want to start/end at an element that cannot be affected
+ * by elements being shifted.
+ */ + for (uint32 i = 0; i < tb->size; i++) + { + SH_ELEMENT_TYPE *entry = &tb->data[i]; + + if (SH_ENTRY_EMPTY(entry)) + { + startelem = i; + break; + } + } + + /* we should have found an empty element */ + Assert(startelem < SH_MAX_SIZE); + + /* + * Iterate backwards, that allows the current element to be deleted, even + * if there are backward shifts + */ + iter->cur = startelem; + iter->end = iter->cur; + iter->done = false; +} + +/* + * Initialize iterator to a specific bucket. That's really only useful for + * cases where callers are partially iterating over the hashspace, and that + * iteration deletes and inserts elements based on visited entries. Doing that + * repeatedly could lead to an unbalanced keyspace when always starting at the + * same position. + */ +SH_SCOPE void +SH_START_ITERATE_AT(SH_TYPE *tb, SH_ITERATOR *iter, uint32 at) +{ + /* + * Iterate backwards, that allows the current element to be deleted, even + * if there are backward shifts. + */ + iter->cur = at & tb->sizemask; /* ensure at is within a valid range */ + iter->end = iter->cur; + iter->done = false; +} + +/* + * Iterate over all entries in the hash-table. Return the next occupied entry, + * or NULL if done. + * + * During iteration the current entry in the hash table may be deleted, + * without leading to elements being skipped or returned twice. Additionally + * the rest of the table may be modified (i.e. there can be insertions or + * deletions), but if so, there's neither a guarantee that all nodes are + * visited at least once, nor a guarantee that a node is visited at most once. 
+ */ +SH_SCOPE SH_ELEMENT_TYPE * +SH_ITERATE(SH_TYPE *tb, SH_ITERATOR *iter) +{ + while (!iter->done) + { + SH_ELEMENT_TYPE *elem; + + elem = &tb->data[iter->cur]; + + /* next element in backward direction */ + iter->cur = (iter->cur - 1) & tb->sizemask; + + if ((iter->cur & tb->sizemask) == (iter->end & tb->sizemask)) + iter->done = true; + if (!SH_ENTRY_EMPTY(elem)) + { + return elem; + } + } + + return NULL; +} + +/* + * Report some statistics about the state of the hashtable. For + * debugging/profiling purposes only. + */ +SH_SCOPE void +SH_STAT(SH_TYPE *tb) +{ + uint32 max_chain_length = 0; + uint32 total_chain_length = 0; + double avg_chain_length; + double fillfactor; + uint32 i; + + uint32 *collisions = (uint32 *) palloc0(tb->size * sizeof(uint32)); + uint32 total_collisions = 0; + uint32 max_collisions = 0; + double avg_collisions; + + for (i = 0; i < tb->size; i++) + { + uint32 hash; + uint32 optimal; + uint32 dist; + SH_ELEMENT_TYPE *elem; + + elem = &tb->data[i]; + + if (SH_ENTRY_EMPTY(elem)) + continue; + + hash = SH_ENTRY_HASH(tb, elem); + optimal = SH_INITIAL_BUCKET(tb, hash); + dist = SH_DISTANCE_FROM_OPTIMAL(tb, optimal, i); + + if (dist > max_chain_length) + max_chain_length = dist; + total_chain_length += dist; + + collisions[optimal]++; + } + + for (i = 0; i < tb->size; i++) + { + uint32 curcoll = collisions[i]; + + if (curcoll == 0) + continue; + + /* single contained element is not a collision */ + curcoll--; + total_collisions += curcoll; + if (curcoll > max_collisions) + max_collisions = curcoll; + } + + /* large enough to be worth freeing, even if just used for debugging */ + pfree(collisions); + + if (tb->members > 0) + { + fillfactor = tb->members / ((double) tb->size); + avg_chain_length = ((double) total_chain_length) / tb->members; + avg_collisions = ((double) total_collisions) / tb->members; + } + else + { + fillfactor = 0; + avg_chain_length = 0; + avg_collisions = 0; + } + + sh_log("size: " UINT64_FORMAT + ", members: %u, filled: 
%f, total chain: %u, max chain: %u, avg chain: %f, " + "total_collisions: %u, max_collisions: %u, avg_collisions: %f", + tb->size, + tb->members, + fillfactor, + total_chain_length, + max_chain_length, + avg_chain_length, + total_collisions, + max_collisions, + avg_collisions); +} + +#endif /* SH_DEFINE */ + +/* undefine external parameters, so next hash table can be defined */ +#undef SH_PREFIX +#undef SH_KEY_TYPE +#undef SH_KEY +#undef SH_ELEMENT_TYPE +#undef SH_HASH_KEY +#undef SH_SCOPE +#undef SH_DECLARE +#undef SH_DEFINE +#undef SH_GET_HASH +#undef SH_STORE_HASH +#undef SH_USE_NONDEFAULT_ALLOCATOR +#undef SH_EQUAL + +/* undefine locally declared macros */ +#undef SH_MAKE_PREFIX +#undef SH_MAKE_NAME +#undef SH_MAKE_NAME_ +#undef SH_FILLFACTOR +#undef SH_MAX_FILLFACTOR +#undef SH_GROW_MAX_DIB +#undef SH_GROW_MAX_MOVE +#undef SH_GROW_MIN_FILLFACTOR +#undef SH_MAX_SIZE + +/* types */ +#undef SH_TYPE +#undef SH_ITERATOR + +/* external function names */ +#undef SH_CREATE +#undef SH_DESTROY +#undef SH_RESET +#undef SH_INSERT +#undef SH_INSERT_HASH +#undef SH_DELETE_ITEM +#undef SH_DELETE +#undef SH_LOOKUP +#undef SH_LOOKUP_HASH +#undef SH_GROW +#undef SH_START_ITERATE +#undef SH_START_ITERATE_AT +#undef SH_ITERATE +#undef SH_ALLOCATE +#undef SH_FREE +#undef SH_STAT + +/* internal function names */ +#undef SH_COMPUTE_PARAMETERS +#undef SH_COMPARE_KEYS +#undef SH_INITIAL_BUCKET +#undef SH_NEXT +#undef SH_PREV +#undef SH_DISTANCE_FROM_OPTIMAL +#undef SH_ENTRY_HASH +#undef SH_INSERT_HASH_INTERNAL +#undef SH_LOOKUP_HASH_INTERNAL diff --git a/tsl/src/nodes/vector_agg/function/float48_accum_single.c b/tsl/src/nodes/vector_agg/function/float48_accum_single.c index c54c0405384..6a07c28e238 100644 --- a/tsl/src/nodes/vector_agg/function/float48_accum_single.c +++ b/tsl/src/nodes/vector_agg/function/float48_accum_single.c @@ -55,10 +55,13 @@ typedef struct } FUNCTION_NAME(state); static void -FUNCTION_NAME(init)(void *agg_state) +FUNCTION_NAME(init)(void *restrict agg_states, 
int n) { - FUNCTION_NAME(state) *state = (FUNCTION_NAME(state) *) agg_state; - *state = (FUNCTION_NAME(state)){ 0 }; + FUNCTION_NAME(state) *states = (FUNCTION_NAME(state) *) agg_states; + for (int i = 0; i < n; i++) + { + states[i] = (FUNCTION_NAME(state)){ 0 }; + } } static void diff --git a/tsl/src/nodes/vector_agg/function/functions.c b/tsl/src/nodes/vector_agg/function/functions.c index 7c4e9af4e3b..1ebb5b06b22 100644 --- a/tsl/src/nodes/vector_agg/function/functions.c +++ b/tsl/src/nodes/vector_agg/function/functions.c @@ -27,10 +27,13 @@ typedef struct } CountState; static void -count_init(void *agg_state) +count_init(void *restrict agg_states, int n) { - CountState *state = (CountState *) agg_state; - state->count = 0; + CountState *states = (CountState *) agg_states; + for (int i = 0; i < n; i++) + { + states[i].count = 0; + } } static void diff --git a/tsl/src/nodes/vector_agg/function/functions.h b/tsl/src/nodes/vector_agg/function/functions.h index 9f080a8b823..4d679e14277 100644 --- a/tsl/src/nodes/vector_agg/function/functions.h +++ b/tsl/src/nodes/vector_agg/function/functions.h @@ -16,8 +16,8 @@ typedef struct /* Size of the aggregate function state. */ size_t state_bytes; - /* Initialize the aggregate function state pointed to by agg_value and agg_isnull. */ - void (*agg_init)(void *restrict agg_state); + /* Initialize the aggregate function states. */ + void (*agg_init)(void *restrict agg_states, int n); /* Aggregate a given arrow array. 
*/ void (*agg_vector)(void *restrict agg_state, const ArrowArray *vector, const uint64 *filter, diff --git a/tsl/src/nodes/vector_agg/function/int128_accum_single.c b/tsl/src/nodes/vector_agg/function/int128_accum_single.c index 39bd665f3c9..bf0f90e5044 100644 --- a/tsl/src/nodes/vector_agg/function/int128_accum_single.c +++ b/tsl/src/nodes/vector_agg/function/int128_accum_single.c @@ -25,14 +25,17 @@ typedef struct } FUNCTION_NAME(state); static void -FUNCTION_NAME(init)(void *agg_state) +FUNCTION_NAME(init)(void *restrict agg_states, int n) { - FUNCTION_NAME(state) *state = (FUNCTION_NAME(state) *) agg_state; - state->N = 0; - state->sumX = 0; + FUNCTION_NAME(state) *states = (FUNCTION_NAME(state) *) agg_states; + for (int i = 0; i < n; i++) + { + states[i].N = 0; + states[i].sumX = 0; #ifdef NEED_SUMX2 - state->sumX2 = 0; + states[i].sumX2 = 0; #endif + } } static void diff --git a/tsl/src/nodes/vector_agg/function/int24_avg_accum_templates.c b/tsl/src/nodes/vector_agg/function/int24_avg_accum_templates.c index 0841eb79cc1..518a7491aab 100644 --- a/tsl/src/nodes/vector_agg/function/int24_avg_accum_templates.c +++ b/tsl/src/nodes/vector_agg/function/int24_avg_accum_templates.c @@ -27,11 +27,14 @@ typedef struct } Int24AvgAccumState; static void -int24_avg_accum_init(void *agg_state) +int24_avg_accum_init(void *restrict agg_states, int n) { - Int24AvgAccumState *state = (Int24AvgAccumState *) agg_state; - state->count = 0; - state->sum = 0; + Int24AvgAccumState *states = (Int24AvgAccumState *) agg_states; + for (int i = 0; i < n; i++) + { + states[i].count = 0; + states[i].sum = 0; + } } static void diff --git a/tsl/src/nodes/vector_agg/function/int24_sum_templates.c b/tsl/src/nodes/vector_agg/function/int24_sum_templates.c index 5e87f3c9a32..8b250cc91cc 100644 --- a/tsl/src/nodes/vector_agg/function/int24_sum_templates.c +++ b/tsl/src/nodes/vector_agg/function/int24_sum_templates.c @@ -28,11 +28,14 @@ typedef struct } Int24SumState; static void -int_sum_init(void 
*agg_state) +int_sum_init(void *restrict agg_states, int n) { - Int24SumState *state = (Int24SumState *) agg_state; - state->result = 0; - state->isnull = true; + Int24SumState *states = (Int24SumState *) agg_states; + for (int i = 0; i < n; i++) + { + states[i].result = 0; + states[i].isnull = true; + } } static void diff --git a/tsl/src/nodes/vector_agg/function/minmax_templates.c b/tsl/src/nodes/vector_agg/function/minmax_templates.c index be79cc3c1db..0775f6bd1f2 100644 --- a/tsl/src/nodes/vector_agg/function/minmax_templates.c +++ b/tsl/src/nodes/vector_agg/function/minmax_templates.c @@ -26,11 +26,14 @@ typedef struct } MinMaxState; static void -minmax_init(void *agg_state) +minmax_init(void *restrict agg_states, int n) { - MinMaxState *state = (MinMaxState *) agg_state; - state->isvalid = false; - state->value = 0; + MinMaxState *states = (MinMaxState *) agg_states; + for (int i = 0; i < n; i++) + { + states[i].isvalid = false; + states[i].value = 0; + } } static void diff --git a/tsl/src/nodes/vector_agg/function/sum_float_templates.c b/tsl/src/nodes/vector_agg/function/sum_float_templates.c index b24e3e3d812..f2b22523b34 100644 --- a/tsl/src/nodes/vector_agg/function/sum_float_templates.c +++ b/tsl/src/nodes/vector_agg/function/sum_float_templates.c @@ -23,11 +23,14 @@ typedef struct } FloatSumState; static void -float_sum_init(void *agg_state) +float_sum_init(void *restrict agg_states, int n) { - FloatSumState *state = (FloatSumState *) agg_state; - state->result = 0; - state->isnull = true; + FloatSumState *states = (FloatSumState *) agg_states; + for (int i = 0; i < n; i++) + { + states[i].result = 0; + states[i].isnull = true; + } } #endif diff --git a/tsl/src/nodes/vector_agg/grouping_policy_batch.c b/tsl/src/nodes/vector_agg/grouping_policy_batch.c index a37747f5166..b60d4f230da 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_batch.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_batch.c @@ -77,7 +77,7 @@ gp_batch_reset(GroupingPolicy *obj) { 
VectorAggDef *agg_def = (VectorAggDef *) list_nth(policy->agg_defs, i); void *agg_state = (void *) list_nth(policy->agg_states, i); - agg_def->func->agg_init(agg_state); + agg_def->func->agg_init(agg_state, 1); } const int ngrp = list_length(policy->output_grouping_columns); diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.c b/tsl/src/nodes/vector_agg/grouping_policy_hash.c index aab2ba2b280..181261a1eeb 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.c @@ -20,22 +20,35 @@ #include "nodes/decompress_chunk/compressed_batch.h" #include "nodes/vector_agg/exec.h" +#include + typedef struct { Datum key; - uint32 status; uint32 agg_state_index; } HashEntry; -static uint64_t -hash64(uint64_t x) +// static pg_attribute_always_inline uint64 +// hash64_1(uint64 x) +//{ +// x ^= x >> 30; +// x *= 0xbf58476d1ce4e5b9U; +// x ^= x >> 27; +// x *= 0x94d049bb133111ebU; +// x ^= x >> 31; +// return x; +// } + +static pg_attribute_always_inline uint64 +hash64_crc(uint64 x) +{ + return _mm_crc32_u64(~0ULL, x); +} + +static pg_attribute_always_inline uint64 +hash64(uint64 x) { - x ^= x >> 30; - x *= 0xbf58476d1ce4e5b9U; - x ^= x >> 27; - x *= 0x94d049bb133111ebU; - x ^= x >> 31; - return x; + return hash64_crc(x); } #define SH_PREFIX h @@ -47,7 +60,8 @@ hash64(uint64_t x) #define SH_SCOPE static inline #define SH_DECLARE #define SH_DEFINE -#include "lib/simplehash.h" +#define SH_ENTRY_EMPTY(entry) (entry->agg_state_index == 0) +#include "import/ts_simplehash.h" struct h_hash; @@ -199,6 +213,7 @@ fill_offsets_impl(GroupingPolicyHash *policy, DecompressBatchState *batch_state, CompressedColumnValues column = batch_state->compressed_columns[key_column_index]; // Assert(gv->decompression_type == 8 /* lolwut */); const uint64 *restrict filter = batch_state->vector_qual_result; + struct h_hash *restrict table = policy->table; for (int row = 0; row < batch_state->total_batch_rows; row++) { bool key_valid = false; @@ 
-213,7 +228,7 @@ fill_offsets_impl(GroupingPolicyHash *policy, DecompressBatchState *batch_state, if (key_valid) { bool found = false; - HashEntry *entry = h_insert(policy->table, key, &found); + HashEntry *restrict entry = h_insert(table, key, &found); if (!found) { entry->agg_state_index = next_unused_state_index++; @@ -415,12 +430,10 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) forboth (aggdeflc, policy->agg_defs, aggstatelc, policy->per_agg_states) { - VectorAggDef *def = lfirst(aggdeflc); - for (uint64 i = last_initialized_state_index; i < next_unused_state_index; i++) - { - void *aggstate = def->func->state_bytes * i + (char *) lfirst(aggstatelc); - def->func->agg_init(aggstate); - } + const VectorAggDef *def = lfirst(aggdeflc); + def->func->agg_init(def->func->state_bytes * last_initialized_state_index + + (char *) lfirst(aggstatelc), + next_unused_state_index - last_initialized_state_index); } } @@ -443,6 +456,17 @@ gp_hash_should_emit(GroupingPolicy *gp) // { // return true; // } + /* + * Don't grow the hash table cardinality too much, otherwise we become bound + * by memory reads. In general, when this first stage of grouping doesn't + * significantly reduce the cardinality, it becomes pure overhead and the + * work will be done by the final Postgres aggregation, so we should bail + * out early here. + */ + if (policy->table->members * sizeof(HashEntry) > 128 * 1024) + { + return true; + } return false; } @@ -456,10 +480,12 @@ gp_hash_do_emit(GroupingPolicy *gp, TupleTableSlot *aggregated_slot) /* FIXME doesn't work on final result emission w/o should_emit. 
*/ policy->returning_results = true; h_start_iterate(policy->table, &policy->iter); - // fprintf(stderr, "spill after %ld input rows, %d keys, %f ratio\n", - // policy->stat_input_valid_rows, policy->table->members + policy->have_null_key, - // policy->stat_input_valid_rows / (float) (policy->table->members + - // policy->have_null_key)); + // fprintf(stderr, "spill after %ld input rows, %d keys, %f ratio, %ld aggctx bytes, + //%ld aggstate bytes\n", policy->stat_input_valid_rows, policy->table->members + //+ policy->have_null_key, policy->stat_input_valid_rows / (float) + //(policy->table->members + policy->have_null_key), + // MemoryContextMemAllocated(policy->table->ctx, false), + // MemoryContextMemAllocated(policy->agg_extra_mctx, false)); } HashEntry null_key_entry = { .agg_state_index = 1 }; diff --git a/tsl/test/expected/vectorized_aggregation.out b/tsl/test/expected/vectorized_aggregation.out index 12ed14e3e84..c25c51c8a6c 100644 --- a/tsl/test/expected/vectorized_aggregation.out +++ b/tsl/test/expected/vectorized_aggregation.out @@ -388,23 +388,20 @@ SELECT sum(segment_by_value) FROM testtable GROUP BY float_value; Output: _hyper_1_1_chunk.float_value, (PARTIAL sum(_hyper_1_1_chunk.segment_by_value)) Workers Planned: 2 -> Parallel Append - -> Partial HashAggregate - Output: _hyper_1_1_chunk.float_value, PARTIAL sum(_hyper_1_1_chunk.segment_by_value) - Group Key: _hyper_1_1_chunk.float_value + -> Custom Scan (VectorAgg) + Output: _hyper_1_1_chunk.float_value, (PARTIAL sum(_hyper_1_1_chunk.segment_by_value)) -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_1_chunk Output: _hyper_1_1_chunk.float_value, _hyper_1_1_chunk.segment_by_value -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_11_chunk Output: compress_hyper_2_11_chunk._ts_meta_count, compress_hyper_2_11_chunk.segment_by_value, compress_hyper_2_11_chunk._ts_meta_min_1, compress_hyper_2_11_chunk._ts_meta_max_1, compress_hyper_2_11_chunk."time", 
compress_hyper_2_11_chunk.int_value, compress_hyper_2_11_chunk.float_value - -> Partial HashAggregate - Output: _hyper_1_2_chunk.float_value, PARTIAL sum(_hyper_1_2_chunk.segment_by_value) - Group Key: _hyper_1_2_chunk.float_value + -> Custom Scan (VectorAgg) + Output: _hyper_1_2_chunk.float_value, (PARTIAL sum(_hyper_1_2_chunk.segment_by_value)) -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_2_chunk Output: _hyper_1_2_chunk.float_value, _hyper_1_2_chunk.segment_by_value -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_12_chunk Output: compress_hyper_2_12_chunk._ts_meta_count, compress_hyper_2_12_chunk.segment_by_value, compress_hyper_2_12_chunk._ts_meta_min_1, compress_hyper_2_12_chunk._ts_meta_max_1, compress_hyper_2_12_chunk."time", compress_hyper_2_12_chunk.int_value, compress_hyper_2_12_chunk.float_value - -> Partial HashAggregate - Output: _hyper_1_3_chunk.float_value, PARTIAL sum(_hyper_1_3_chunk.segment_by_value) - Group Key: _hyper_1_3_chunk.float_value + -> Custom Scan (VectorAgg) + Output: _hyper_1_3_chunk.float_value, (PARTIAL sum(_hyper_1_3_chunk.segment_by_value)) -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_3_chunk Output: _hyper_1_3_chunk.float_value, _hyper_1_3_chunk.segment_by_value -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_13_chunk @@ -444,7 +441,7 @@ SELECT sum(segment_by_value) FROM testtable GROUP BY float_value; Group Key: _hyper_1_10_chunk.float_value -> Parallel Seq Scan on _timescaledb_internal._hyper_1_10_chunk Output: _hyper_1_10_chunk.float_value, _hyper_1_10_chunk.segment_by_value -(63 rows) +(60 rows) :EXPLAIN SELECT sum(segment_by_value) FROM testtable GROUP BY int_value; @@ -457,23 +454,20 @@ SELECT sum(segment_by_value) FROM testtable GROUP BY int_value; Output: _hyper_1_1_chunk.int_value, (PARTIAL sum(_hyper_1_1_chunk.segment_by_value)) Workers Planned: 2 -> Parallel Append - -> Partial HashAggregate - Output: _hyper_1_1_chunk.int_value, PARTIAL 
sum(_hyper_1_1_chunk.segment_by_value) - Group Key: _hyper_1_1_chunk.int_value + -> Custom Scan (VectorAgg) + Output: _hyper_1_1_chunk.int_value, (PARTIAL sum(_hyper_1_1_chunk.segment_by_value)) -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_1_chunk Output: _hyper_1_1_chunk.int_value, _hyper_1_1_chunk.segment_by_value -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_11_chunk Output: compress_hyper_2_11_chunk._ts_meta_count, compress_hyper_2_11_chunk.segment_by_value, compress_hyper_2_11_chunk._ts_meta_min_1, compress_hyper_2_11_chunk._ts_meta_max_1, compress_hyper_2_11_chunk."time", compress_hyper_2_11_chunk.int_value, compress_hyper_2_11_chunk.float_value - -> Partial HashAggregate - Output: _hyper_1_2_chunk.int_value, PARTIAL sum(_hyper_1_2_chunk.segment_by_value) - Group Key: _hyper_1_2_chunk.int_value + -> Custom Scan (VectorAgg) + Output: _hyper_1_2_chunk.int_value, (PARTIAL sum(_hyper_1_2_chunk.segment_by_value)) -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_2_chunk Output: _hyper_1_2_chunk.int_value, _hyper_1_2_chunk.segment_by_value -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_12_chunk Output: compress_hyper_2_12_chunk._ts_meta_count, compress_hyper_2_12_chunk.segment_by_value, compress_hyper_2_12_chunk._ts_meta_min_1, compress_hyper_2_12_chunk._ts_meta_max_1, compress_hyper_2_12_chunk."time", compress_hyper_2_12_chunk.int_value, compress_hyper_2_12_chunk.float_value - -> Partial HashAggregate - Output: _hyper_1_3_chunk.int_value, PARTIAL sum(_hyper_1_3_chunk.segment_by_value) - Group Key: _hyper_1_3_chunk.int_value + -> Custom Scan (VectorAgg) + Output: _hyper_1_3_chunk.int_value, (PARTIAL sum(_hyper_1_3_chunk.segment_by_value)) -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_3_chunk Output: _hyper_1_3_chunk.int_value, _hyper_1_3_chunk.segment_by_value -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_13_chunk @@ -513,7 +507,7 @@ SELECT 
sum(segment_by_value) FROM testtable GROUP BY int_value; Group Key: _hyper_1_10_chunk.int_value -> Parallel Seq Scan on _timescaledb_internal._hyper_1_10_chunk Output: _hyper_1_10_chunk.int_value, _hyper_1_10_chunk.segment_by_value -(63 rows) +(60 rows) -- Vectorization possible with grouping by a segmentby column. :EXPLAIN From 339f91a78222ee4c97f29c5f47f248c1f130f8bb Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 4 Oct 2024 14:59:55 +0200 Subject: [PATCH 08/58] reduce indirections --- tsl/src/nodes/vector_agg/exec.c | 2 +- tsl/src/nodes/vector_agg/exec.h | 2 +- .../nodes/vector_agg/grouping_policy_batch.c | 22 ++++---- .../nodes/vector_agg/grouping_policy_hash.c | 55 ++++++++++--------- 4 files changed, 41 insertions(+), 40 deletions(-) diff --git a/tsl/src/nodes/vector_agg/exec.c b/tsl/src/nodes/vector_agg/exec.c index 548b6ca623f..fa4dc703d42 100644 --- a/tsl/src/nodes/vector_agg/exec.c +++ b/tsl/src/nodes/vector_agg/exec.c @@ -90,7 +90,7 @@ vector_agg_begin(CustomScanState *node, EState *estate, int eflags) Aggref *aggref = castNode(Aggref, tlentry->expr); VectorAggFunctions *func = get_vector_aggregate(aggref->aggfnoid); Assert(func != NULL); - def->func = func; + def->func = *func; if (list_length(aggref->args) > 0) { diff --git a/tsl/src/nodes/vector_agg/exec.h b/tsl/src/nodes/vector_agg/exec.h index d886b927d31..1b38fa81fe7 100644 --- a/tsl/src/nodes/vector_agg/exec.h +++ b/tsl/src/nodes/vector_agg/exec.h @@ -15,7 +15,7 @@ typedef struct { - VectorAggFunctions *func; + VectorAggFunctions func; int input_offset; int output_offset; } VectorAggDef; diff --git a/tsl/src/nodes/vector_agg/grouping_policy_batch.c b/tsl/src/nodes/vector_agg/grouping_policy_batch.c index b60d4f230da..23d695cedc1 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_batch.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_batch.c @@ -52,8 +52,8 @@ create_grouping_policy_batch(List *agg_defs, List *output_grouping_columns) 
ListCell *lc; foreach (lc, agg_defs) { - VectorAggDef *def = lfirst(lc); - policy->agg_states = lappend(policy->agg_states, palloc0(def->func->state_bytes)); + VectorAggDef *agg_def = lfirst(lc); + policy->agg_states = lappend(policy->agg_states, palloc0(agg_def->func.state_bytes)); } policy->output_grouping_values = (Datum *) palloc0(MAXALIGN(list_length(output_grouping_columns) * sizeof(Datum)) + @@ -77,7 +77,7 @@ gp_batch_reset(GroupingPolicy *obj) { VectorAggDef *agg_def = (VectorAggDef *) list_nth(policy->agg_defs, i); void *agg_state = (void *) list_nth(policy->agg_states, i); - agg_def->func->agg_init(agg_state, 1); + agg_def->func.agg_init(agg_state, 1); } const int ngrp = list_length(policy->output_grouping_columns); @@ -126,10 +126,10 @@ compute_single_aggregate(DecompressBatchState *batch_state, VectorAggDef *agg_de if (arg_arrow != NULL) { /* Arrow argument. */ - agg_def->func->agg_vector(agg_state, - arg_arrow, - batch_state->vector_qual_result, - agg_extra_mctx); + agg_def->func.agg_vector(agg_state, + arg_arrow, + batch_state->vector_qual_result, + agg_extra_mctx); } else { @@ -146,7 +146,7 @@ compute_single_aggregate(DecompressBatchState *batch_state, VectorAggDef *agg_de */ Assert(n > 0); - agg_def->func->agg_const(agg_state, arg_datum, arg_isnull, n, agg_extra_mctx); + agg_def->func.agg_const(agg_state, arg_datum, arg_isnull, n, agg_extra_mctx); } } @@ -211,9 +211,9 @@ gp_batch_do_emit(GroupingPolicy *gp, TupleTableSlot *aggregated_slot) { VectorAggDef *agg_def = (VectorAggDef *) list_nth(policy->agg_defs, i); void *agg_state = (void *) list_nth(policy->agg_states, i); - agg_def->func->agg_emit(agg_state, - &aggregated_slot->tts_values[agg_def->output_offset], - &aggregated_slot->tts_isnull[agg_def->output_offset]); + agg_def->func.agg_emit(agg_state, + &aggregated_slot->tts_values[agg_def->output_offset], + &aggregated_slot->tts_isnull[agg_def->output_offset]); } const int ngrp = list_length(policy->output_grouping_columns); diff --git 
a/tsl/src/nodes/vector_agg/grouping_policy_hash.c b/tsl/src/nodes/vector_agg/grouping_policy_hash.c index 181261a1eeb..d030696288f 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.c @@ -105,12 +105,12 @@ create_grouping_policy_hash(List *agg_defs, List *output_grouping_columns) ListCell *lc; foreach (lc, agg_defs) { - VectorAggDef *def = lfirst(lc); - policy->aggstate_bytes_per_key += def->func->state_bytes; + VectorAggDef *agg_def = lfirst(lc); + policy->aggstate_bytes_per_key += agg_def->func.state_bytes; policy->per_agg_states = lappend(policy->per_agg_states, - palloc0(def->func->state_bytes * policy->allocated_aggstate_rows)); + palloc0(agg_def->func.state_bytes * policy->allocated_aggstate_rows)); } policy->table = h_create(CurrentMemoryContext, 1000, NULL); @@ -172,7 +172,7 @@ compute_single_aggregate(DecompressBatchState *batch_state, VectorAggDef *agg_de if (arg_arrow != NULL) { /* Arrow argument. */ - agg_def->func->agg_many(agg_states, offsets, arg_arrow, agg_extra_mctx); + agg_def->func.agg_many(agg_states, offsets, arg_arrow, agg_extra_mctx); } else { @@ -180,14 +180,14 @@ compute_single_aggregate(DecompressBatchState *batch_state, VectorAggDef *agg_de * Scalar argument, or count(*). The latter has an optimized * implementation for this case. 
*/ - if (agg_def->func->agg_many_scalar != NULL) + if (agg_def->func.agg_many_scalar != NULL) { - agg_def->func->agg_many_scalar(agg_states, - offsets, - batch_state->total_batch_rows, - arg_datum, - arg_isnull, - agg_extra_mctx); + agg_def->func.agg_many_scalar(agg_states, + offsets, + batch_state->total_batch_rows, + arg_datum, + arg_isnull, + agg_extra_mctx); } else { @@ -198,8 +198,8 @@ compute_single_aggregate(DecompressBatchState *batch_state, VectorAggDef *agg_de continue; } - void *state = (offsets[i] * agg_def->func->state_bytes + (char *) agg_states); - agg_def->func->agg_const(state, arg_datum, arg_isnull, 1, agg_extra_mctx); + void *state = (offsets[i] * agg_def->func.state_bytes + (char *) agg_states); + agg_def->func.agg_const(state, arg_datum, arg_isnull, 1, agg_extra_mctx); } } } @@ -421,19 +421,19 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) policy->allocated_aggstate_rows = policy->allocated_aggstate_rows * 2 + 1; forboth (aggdeflc, policy->agg_defs, aggstatelc, policy->per_agg_states) { - VectorAggDef *def = lfirst(aggdeflc); + VectorAggDef *agg_def = lfirst(aggdeflc); lfirst(aggstatelc) = repalloc(lfirst(aggstatelc), - policy->allocated_aggstate_rows * def->func->state_bytes); + policy->allocated_aggstate_rows * agg_def->func.state_bytes); } } forboth (aggdeflc, policy->agg_defs, aggstatelc, policy->per_agg_states) { - const VectorAggDef *def = lfirst(aggdeflc); - def->func->agg_init(def->func->state_bytes * last_initialized_state_index + - (char *) lfirst(aggstatelc), - next_unused_state_index - last_initialized_state_index); + const VectorAggDef *agg_def = lfirst(aggdeflc); + agg_def->func.agg_init(agg_def->func.state_bytes * last_initialized_state_index + + (char *) lfirst(aggstatelc), + next_unused_state_index - last_initialized_state_index); } } @@ -480,11 +480,12 @@ gp_hash_do_emit(GroupingPolicy *gp, TupleTableSlot *aggregated_slot) /* FIXME doesn't work on final result emission w/o should_emit. 
*/ policy->returning_results = true; h_start_iterate(policy->table, &policy->iter); - // fprintf(stderr, "spill after %ld input rows, %d keys, %f ratio, %ld aggctx bytes, - //%ld aggstate bytes\n", policy->stat_input_valid_rows, policy->table->members + // fprintf(stderr, "spill after %ld input rows, %d keys, %f ratio, %ld aggctx + //bytes, %ld aggstate bytes\n", policy->stat_input_valid_rows, + //policy->table->members //+ policy->have_null_key, policy->stat_input_valid_rows / (float) - //(policy->table->members + policy->have_null_key), - // MemoryContextMemAllocated(policy->table->ctx, false), + //(policy->table->members + + //policy->have_null_key), MemoryContextMemAllocated(policy->table->ctx, false), // MemoryContextMemAllocated(policy->agg_extra_mctx, false)); } @@ -509,10 +510,10 @@ gp_hash_do_emit(GroupingPolicy *gp, TupleTableSlot *aggregated_slot) { VectorAggDef *agg_def = (VectorAggDef *) list_nth(policy->agg_defs, i); void *agg_states = list_nth(policy->per_agg_states, i); - void *agg_state = entry->agg_state_index * agg_def->func->state_bytes + (char *) agg_states; - agg_def->func->agg_emit(agg_state, - &aggregated_slot->tts_values[agg_def->output_offset], - &aggregated_slot->tts_isnull[agg_def->output_offset]); + void *agg_state = entry->agg_state_index * agg_def->func.state_bytes + (char *) agg_states; + agg_def->func.agg_emit(agg_state, + &aggregated_slot->tts_values[agg_def->output_offset], + &aggregated_slot->tts_isnull[agg_def->output_offset]); } Assert(list_length(policy->output_grouping_columns) == 1); From f075589f68c0b622ca63c0f4f071e3921d82d752 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 8 Oct 2024 16:33:19 +0200 Subject: [PATCH 09/58] skip null bitmap words --- .../vector_agg/function/agg_many_helper.c | 22 +- tsl/src/nodes/vector_agg/function/functions.c | 29 +-- tsl/src/nodes/vector_agg/function/functions.h | 9 +- .../nodes/vector_agg/grouping_policy_hash.c | 243 
+++++++++++------- 4 files changed, 178 insertions(+), 125 deletions(-) diff --git a/tsl/src/nodes/vector_agg/function/agg_many_helper.c b/tsl/src/nodes/vector_agg/function/agg_many_helper.c index ef8acd28181..f8517c4328a 100644 --- a/tsl/src/nodes/vector_agg/function/agg_many_helper.c +++ b/tsl/src/nodes/vector_agg/function/agg_many_helper.c @@ -5,29 +5,23 @@ */ static void -FUNCTION_NAME(many)(void *restrict agg_states, uint32 *restrict offsets, const ArrowArray *vector, - MemoryContext agg_extra_mctx) +FUNCTION_NAME(many)(void *restrict agg_states, uint32 *restrict offsets, int start_row, int end_row, + const ArrowArray *vector, MemoryContext agg_extra_mctx) { MemoryContext old = MemoryContextSwitchTo(agg_extra_mctx); - const int n = vector->length; const CTYPE *values = vector->buffers[1]; const uint64 *valid = vector->buffers[0]; - for (int row = 0; row < n; row++) + for (int row = start_row; row < end_row; row++) { FUNCTION_NAME(state) *state = (offsets[row] + (FUNCTION_NAME(state) *) agg_states); - CTYPE value = values[row]; + const CTYPE value = values[row]; + const bool row_passes = (offsets[row] != 0); + const bool value_notnull = arrow_row_is_valid(valid, row); - if (offsets[row] == 0) + if (row_passes && value_notnull) { - continue; + FUNCTION_NAME(one)(state, value); } - - if (!arrow_row_is_valid(valid, row)) - { - continue; - } - - FUNCTION_NAME(one)(state, value); } MemoryContextSwitchTo(old); } diff --git a/tsl/src/nodes/vector_agg/function/functions.c b/tsl/src/nodes/vector_agg/function/functions.c index 1ebb5b06b22..4dea3c8c90e 100644 --- a/tsl/src/nodes/vector_agg/function/functions.c +++ b/tsl/src/nodes/vector_agg/function/functions.c @@ -53,11 +53,12 @@ count_star_const(void *agg_state, Datum constvalue, bool constisnull, int n, } static void -count_star_many_scalar(void *restrict agg_states, uint32 *restrict offsets, int n, Datum constvalue, - bool constisnull, MemoryContext agg_extra_mctx) +count_star_many_scalar(void *restrict agg_states, 
uint32 *restrict offsets, int start_row, + int end_row, Datum constvalue, bool constisnull, + MemoryContext agg_extra_mctx) { CountState *states = (CountState *) agg_states; - for (int row = 0; row < n; row++) + for (int row = start_row; row < end_row; row++) { if (offsets[row] == 0) { @@ -131,25 +132,19 @@ count_any_vector(void *agg_state, const ArrowArray *vector, const uint64 *filter } static void -count_any_many(void *restrict agg_states, uint32 *restrict offsets, const ArrowArray *vector, - MemoryContext agg_extra_mctx) +count_any_many(void *restrict agg_states, uint32 *restrict offsets, int start_row, int end_row, + const ArrowArray *vector, MemoryContext agg_extra_mctx) { - const int n = vector->length; const uint64 *valid = vector->buffers[0]; - for (int row = 0; row < n; row++) + for (int row = start_row; row < end_row; row++) { - if (offsets[row] == 0) - { - continue; - } - - if (!arrow_row_is_valid(valid, row)) + CountState *state = (offsets[row] + (CountState *) agg_states); + const bool row_passes = (offsets[row] != 0); + const bool value_notnull = arrow_row_is_valid(valid, row); + if (row_passes && value_notnull) { - continue; + state->count++; } - - CountState *state = (offsets[row] + (CountState *) agg_states); - state->count++; } } diff --git a/tsl/src/nodes/vector_agg/function/functions.h b/tsl/src/nodes/vector_agg/function/functions.h index 4d679e14277..9d5539fb244 100644 --- a/tsl/src/nodes/vector_agg/function/functions.h +++ b/tsl/src/nodes/vector_agg/function/functions.h @@ -27,11 +27,12 @@ typedef struct void (*agg_const)(void *restrict agg_state, Datum constvalue, bool constisnull, int n, MemoryContext agg_extra_mctx); - void (*agg_many)(void *restrict agg_states, uint32 *restrict offsets, const ArrowArray *vector, - MemoryContext agg_extra_mctx); + void (*agg_many)(void *restrict agg_states, uint32 *restrict offsets, int start_row, + int end_row, const ArrowArray *vector, MemoryContext agg_extra_mctx); - void (*agg_many_scalar)(void 
*restrict agg_states, uint32 *restrict offsets, int n, - Datum constvalue, bool constisnull, MemoryContext agg_extra_mctx); + void (*agg_many_scalar)(void *restrict agg_states, uint32 *restrict offsets, int start_row, + int end_row, Datum constvalue, bool constisnull, + MemoryContext agg_extra_mctx); /* Emit a partial result. */ void (*agg_emit)(void *restrict agg_state, Datum *out_result, bool *out_isnull); diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.c b/tsl/src/nodes/vector_agg/grouping_policy_hash.c index d030696288f..eab57f38718 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.c @@ -87,7 +87,11 @@ typedef struct uint64 allocated_aggstate_rows; List *per_agg_states; + uint64 stat_input_total_rows; + uint64 stat_input_valid_rows; + + uint64 stat_bulk_filtered_rows; } GroupingPolicyHash; static const GroupingPolicy grouping_policy_hash_functions; @@ -134,11 +138,14 @@ gp_hash_reset(GroupingPolicy *obj) policy->have_null_key = false; policy->stat_input_valid_rows = 0; + policy->stat_input_total_rows = 0; + policy->stat_bulk_filtered_rows = 0; } static void -compute_single_aggregate(DecompressBatchState *batch_state, VectorAggDef *agg_def, void *agg_states, - uint32 *offsets, MemoryContext agg_extra_mctx) +compute_single_aggregate(DecompressBatchState *batch_state, int start_row, int end_row, + VectorAggDef *agg_def, void *agg_states, uint32 *offsets, + MemoryContext agg_extra_mctx) { ArrowArray *arg_arrow = NULL; Datum arg_datum = 0; @@ -172,7 +179,7 @@ compute_single_aggregate(DecompressBatchState *batch_state, VectorAggDef *agg_de if (arg_arrow != NULL) { /* Arrow argument. 
*/ - agg_def->func.agg_many(agg_states, offsets, arg_arrow, agg_extra_mctx); + agg_def->func.agg_many(agg_states, offsets, start_row, end_row, arg_arrow, agg_extra_mctx); } else { @@ -184,14 +191,15 @@ compute_single_aggregate(DecompressBatchState *batch_state, VectorAggDef *agg_de { agg_def->func.agg_many_scalar(agg_states, offsets, - batch_state->total_batch_rows, + start_row, + end_row, arg_datum, arg_isnull, agg_extra_mctx); } else { - for (int i = 0; i < batch_state->total_batch_rows; i++) + for (int i = start_row; i < end_row; i++) { if (offsets[i] == 0) { @@ -207,14 +215,15 @@ compute_single_aggregate(DecompressBatchState *batch_state, VectorAggDef *agg_de static pg_attribute_always_inline uint32 fill_offsets_impl(GroupingPolicyHash *policy, DecompressBatchState *batch_state, - int key_column_index, uint32 next_unused_state_index, uint32 *restrict offsets, + int key_column_index, uint32 next_unused_state_index, int start_row, int end_row, + uint32 *restrict offsets, void (*get_key)(CompressedColumnValues column, int row, Datum *key, bool *valid)) { CompressedColumnValues column = batch_state->compressed_columns[key_column_index]; // Assert(gv->decompression_type == 8 /* lolwut */); const uint64 *restrict filter = batch_state->vector_qual_result; struct h_hash *restrict table = policy->table; - for (int row = 0; row < batch_state->total_batch_rows; row++) + for (int row = start_row; row < end_row; row++) { bool key_valid = false; Datum key = { 0 }; @@ -242,8 +251,8 @@ fill_offsets_impl(GroupingPolicyHash *policy, DecompressBatchState *batch_state, } } - policy->stat_input_valid_rows += - arrow_num_valid(column.buffers[0], batch_state->total_batch_rows); + policy->stat_input_total_rows += batch_state->total_batch_rows; + policy->stat_input_valid_rows += arrow_num_valid(filter, batch_state->total_batch_rows); return next_unused_state_index; } @@ -300,51 +309,60 @@ get_key_scalar(CompressedColumnValues column, int row, Datum *restrict key, bool static 
pg_noinline uint32 fill_offsets_arrow_fixed_8(GroupingPolicyHash *policy, DecompressBatchState *batch_state, - int key_column_index, uint32 next_unused_state_index, - uint32 *restrict offsets) + int key_column_index, uint32 next_unused_state_index, int start_row, + int end_row, uint32 *restrict offsets) { return fill_offsets_impl(policy, batch_state, key_column_index, next_unused_state_index, + start_row, + end_row, offsets, get_key_arrow_fixed_8); } static pg_noinline uint32 fill_offsets_arrow_fixed_4(GroupingPolicyHash *policy, DecompressBatchState *batch_state, - int key_column_index, uint32 next_unused_state_index, - uint32 *restrict offsets) + int key_column_index, uint32 next_unused_state_index, int start_row, + int end_row, uint32 *restrict offsets) { return fill_offsets_impl(policy, batch_state, key_column_index, next_unused_state_index, + start_row, + end_row, offsets, get_key_arrow_fixed_4); } static pg_noinline uint32 fill_offsets_arrow_fixed_2(GroupingPolicyHash *policy, DecompressBatchState *batch_state, - int key_column_index, uint32 next_unused_state_index, - uint32 *restrict offsets) + int key_column_index, uint32 next_unused_state_index, int start_row, + int end_row, uint32 *restrict offsets) { return fill_offsets_impl(policy, batch_state, key_column_index, next_unused_state_index, + start_row, + end_row, offsets, get_key_arrow_fixed_2); } static pg_noinline uint32 fill_offsets_scalar(GroupingPolicyHash *policy, DecompressBatchState *batch_state, - int key_column_index, uint32 next_unused_state_index, uint32 *restrict offsets) + int key_column_index, uint32 next_unused_state_index, int start_row, + int end_row, uint32 *restrict offsets) { return fill_offsets_impl(policy, batch_state, key_column_index, next_unused_state_index, + start_row, + end_row, offsets, get_key_scalar); } @@ -356,17 +374,6 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) Assert(!policy->returning_results); - /* - * State index zero is invalid, and 
state index one is for null key. We have - * to initialize it at the first run. - */ - const uint32 last_initialized_state_index = - policy->table->members ? policy->table->members + 2 : 1; - uint32 next_unused_state_index = policy->table->members + 2; - - uint32 offsets[1000] = { 0 }; - Assert(batch_state->total_batch_rows <= 1000); - /* * For the partial aggregation node, the grouping columns are always in the * output, so we don't have to separately look at the list of the grouping @@ -375,76 +382,131 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) Assert(list_length(policy->output_grouping_columns) == 1); GroupingColumn *g = linitial(policy->output_grouping_columns); CompressedColumnValues *key_column = &batch_state->compressed_columns[g->input_offset]; + // const uint64_t* restrict key_validity = key_column->buffers[0]; + const uint64_t *restrict filter = batch_state->vector_qual_result; - switch ((int) key_column->decompression_type) - { - case DT_Scalar: - next_unused_state_index = fill_offsets_scalar(policy, - batch_state, - g->input_offset, - next_unused_state_index, - offsets); - break; - case 8: - next_unused_state_index = fill_offsets_arrow_fixed_8(policy, - batch_state, - g->input_offset, - next_unused_state_index, - offsets); - break; - case 4: - next_unused_state_index = fill_offsets_arrow_fixed_4(policy, - batch_state, - g->input_offset, - next_unused_state_index, - offsets); - break; - case 2: - next_unused_state_index = fill_offsets_arrow_fixed_2(policy, - batch_state, - g->input_offset, - next_unused_state_index, - offsets); - break; - default: - Assert(false); - break; - } + uint32 offsets[1000] = { 0 }; - ListCell *aggdeflc; - ListCell *aggstatelc; + const int n = batch_state->total_batch_rows; + int start_row = 0; + int end_row = 0; - if (next_unused_state_index > last_initialized_state_index) + // for (int end_row = MIN(64, n); end_row <= n; end_row += 64) + for (start_row = 0; start_row < n; start_row = end_row) { 
- if (next_unused_state_index > policy->allocated_aggstate_rows) + if (filter) + { + if (filter[start_row / 64] == 0) + { + end_row = MIN(start_row + 64, n); + policy->stat_bulk_filtered_rows += 64; + continue; + } + + for (end_row = start_row; end_row < n; end_row = MIN(end_row + 64, n)) + { + if (filter[end_row / 64] == 0) + { + break; + } + } + } + else + { + end_row = n; + } + Assert(start_row <= end_row); + Assert(end_row <= n); + /* + * State index zero is invalid, and state index one is for null key. We have + * to initialize it at the first run. + */ + uint32 last_initialized_state_index = + policy->table->members ? policy->table->members + 2 : 1; + uint32 next_unused_state_index = policy->table->members + 2; + + Assert((size_t) end_row <= sizeof(offsets) / sizeof(*offsets)); + + switch ((int) key_column->decompression_type) { - policy->allocated_aggstate_rows = policy->allocated_aggstate_rows * 2 + 1; + case DT_Scalar: + next_unused_state_index = fill_offsets_scalar(policy, + batch_state, + g->input_offset, + next_unused_state_index, + start_row, + end_row, + offsets); + break; + case 8: + next_unused_state_index = fill_offsets_arrow_fixed_8(policy, + batch_state, + g->input_offset, + next_unused_state_index, + start_row, + end_row, + offsets); + break; + case 4: + next_unused_state_index = fill_offsets_arrow_fixed_4(policy, + batch_state, + g->input_offset, + next_unused_state_index, + start_row, + end_row, + offsets); + break; + case 2: + next_unused_state_index = fill_offsets_arrow_fixed_2(policy, + batch_state, + g->input_offset, + next_unused_state_index, + start_row, + end_row, + offsets); + break; + default: + Assert(false); + break; + } + + ListCell *aggdeflc; + ListCell *aggstatelc; + + if (next_unused_state_index > last_initialized_state_index) + { + if (next_unused_state_index > policy->allocated_aggstate_rows) + { + policy->allocated_aggstate_rows = policy->allocated_aggstate_rows * 2 + 1; + forboth (aggdeflc, policy->agg_defs, aggstatelc, 
policy->per_agg_states) + { + VectorAggDef *agg_def = lfirst(aggdeflc); + lfirst(aggstatelc) = + repalloc(lfirst(aggstatelc), + policy->allocated_aggstate_rows * agg_def->func.state_bytes); + } + } + forboth (aggdeflc, policy->agg_defs, aggstatelc, policy->per_agg_states) { - VectorAggDef *agg_def = lfirst(aggdeflc); - lfirst(aggstatelc) = - repalloc(lfirst(aggstatelc), - policy->allocated_aggstate_rows * agg_def->func.state_bytes); + const VectorAggDef *agg_def = lfirst(aggdeflc); + agg_def->func.agg_init(agg_def->func.state_bytes * last_initialized_state_index + + (char *) lfirst(aggstatelc), + next_unused_state_index - last_initialized_state_index); } } forboth (aggdeflc, policy->agg_defs, aggstatelc, policy->per_agg_states) { - const VectorAggDef *agg_def = lfirst(aggdeflc); - agg_def->func.agg_init(agg_def->func.state_bytes * last_initialized_state_index + - (char *) lfirst(aggstatelc), - next_unused_state_index - last_initialized_state_index); + compute_single_aggregate(batch_state, + start_row, + end_row, + lfirst(aggdeflc), + lfirst(aggstatelc), + offsets, + policy->agg_extra_mctx); } } - - forboth (aggdeflc, policy->agg_defs, aggstatelc, policy->per_agg_states) - { - compute_single_aggregate(batch_state, - lfirst(aggdeflc), - lfirst(aggstatelc), - offsets, - policy->agg_extra_mctx); - } } static bool @@ -480,13 +542,14 @@ gp_hash_do_emit(GroupingPolicy *gp, TupleTableSlot *aggregated_slot) /* FIXME doesn't work on final result emission w/o should_emit. 
*/ policy->returning_results = true; h_start_iterate(policy->table, &policy->iter); - // fprintf(stderr, "spill after %ld input rows, %d keys, %f ratio, %ld aggctx - //bytes, %ld aggstate bytes\n", policy->stat_input_valid_rows, - //policy->table->members - //+ policy->have_null_key, policy->stat_input_valid_rows / (float) - //(policy->table->members + - //policy->have_null_key), MemoryContextMemAllocated(policy->table->ctx, false), - // MemoryContextMemAllocated(policy->agg_extra_mctx, false)); + // fprintf(stderr, + // "spill after %ld input %ld valid %ld bulk filtered, %d keys, %f ratio, %ld aggctx + //bytes, %ld aggstate bytes\n", policy->stat_input_total_rows, + //policy->stat_input_valid_rows, policy->stat_bulk_filtered_rows, policy->table->members + // + policy->have_null_key, policy->stat_input_valid_rows / + //(float) (policy->table->members + policy->have_null_key), + //MemoryContextMemAllocated(policy->table->ctx, false), + //MemoryContextMemAllocated(policy->agg_extra_mctx, false)); } HashEntry null_key_entry = { .agg_state_index = 1 }; From 88f325dc67840e5f5d07a0b99f701cbcbe2fb4cb Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 9 Oct 2024 16:44:48 +0200 Subject: [PATCH 10/58] cleanup --- tsl/src/import/ts_simplehash.h | 10 ---------- tsl/src/nodes/vector_agg/grouping_policy_hash.c | 14 ++++++++------ 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/tsl/src/import/ts_simplehash.h b/tsl/src/import/ts_simplehash.h index 2a630f24017..f4f6db2d30d 100644 --- a/tsl/src/import/ts_simplehash.h +++ b/tsl/src/import/ts_simplehash.h @@ -109,8 +109,6 @@ #define SH_RESET SH_MAKE_NAME(reset) #define SH_INSERT SH_MAKE_NAME(insert) #define SH_INSERT_HASH SH_MAKE_NAME(insert_hash) -#define SH_DELETE_ITEM SH_MAKE_NAME(delete_item) -#define SH_DELETE SH_MAKE_NAME(delete) #define SH_LOOKUP SH_MAKE_NAME(lookup) #define SH_LOOKUP_HASH SH_MAKE_NAME(lookup_hash) #define SH_GROW SH_MAKE_NAME(grow) @@ -208,12 
+206,6 @@ SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE *tb, SH_KEY_TYPE key); /* *_lookup_hash(_hash *tb, key, uint32 hash) */ SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP_HASH(SH_TYPE *tb, SH_KEY_TYPE key, uint32 hash); -/* void _delete_item(_hash *tb, *entry) */ -SH_SCOPE void SH_DELETE_ITEM(SH_TYPE *tb, SH_ELEMENT_TYPE *entry); - -/* bool _delete(_hash *tb, key) */ -SH_SCOPE bool SH_DELETE(SH_TYPE *tb, SH_KEY_TYPE key); - /* void _start_iterate(_hash *tb, _iterator *iter) */ SH_SCOPE void SH_START_ITERATE(SH_TYPE *tb, SH_ITERATOR *iter); @@ -1023,8 +1015,6 @@ SH_STAT(SH_TYPE *tb) #undef SH_RESET #undef SH_INSERT #undef SH_INSERT_HASH -#undef SH_DELETE_ITEM -#undef SH_DELETE #undef SH_LOOKUP #undef SH_LOOKUP_HASH #undef SH_GROW diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.c b/tsl/src/nodes/vector_agg/grouping_policy_hash.c index eab57f38718..e542b12bdeb 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.c @@ -217,7 +217,8 @@ static pg_attribute_always_inline uint32 fill_offsets_impl(GroupingPolicyHash *policy, DecompressBatchState *batch_state, int key_column_index, uint32 next_unused_state_index, int start_row, int end_row, uint32 *restrict offsets, - void (*get_key)(CompressedColumnValues column, int row, Datum *key, bool *valid)) + void (*get_key)(CompressedColumnValues column, int row, Datum *restrict key, + bool *restrict valid)) { CompressedColumnValues column = batch_state->compressed_columns[key_column_index]; // Assert(gv->decompression_type == 8 /* lolwut */); @@ -543,13 +544,14 @@ gp_hash_do_emit(GroupingPolicy *gp, TupleTableSlot *aggregated_slot) policy->returning_results = true; h_start_iterate(policy->table, &policy->iter); // fprintf(stderr, - // "spill after %ld input %ld valid %ld bulk filtered, %d keys, %f ratio, %ld aggctx - //bytes, %ld aggstate bytes\n", policy->stat_input_total_rows, - //policy->stat_input_valid_rows, policy->stat_bulk_filtered_rows, policy->table->members + 
// "spill after %ld input %ld valid %ld bulk filtered, %d keys, %f ratio, %ld + //aggctx bytes, %ld aggstate bytes\n", policy->stat_input_total_rows, + // policy->stat_input_valid_rows, policy->stat_bulk_filtered_rows, + // policy->table->members // + policy->have_null_key, policy->stat_input_valid_rows / //(float) (policy->table->members + policy->have_null_key), - //MemoryContextMemAllocated(policy->table->ctx, false), - //MemoryContextMemAllocated(policy->agg_extra_mctx, false)); + // MemoryContextMemAllocated(policy->table->ctx, + // false), MemoryContextMemAllocated(policy->agg_extra_mctx, false)); } HashEntry null_key_entry = { .agg_state_index = 1 }; From 15ab44310866ae12d40b739de749b6ca2540a581 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 9 Oct 2024 16:51:18 +0200 Subject: [PATCH 11/58] crc32 --- .../nodes/vector_agg/grouping_policy_hash.c | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.c b/tsl/src/nodes/vector_agg/grouping_policy_hash.c index e542b12bdeb..3dbd81a3bce 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.c @@ -28,28 +28,25 @@ typedef struct uint32 agg_state_index; } HashEntry; -// static pg_attribute_always_inline uint64 -// hash64_1(uint64 x) -//{ -// x ^= x >> 30; -// x *= 0xbf58476d1ce4e5b9U; -// x ^= x >> 27; -// x *= 0x94d049bb133111ebU; -// x ^= x >> 31; -// return x; -// } - +#ifdef USE_SSE42_CRC32C static pg_attribute_always_inline uint64 -hash64_crc(uint64 x) +hash64(uint64 x) { return _mm_crc32_u64(~0ULL, x); } +#else static pg_attribute_always_inline uint64 hash64(uint64 x) { - return hash64_crc(x); + x ^= x >> 30; + x *= 0xbf58476d1ce4e5b9U; + x ^= x >> 27; + x *= 0x94d049bb133111ebU; + x ^= x >> 31; + return x; } +#endif #define SH_PREFIX h #define SH_ELEMENT_TYPE HashEntry @@ -545,7 +542,7 @@ gp_hash_do_emit(GroupingPolicy 
*gp, TupleTableSlot *aggregated_slot) h_start_iterate(policy->table, &policy->iter); // fprintf(stderr, // "spill after %ld input %ld valid %ld bulk filtered, %d keys, %f ratio, %ld - //aggctx bytes, %ld aggstate bytes\n", policy->stat_input_total_rows, + // aggctx bytes, %ld aggstate bytes\n", policy->stat_input_total_rows, // policy->stat_input_valid_rows, policy->stat_bulk_filtered_rows, // policy->table->members // + policy->have_null_key, policy->stat_input_valid_rows / From ff16ec8a0a30dc332706c581bf69958b4cf56d3c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 9 Oct 2024 16:54:06 +0200 Subject: [PATCH 12/58] license --- tsl/src/import/ts_simplehash.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tsl/src/import/ts_simplehash.h b/tsl/src/import/ts_simplehash.h index f4f6db2d30d..48139ec8138 100644 --- a/tsl/src/import/ts_simplehash.h +++ b/tsl/src/import/ts_simplehash.h @@ -1,3 +1,9 @@ +/* + * This file and its contents are licensed under the Apache License 2.0. + * Please see the included NOTICE for copyright information and + * LICENSE-APACHE for a copy of the license. + */ + /* * simplehash.h * From 4291b173df0824df9b8e42ac154beb32d75677c4 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 9 Oct 2024 16:55:06 +0200 Subject: [PATCH 13/58] benchmark vectorized hash grouping (2024-10-09 no. 
10) From 795ef6b189613de38886cb0aa4c4ff332cdf8f1b Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 11 Oct 2024 21:03:45 +0200 Subject: [PATCH 14/58] test deltadelta changes --- .../compression/algorithms/deltadelta_impl.c | 87 ++++++++++++++++--- 1 file changed, 77 insertions(+), 10 deletions(-) diff --git a/tsl/src/compression/algorithms/deltadelta_impl.c b/tsl/src/compression/algorithms/deltadelta_impl.c index 6e036a5722b..c1078456b3b 100644 --- a/tsl/src/compression/algorithms/deltadelta_impl.c +++ b/tsl/src/compression/algorithms/deltadelta_impl.c @@ -12,7 +12,7 @@ #define FUNCTION_NAME_HELPER(X, Y) X##_##Y #define FUNCTION_NAME(X, Y) FUNCTION_NAME_HELPER(X, Y) -static ArrowArray * +static pg_noinline ArrowArray * FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, MemoryContext dest_mctx) { StringInfoData si = { .data = DatumGetPointer(compressed), .len = VARSIZE(compressed) }; @@ -44,12 +44,12 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory * Pad the number of elements to multiple of 64 bytes if needed, so that we * can work in 64-byte blocks. */ +#define INNER_LOOP_SIZE_LOG2 3 +#define INNER_LOOP_SIZE (1 << INNER_LOOP_SIZE_LOG2) const uint32 n_total = has_nulls ? 
nulls.num_elements : num_deltas; - const uint32 n_total_padded = - ((n_total * sizeof(ELEMENT_TYPE) + 63) / 64) * 64 / sizeof(ELEMENT_TYPE); + const uint32 n_total_padded = pad_to_multiple(INNER_LOOP_SIZE, n_total); const uint32 n_notnull = num_deltas; - const uint32 n_notnull_padded = - ((n_notnull * sizeof(ELEMENT_TYPE) + 63) / 64) * 64 / sizeof(ELEMENT_TYPE); + const uint32 n_notnull_padded = pad_to_multiple(INNER_LOOP_SIZE, n_notnull); Assert(n_total_padded >= n_total); Assert(n_notnull_padded >= n_notnull); Assert(n_total >= n_notnull); @@ -57,7 +57,7 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory /* * We need additional padding at the end of buffer, because the code that - * converts the elements to postres Datum always reads in 8 bytes. + * converts the elements to postgres Datum always reads in 8 bytes. */ const int buffer_bytes = n_total_padded * sizeof(ELEMENT_TYPE) + 8; ELEMENT_TYPE *restrict decompressed_values = MemoryContextAlloc(dest_mctx, buffer_bytes); @@ -75,17 +75,84 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory * Also tried zig-zag decoding in a separate loop, seems to be slightly * slower, around the noise threshold. */ -#define INNER_LOOP_SIZE 8 Assert(n_notnull_padded % INNER_LOOP_SIZE == 0); for (uint32 outer = 0; outer < n_notnull_padded; outer += INNER_LOOP_SIZE) { + uint64 x[INNER_LOOP_SIZE]; for (uint32 inner = 0; inner < INNER_LOOP_SIZE; inner++) { - current_delta += zig_zag_decode(deltas_zigzag[outer + inner]); - current_element += current_delta; - decompressed_values[outer + inner] = current_element; + x[inner] = zig_zag_decode(deltas_zigzag[outer + inner]); } + + x[0] += current_delta; + + /* Now deltas of deltas, will make first-order deltas by prefix summation. 
*/ + for (int l = 0; l < INNER_LOOP_SIZE_LOG2; l++) + { +// for (int i = INNER_LOOP_SIZE - 1; i >= (1 << l); i--) +// { +// x[i] = x[i] + x[i - (1 << l)]; +// } + uint64 xx[INNER_LOOP_SIZE]; + for (int i = 0; i < INNER_LOOP_SIZE; i++) + { + xx[i] = (i >= (1 << l)) ? x[i - (1 << l)] : 0; + } + for (int i = 0; i < INNER_LOOP_SIZE; i++) + { + x[i] += xx[i]; + } + } + +// const uint64 new_delta = current_delta + x[INNER_LOOP_SIZE - 1]; + const uint64 new_delta = x[INNER_LOOP_SIZE - 1]; + + x[0] += current_element; + + /* Now first-order deltas, will make element values by prefix summation. */ + for (int l = 0; l < INNER_LOOP_SIZE_LOG2; l++) + { +// for (int i = INNER_LOOP_SIZE - 1; i >= (1 << l); i--) +// { +// x[i] = x[i] + x[i - (1 << l)]; +// } + +// for (int i = INNER_LOOP_SIZE - 1; i >= 0; i--) +// { +// x[i] = x[i] + ((i >= (1 << l)) ? x[i - (1 << l)] : 0); +// } + + uint64 xx[INNER_LOOP_SIZE]; + for (int i = 0; i < INNER_LOOP_SIZE; i++) + { + xx[i] = (i >= (1 << l)) ? x[i - (1 << l)] : 0; + } + for (int i = 0; i < INNER_LOOP_SIZE; i++) + { + x[i] += xx[i]; + } + } + + /* Now element values. 
*/ +// uint64 xx[INNER_LOOP_SIZE]; +// for (uint32 inner = 0; inner < INNER_LOOP_SIZE; inner++) +// { +// xx[inner] = current_element + (1 + inner) * current_delta; +// } +// for (uint32 inner = 0; inner < INNER_LOOP_SIZE; inner++) +// { +// x[inner] += xx[inner]; +// } + + for (uint32 inner = 0; inner < INNER_LOOP_SIZE; inner++) + { + decompressed_values[outer + inner] = x[inner]; + } + + current_element = x[INNER_LOOP_SIZE - 1]; + current_delta = new_delta; } +#undef INNER_LOOP_SIZE_LOG2 #undef INNER_LOOP_SIZE uint64 *restrict validity_bitmap = NULL; From 1fabb22cd370087bdefa66026b67039e6f7a2186 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 11 Oct 2024 21:04:16 +0200 Subject: [PATCH 15/58] some speedups and simplehash simplifications --- tsl/src/import/ts_simplehash.h | 161 +++++++++--------- .../nodes/vector_agg/grouping_policy_hash.c | 79 +++++++-- 2 files changed, 142 insertions(+), 98 deletions(-) diff --git a/tsl/src/import/ts_simplehash.h b/tsl/src/import/ts_simplehash.h index 48139ec8138..18c3f73bb6a 100644 --- a/tsl/src/import/ts_simplehash.h +++ b/tsl/src/import/ts_simplehash.h @@ -571,14 +571,6 @@ SH_GROW(SH_TYPE *tb, uint64 newsize) static pg_attribute_always_inline SH_ELEMENT_TYPE * SH_INSERT_HASH_INTERNAL(SH_TYPE *restrict tb, SH_KEY_TYPE key, uint32 hash, bool *found) { - uint32 startelem; - uint32 curelem; - SH_ELEMENT_TYPE *restrict data; - uint32 insertdist; - -restart: - insertdist = 0; - /* * We do the grow check even if the key is actually present, to avoid * doing the check inside the loop. 
This also lets us avoid having to @@ -600,15 +592,14 @@ SH_INSERT_HASH_INTERNAL(SH_TYPE *restrict tb, SH_KEY_TYPE key, uint32 hash, bool /* SH_STAT(tb); */ } + SH_ELEMENT_TYPE *restrict data = tb->data; + /* perform insert, start bucket search at optimal location */ - data = tb->data; - startelem = SH_INITIAL_BUCKET(tb, hash); - curelem = startelem; + const uint32 startelem = SH_INITIAL_BUCKET(tb, hash); + uint32 curelem = startelem; + uint32 insertdist = 0; while (true) { - uint32 curdist; - uint32 curhash; - uint32 curoptimal; SH_ELEMENT_TYPE *entry = &data[curelem]; /* any empty bucket can directly be used */ @@ -638,76 +629,14 @@ SH_INSERT_HASH_INTERNAL(SH_TYPE *restrict tb, SH_KEY_TYPE key, uint32 hash, bool return entry; } - curhash = SH_ENTRY_HASH(tb, entry); - curoptimal = SH_INITIAL_BUCKET(tb, curhash); - curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem); + const uint32 curhash = SH_ENTRY_HASH(tb, entry); + const uint32 curoptimal = SH_INITIAL_BUCKET(tb, curhash); + const uint32 curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem); if (insertdist > curdist) { - SH_ELEMENT_TYPE *lastentry = entry; - uint32 emptyelem = curelem; - uint32 moveelem; - int32 emptydist = 0; - - /* find next empty bucket */ - while (true) - { - SH_ELEMENT_TYPE *emptyentry; - - emptyelem = SH_NEXT(tb, emptyelem, startelem); - emptyentry = &data[emptyelem]; - - if (SH_ENTRY_EMPTY(emptyentry)) - { - lastentry = emptyentry; - break; - } - - /* - * To avoid negative consequences from overly imbalanced - * hashtables, grow the hashtable if collisions would require - * us to move a lot of entries. The most likely cause of such - * imbalance is filling a (currently) small table, from a - * currently big one, in hash-table order. Don't grow if the - * hashtable would be too empty, to prevent quick space - * explosion for some weird edge cases. 
- */ - if (unlikely(++emptydist > SH_GROW_MAX_MOVE) && - ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR) - { - tb->grow_threshold = 0; - goto restart; - } - } - - /* shift forward, starting at last occupied element */ - - /* - * TODO: This could be optimized to be one memcpy in many cases, - * excepting wrapping around at the end of ->data. Hasn't shown up - * in profiles so far though. - */ - moveelem = emptyelem; - while (moveelem != curelem) - { - SH_ELEMENT_TYPE *moveentry; - - moveelem = SH_PREV(tb, moveelem, startelem); - moveentry = &data[moveelem]; - - memcpy(lastentry, moveentry, sizeof(SH_ELEMENT_TYPE)); - lastentry = moveentry; - } - - /* and fill the now empty spot */ - tb->members++; - - entry->SH_KEY = key; -#ifdef SH_STORE_HASH - SH_GET_HASH(tb, entry) = hash; -#endif - *found = false; - return entry; + /* We're going to insert at this position. */ + break; } curelem = SH_NEXT(tb, curelem, startelem); @@ -724,10 +653,76 @@ SH_INSERT_HASH_INTERNAL(SH_TYPE *restrict tb, SH_KEY_TYPE key, uint32 hash, bool if (unlikely(insertdist > SH_GROW_MAX_DIB) && ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR) { - tb->grow_threshold = 0; - goto restart; + SH_GROW(tb, tb->size * 2); + return SH_INSERT_HASH_INTERNAL(tb, key, hash, found); } } + + /* Actually insert. */ + SH_ELEMENT_TYPE *entry = &data[curelem]; + SH_ELEMENT_TYPE *lastentry = entry; + uint32 emptyelem = curelem; + int32 emptydist = 0; + + /* find next empty bucket */ + while (true) + { + SH_ELEMENT_TYPE *emptyentry; + + emptyelem = SH_NEXT(tb, emptyelem, startelem); + emptyentry = &data[emptyelem]; + + if (SH_ENTRY_EMPTY(emptyentry)) + { + lastentry = emptyentry; + break; + } + + /* + * To avoid negative consequences from overly imbalanced + * hashtables, grow the hashtable if collisions would require + * us to move a lot of entries. The most likely cause of such + * imbalance is filling a (currently) small table, from a + * currently big one, in hash-table order. 
Don't grow if the + * hashtable would be too empty, to prevent quick space + * explosion for some weird edge cases. + */ + if (unlikely(++emptydist > SH_GROW_MAX_MOVE) && + ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR) + { + SH_GROW(tb, tb->size * 2); + return SH_INSERT_HASH_INTERNAL(tb, key, hash, found); + } + } + + /* shift forward, starting at last occupied element */ + + /* + * TODO: This could be optimized to be one memcpy in many cases, + * excepting wrapping around at the end of ->data. Hasn't shown up + * in profiles so far though. + */ + uint32 moveelem = emptyelem; + while (moveelem != curelem) + { + SH_ELEMENT_TYPE *moveentry; + + moveelem = SH_PREV(tb, moveelem, startelem); + moveentry = &data[moveelem]; + + memcpy(lastentry, moveentry, sizeof(SH_ELEMENT_TYPE)); + lastentry = moveentry; + } + + /* and fill the now empty spot */ + tb->members++; + + entry->SH_KEY = key; +#ifdef SH_STORE_HASH + SH_GET_HASH(tb, entry) = hash; +#endif + *found = false; + return entry; } /* diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.c b/tsl/src/nodes/vector_agg/grouping_policy_hash.c index 3dbd81a3bce..8bce4e1b2a4 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.c @@ -85,9 +85,7 @@ typedef struct List *per_agg_states; uint64 stat_input_total_rows; - uint64 stat_input_valid_rows; - uint64 stat_bulk_filtered_rows; } GroupingPolicyHash; @@ -114,7 +112,7 @@ create_grouping_policy_hash(List *agg_defs, List *output_grouping_columns) palloc0(agg_def->func.state_bytes * policy->allocated_aggstate_rows)); } - policy->table = h_create(CurrentMemoryContext, 1000, NULL); + policy->table = h_create(CurrentMemoryContext, policy->allocated_aggstate_rows, NULL); policy->have_null_key = false; policy->returning_results = false; @@ -211,15 +209,13 @@ compute_single_aggregate(DecompressBatchState *batch_state, int start_row, int e } static pg_attribute_always_inline uint32 
-fill_offsets_impl(GroupingPolicyHash *policy, DecompressBatchState *batch_state, - int key_column_index, uint32 next_unused_state_index, int start_row, int end_row, - uint32 *restrict offsets, +fill_offsets_impl_for_real(GroupingPolicyHash *policy, + CompressedColumnValues column, const uint64 *restrict filter, + uint32 next_unused_state_index, int start_row, int end_row, + uint32 *restrict offsets, void (*get_key)(CompressedColumnValues column, int row, Datum *restrict key, bool *restrict valid)) { - CompressedColumnValues column = batch_state->compressed_columns[key_column_index]; - // Assert(gv->decompression_type == 8 /* lolwut */); - const uint64 *restrict filter = batch_state->vector_qual_result; struct h_hash *restrict table = policy->table; for (int row = start_row; row < end_row; row++) { @@ -248,6 +244,44 @@ fill_offsets_impl(GroupingPolicyHash *policy, DecompressBatchState *batch_state, offsets[row] = 1; } } + return next_unused_state_index; +} + +static pg_attribute_always_inline uint32 +fill_offsets_impl(GroupingPolicyHash *policy, DecompressBatchState *batch_state, + int key_column_index, uint32 next_unused_state_index, int start_row, int end_row, + uint32 *restrict offsets, + void (*get_key)(CompressedColumnValues column, int row, Datum *restrict key, + bool *restrict valid)) +{ + CompressedColumnValues column = batch_state->compressed_columns[key_column_index]; + // Assert(gv->decompression_type == 8 /* lolwut */); + const uint64 *restrict filter = batch_state->vector_qual_result; + + if (filter == NULL && column.buffers[0] == NULL) + { + next_unused_state_index = fill_offsets_impl_for_real(policy, column, + filter, next_unused_state_index, start_row, end_row, offsets, get_key); + } + else if (filter != NULL && column.buffers[0] == NULL) + { + next_unused_state_index = fill_offsets_impl_for_real(policy, column, + filter, next_unused_state_index, start_row, end_row, offsets, get_key); + } + else if (filter == NULL && column.buffers[0] != NULL) + { + 
next_unused_state_index = fill_offsets_impl_for_real(policy, column, + filter, next_unused_state_index, start_row, end_row, offsets, get_key); + } + else if (filter != NULL && column.buffers[0] != NULL) + { + next_unused_state_index = fill_offsets_impl_for_real(policy, column, + filter, next_unused_state_index, start_row, end_row, offsets, get_key); + } + else + { + Assert(false); + } policy->stat_input_total_rows += batch_state->total_batch_rows; policy->stat_input_valid_rows += arrow_num_valid(filter, batch_state->total_batch_rows); @@ -383,8 +417,6 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) // const uint64_t* restrict key_validity = key_column->buffers[0]; const uint64_t *restrict filter = batch_state->vector_qual_result; - uint32 offsets[1000] = { 0 }; - const int n = batch_state->total_batch_rows; int start_row = 0; int end_row = 0; @@ -392,6 +424,10 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) // for (int end_row = MIN(64, n); end_row <= n; end_row += 64) for (start_row = 0; start_row < n; start_row = end_row) { + /* + * If we have a highly selective filter, it's easy to skip the rows for + * which the entire filter bitmap words are zero. + */ if (filter) { if (filter[start_row / 64] == 0) @@ -415,16 +451,22 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) } Assert(start_row <= end_row); Assert(end_row <= n); + /* - * State index zero is invalid, and state index one is for null key. We have - * to initialize it at the first run. + * Remember which aggregation states have already existed, and which we + * have to initialize. State index zero is invalid, and state index one + * is for null key. We have to initialize the null key state at the + * first run. */ - uint32 last_initialized_state_index = + const uint32 last_initialized_state_index = policy->table->members ? 
policy->table->members + 2 : 1; uint32 next_unused_state_index = policy->table->members + 2; + /* + * Match rows to aggregation states using a hash table. + */ + uint32 offsets[1000] = { 0 }; Assert((size_t) end_row <= sizeof(offsets) / sizeof(*offsets)); - switch ((int) key_column->decompression_type) { case DT_Scalar: @@ -471,6 +513,9 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) ListCell *aggdeflc; ListCell *aggstatelc; + /* + * Initialize the aggregate function states for the newly added keys. + */ if (next_unused_state_index > last_initialized_state_index) { if (next_unused_state_index > policy->allocated_aggstate_rows) @@ -494,6 +539,9 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) } } + /* + * Update the aggregate function states. + */ forboth (aggdeflc, policy->agg_defs, aggstatelc, policy->per_agg_states) { compute_single_aggregate(batch_state, @@ -505,6 +553,7 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) policy->agg_extra_mctx); } } + Assert(end_row == n); } static bool From 717abc4ede082bbdc51fe56470ad87a48cfed171 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 11 Oct 2024 21:04:39 +0200 Subject: [PATCH 16/58] Revert "test deltadelta changes" This reverts commit 795ef6b189613de38886cb0aa4c4ff332cdf8f1b. 
--- .../compression/algorithms/deltadelta_impl.c | 87 +++---------------- 1 file changed, 10 insertions(+), 77 deletions(-) diff --git a/tsl/src/compression/algorithms/deltadelta_impl.c b/tsl/src/compression/algorithms/deltadelta_impl.c index c1078456b3b..6e036a5722b 100644 --- a/tsl/src/compression/algorithms/deltadelta_impl.c +++ b/tsl/src/compression/algorithms/deltadelta_impl.c @@ -12,7 +12,7 @@ #define FUNCTION_NAME_HELPER(X, Y) X##_##Y #define FUNCTION_NAME(X, Y) FUNCTION_NAME_HELPER(X, Y) -static pg_noinline ArrowArray * +static ArrowArray * FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, MemoryContext dest_mctx) { StringInfoData si = { .data = DatumGetPointer(compressed), .len = VARSIZE(compressed) }; @@ -44,12 +44,12 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory * Pad the number of elements to multiple of 64 bytes if needed, so that we * can work in 64-byte blocks. */ -#define INNER_LOOP_SIZE_LOG2 3 -#define INNER_LOOP_SIZE (1 << INNER_LOOP_SIZE_LOG2) const uint32 n_total = has_nulls ? nulls.num_elements : num_deltas; - const uint32 n_total_padded = pad_to_multiple(INNER_LOOP_SIZE, n_total); + const uint32 n_total_padded = + ((n_total * sizeof(ELEMENT_TYPE) + 63) / 64) * 64 / sizeof(ELEMENT_TYPE); const uint32 n_notnull = num_deltas; - const uint32 n_notnull_padded = pad_to_multiple(INNER_LOOP_SIZE, n_notnull); + const uint32 n_notnull_padded = + ((n_notnull * sizeof(ELEMENT_TYPE) + 63) / 64) * 64 / sizeof(ELEMENT_TYPE); Assert(n_total_padded >= n_total); Assert(n_notnull_padded >= n_notnull); Assert(n_total >= n_notnull); @@ -57,7 +57,7 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory /* * We need additional padding at the end of buffer, because the code that - * converts the elements to postgres Datum always reads in 8 bytes. + * converts the elements to postres Datum always reads in 8 bytes. 
*/ const int buffer_bytes = n_total_padded * sizeof(ELEMENT_TYPE) + 8; ELEMENT_TYPE *restrict decompressed_values = MemoryContextAlloc(dest_mctx, buffer_bytes); @@ -75,84 +75,17 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory * Also tried zig-zag decoding in a separate loop, seems to be slightly * slower, around the noise threshold. */ +#define INNER_LOOP_SIZE 8 Assert(n_notnull_padded % INNER_LOOP_SIZE == 0); for (uint32 outer = 0; outer < n_notnull_padded; outer += INNER_LOOP_SIZE) { - uint64 x[INNER_LOOP_SIZE]; for (uint32 inner = 0; inner < INNER_LOOP_SIZE; inner++) { - x[inner] = zig_zag_decode(deltas_zigzag[outer + inner]); + current_delta += zig_zag_decode(deltas_zigzag[outer + inner]); + current_element += current_delta; + decompressed_values[outer + inner] = current_element; } - - x[0] += current_delta; - - /* Now deltas of deltas, will make first-order deltas by prefix summation. */ - for (int l = 0; l < INNER_LOOP_SIZE_LOG2; l++) - { -// for (int i = INNER_LOOP_SIZE - 1; i >= (1 << l); i--) -// { -// x[i] = x[i] + x[i - (1 << l)]; -// } - uint64 xx[INNER_LOOP_SIZE]; - for (int i = 0; i < INNER_LOOP_SIZE; i++) - { - xx[i] = (i >= (1 << l)) ? x[i - (1 << l)] : 0; - } - for (int i = 0; i < INNER_LOOP_SIZE; i++) - { - x[i] += xx[i]; - } - } - -// const uint64 new_delta = current_delta + x[INNER_LOOP_SIZE - 1]; - const uint64 new_delta = x[INNER_LOOP_SIZE - 1]; - - x[0] += current_element; - - /* Now first-order deltas, will make element values by prefix summation. */ - for (int l = 0; l < INNER_LOOP_SIZE_LOG2; l++) - { -// for (int i = INNER_LOOP_SIZE - 1; i >= (1 << l); i--) -// { -// x[i] = x[i] + x[i - (1 << l)]; -// } - -// for (int i = INNER_LOOP_SIZE - 1; i >= 0; i--) -// { -// x[i] = x[i] + ((i >= (1 << l)) ? x[i - (1 << l)] : 0); -// } - - uint64 xx[INNER_LOOP_SIZE]; - for (int i = 0; i < INNER_LOOP_SIZE; i++) - { - xx[i] = (i >= (1 << l)) ? 
x[i - (1 << l)] : 0; - } - for (int i = 0; i < INNER_LOOP_SIZE; i++) - { - x[i] += xx[i]; - } - } - - /* Now element values. */ -// uint64 xx[INNER_LOOP_SIZE]; -// for (uint32 inner = 0; inner < INNER_LOOP_SIZE; inner++) -// { -// xx[inner] = current_element + (1 + inner) * current_delta; -// } -// for (uint32 inner = 0; inner < INNER_LOOP_SIZE; inner++) -// { -// x[inner] += xx[inner]; -// } - - for (uint32 inner = 0; inner < INNER_LOOP_SIZE; inner++) - { - decompressed_values[outer + inner] = x[inner]; - } - - current_element = x[INNER_LOOP_SIZE - 1]; - current_delta = new_delta; } -#undef INNER_LOOP_SIZE_LOG2 #undef INNER_LOOP_SIZE uint64 *restrict validity_bitmap = NULL; From b03bd6b8456b0950bafc5e6c582a0792a6cf4ace Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 11 Oct 2024 21:03:45 +0200 Subject: [PATCH 17/58] test deltadelta changes --- .../compression/algorithms/deltadelta_impl.c | 87 ++++++++++++++++--- 1 file changed, 77 insertions(+), 10 deletions(-) diff --git a/tsl/src/compression/algorithms/deltadelta_impl.c b/tsl/src/compression/algorithms/deltadelta_impl.c index 6e036a5722b..c1078456b3b 100644 --- a/tsl/src/compression/algorithms/deltadelta_impl.c +++ b/tsl/src/compression/algorithms/deltadelta_impl.c @@ -12,7 +12,7 @@ #define FUNCTION_NAME_HELPER(X, Y) X##_##Y #define FUNCTION_NAME(X, Y) FUNCTION_NAME_HELPER(X, Y) -static ArrowArray * +static pg_noinline ArrowArray * FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, MemoryContext dest_mctx) { StringInfoData si = { .data = DatumGetPointer(compressed), .len = VARSIZE(compressed) }; @@ -44,12 +44,12 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory * Pad the number of elements to multiple of 64 bytes if needed, so that we * can work in 64-byte blocks. */ +#define INNER_LOOP_SIZE_LOG2 3 +#define INNER_LOOP_SIZE (1 << INNER_LOOP_SIZE_LOG2) const uint32 n_total = has_nulls ? 
nulls.num_elements : num_deltas; - const uint32 n_total_padded = - ((n_total * sizeof(ELEMENT_TYPE) + 63) / 64) * 64 / sizeof(ELEMENT_TYPE); + const uint32 n_total_padded = pad_to_multiple(INNER_LOOP_SIZE, n_total); const uint32 n_notnull = num_deltas; - const uint32 n_notnull_padded = - ((n_notnull * sizeof(ELEMENT_TYPE) + 63) / 64) * 64 / sizeof(ELEMENT_TYPE); + const uint32 n_notnull_padded = pad_to_multiple(INNER_LOOP_SIZE, n_notnull); Assert(n_total_padded >= n_total); Assert(n_notnull_padded >= n_notnull); Assert(n_total >= n_notnull); @@ -57,7 +57,7 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory /* * We need additional padding at the end of buffer, because the code that - * converts the elements to postres Datum always reads in 8 bytes. + * converts the elements to postgres Datum always reads in 8 bytes. */ const int buffer_bytes = n_total_padded * sizeof(ELEMENT_TYPE) + 8; ELEMENT_TYPE *restrict decompressed_values = MemoryContextAlloc(dest_mctx, buffer_bytes); @@ -75,17 +75,84 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory * Also tried zig-zag decoding in a separate loop, seems to be slightly * slower, around the noise threshold. */ -#define INNER_LOOP_SIZE 8 Assert(n_notnull_padded % INNER_LOOP_SIZE == 0); for (uint32 outer = 0; outer < n_notnull_padded; outer += INNER_LOOP_SIZE) { + uint64 x[INNER_LOOP_SIZE]; for (uint32 inner = 0; inner < INNER_LOOP_SIZE; inner++) { - current_delta += zig_zag_decode(deltas_zigzag[outer + inner]); - current_element += current_delta; - decompressed_values[outer + inner] = current_element; + x[inner] = zig_zag_decode(deltas_zigzag[outer + inner]); } + + x[0] += current_delta; + + /* Now deltas of deltas, will make first-order deltas by prefix summation. 
*/ + for (int l = 0; l < INNER_LOOP_SIZE_LOG2; l++) + { +// for (int i = INNER_LOOP_SIZE - 1; i >= (1 << l); i--) +// { +// x[i] = x[i] + x[i - (1 << l)]; +// } + uint64 xx[INNER_LOOP_SIZE]; + for (int i = 0; i < INNER_LOOP_SIZE; i++) + { + xx[i] = (i >= (1 << l)) ? x[i - (1 << l)] : 0; + } + for (int i = 0; i < INNER_LOOP_SIZE; i++) + { + x[i] += xx[i]; + } + } + +// const uint64 new_delta = current_delta + x[INNER_LOOP_SIZE - 1]; + const uint64 new_delta = x[INNER_LOOP_SIZE - 1]; + + x[0] += current_element; + + /* Now first-order deltas, will make element values by prefix summation. */ + for (int l = 0; l < INNER_LOOP_SIZE_LOG2; l++) + { +// for (int i = INNER_LOOP_SIZE - 1; i >= (1 << l); i--) +// { +// x[i] = x[i] + x[i - (1 << l)]; +// } + +// for (int i = INNER_LOOP_SIZE - 1; i >= 0; i--) +// { +// x[i] = x[i] + ((i >= (1 << l)) ? x[i - (1 << l)] : 0); +// } + + uint64 xx[INNER_LOOP_SIZE]; + for (int i = 0; i < INNER_LOOP_SIZE; i++) + { + xx[i] = (i >= (1 << l)) ? x[i - (1 << l)] : 0; + } + for (int i = 0; i < INNER_LOOP_SIZE; i++) + { + x[i] += xx[i]; + } + } + + /* Now element values. 
*/ +// uint64 xx[INNER_LOOP_SIZE]; +// for (uint32 inner = 0; inner < INNER_LOOP_SIZE; inner++) +// { +// xx[inner] = current_element + (1 + inner) * current_delta; +// } +// for (uint32 inner = 0; inner < INNER_LOOP_SIZE; inner++) +// { +// x[inner] += xx[inner]; +// } + + for (uint32 inner = 0; inner < INNER_LOOP_SIZE; inner++) + { + decompressed_values[outer + inner] = x[inner]; + } + + current_element = x[INNER_LOOP_SIZE - 1]; + current_delta = new_delta; } +#undef INNER_LOOP_SIZE_LOG2 #undef INNER_LOOP_SIZE uint64 *restrict validity_bitmap = NULL; From 166d0e83e95f475cab1c302fb66e5eda8266b63b Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 14 Oct 2024 11:21:08 +0100 Subject: [PATCH 18/58] work with signed types --- tsl/src/compression/algorithms/deltadelta.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tsl/src/compression/algorithms/deltadelta.c b/tsl/src/compression/algorithms/deltadelta.c index 48e3b00b893..e768b5c5f59 100644 --- a/tsl/src/compression/algorithms/deltadelta.c +++ b/tsl/src/compression/algorithms/deltadelta.c @@ -27,7 +27,7 @@ #include "simple8b_rle_bitmap.h" static uint64 zig_zag_encode(uint64 value); -static uint64 zig_zag_decode(uint64 value); +static int64 zig_zag_decode(uint64 value); typedef struct DeltaDeltaCompressed { @@ -583,15 +583,15 @@ delta_delta_decompression_iterator_try_next_forward(DecompressionIterator *iter) #undef ELEMENT_TYPE /* Functions for bulk decompression. 
*/ -#define ELEMENT_TYPE uint16 +#define ELEMENT_TYPE int16 #include "deltadelta_impl.c" #undef ELEMENT_TYPE -#define ELEMENT_TYPE uint32 +#define ELEMENT_TYPE int32 #include "deltadelta_impl.c" #undef ELEMENT_TYPE -#define ELEMENT_TYPE uint64 +#define ELEMENT_TYPE int64 #include "deltadelta_impl.c" #undef ELEMENT_TYPE @@ -603,12 +603,12 @@ delta_delta_decompress_all(Datum compressed_data, Oid element_type, MemoryContex case INT8OID: case TIMESTAMPOID: case TIMESTAMPTZOID: - return delta_delta_decompress_all_uint64(compressed_data, dest_mctx); + return delta_delta_decompress_all_int64(compressed_data, dest_mctx); case INT4OID: case DATEOID: - return delta_delta_decompress_all_uint32(compressed_data, dest_mctx); + return delta_delta_decompress_all_int32(compressed_data, dest_mctx); case INT2OID: - return delta_delta_decompress_all_uint16(compressed_data, dest_mctx); + return delta_delta_decompress_all_int16(compressed_data, dest_mctx); default: elog(ERROR, "type '%s' is not supported for deltadelta decompression", @@ -747,7 +747,7 @@ zig_zag_encode(uint64 value) return (value << 1) ^ (((int64) value) < 0 ? 0xFFFFFFFFFFFFFFFFULL : 0); } -static pg_attribute_always_inline uint64 +static pg_attribute_always_inline int64 zig_zag_decode(uint64 value) { /* ZigZag turns negative numbers into odd ones, and positive numbers into even ones*/ From 7f578b4f65274b8ec1b3127a018c08661c9eba98 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 14 Oct 2024 11:21:15 +0100 Subject: [PATCH 19/58] Revert "work with signed types" This reverts commit 166d0e83e95f475cab1c302fb66e5eda8266b63b. 
--- tsl/src/compression/algorithms/deltadelta.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tsl/src/compression/algorithms/deltadelta.c b/tsl/src/compression/algorithms/deltadelta.c index e768b5c5f59..48e3b00b893 100644 --- a/tsl/src/compression/algorithms/deltadelta.c +++ b/tsl/src/compression/algorithms/deltadelta.c @@ -27,7 +27,7 @@ #include "simple8b_rle_bitmap.h" static uint64 zig_zag_encode(uint64 value); -static int64 zig_zag_decode(uint64 value); +static uint64 zig_zag_decode(uint64 value); typedef struct DeltaDeltaCompressed { @@ -583,15 +583,15 @@ delta_delta_decompression_iterator_try_next_forward(DecompressionIterator *iter) #undef ELEMENT_TYPE /* Functions for bulk decompression. */ -#define ELEMENT_TYPE int16 +#define ELEMENT_TYPE uint16 #include "deltadelta_impl.c" #undef ELEMENT_TYPE -#define ELEMENT_TYPE int32 +#define ELEMENT_TYPE uint32 #include "deltadelta_impl.c" #undef ELEMENT_TYPE -#define ELEMENT_TYPE int64 +#define ELEMENT_TYPE uint64 #include "deltadelta_impl.c" #undef ELEMENT_TYPE @@ -603,12 +603,12 @@ delta_delta_decompress_all(Datum compressed_data, Oid element_type, MemoryContex case INT8OID: case TIMESTAMPOID: case TIMESTAMPTZOID: - return delta_delta_decompress_all_int64(compressed_data, dest_mctx); + return delta_delta_decompress_all_uint64(compressed_data, dest_mctx); case INT4OID: case DATEOID: - return delta_delta_decompress_all_int32(compressed_data, dest_mctx); + return delta_delta_decompress_all_uint32(compressed_data, dest_mctx); case INT2OID: - return delta_delta_decompress_all_int16(compressed_data, dest_mctx); + return delta_delta_decompress_all_uint16(compressed_data, dest_mctx); default: elog(ERROR, "type '%s' is not supported for deltadelta decompression", @@ -747,7 +747,7 @@ zig_zag_encode(uint64 value) return (value << 1) ^ (((int64) value) < 0 ? 
0xFFFFFFFFFFFFFFFFULL : 0); } -static pg_attribute_always_inline int64 +static pg_attribute_always_inline uint64 zig_zag_decode(uint64 value) { /* ZigZag turns negative numbers into odd ones, and positive numbers into even ones*/ From e70cb0ba83ae0a9602578a7eaa801d688801a91c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 14 Oct 2024 11:39:04 +0100 Subject: [PATCH 20/58] bulk stuff specialized to element type --- .../compression/algorithms/deltadelta_impl.c | 52 +++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/tsl/src/compression/algorithms/deltadelta_impl.c b/tsl/src/compression/algorithms/deltadelta_impl.c index c1078456b3b..ac9d374f4c8 100644 --- a/tsl/src/compression/algorithms/deltadelta_impl.c +++ b/tsl/src/compression/algorithms/deltadelta_impl.c @@ -78,7 +78,7 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory Assert(n_notnull_padded % INNER_LOOP_SIZE == 0); for (uint32 outer = 0; outer < n_notnull_padded; outer += INNER_LOOP_SIZE) { - uint64 x[INNER_LOOP_SIZE]; + ELEMENT_TYPE x[INNER_LOOP_SIZE]; for (uint32 inner = 0; inner < INNER_LOOP_SIZE; inner++) { x[inner] = zig_zag_decode(deltas_zigzag[outer + inner]); @@ -89,48 +89,48 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory /* Now deltas of deltas, will make first-order deltas by prefix summation. */ for (int l = 0; l < INNER_LOOP_SIZE_LOG2; l++) { -// for (int i = INNER_LOOP_SIZE - 1; i >= (1 << l); i--) -// { -// x[i] = x[i] + x[i - (1 << l)]; -// } - uint64 xx[INNER_LOOP_SIZE]; - for (int i = 0; i < INNER_LOOP_SIZE; i++) + for (int i = INNER_LOOP_SIZE - 1; i >= (1 << l); i--) { - xx[i] = (i >= (1 << l)) ? x[i - (1 << l)] : 0; - } - for (int i = 0; i < INNER_LOOP_SIZE; i++) - { - x[i] += xx[i]; + x[i] = x[i] + x[i - (1 << l)]; } +// ELEMENT_TYPE xx[INNER_LOOP_SIZE]; +// for (int i = 0; i < INNER_LOOP_SIZE; i++) +// { +// xx[i] = (i >= (1 << l)) ? 
x[i - (1 << l)] : 0; +// } +// for (int i = 0; i < INNER_LOOP_SIZE; i++) +// { +// x[i] += xx[i]; +// } } // const uint64 new_delta = current_delta + x[INNER_LOOP_SIZE - 1]; - const uint64 new_delta = x[INNER_LOOP_SIZE - 1]; + const ELEMENT_TYPE new_delta = x[INNER_LOOP_SIZE - 1]; x[0] += current_element; /* Now first-order deltas, will make element values by prefix summation. */ for (int l = 0; l < INNER_LOOP_SIZE_LOG2; l++) { -// for (int i = INNER_LOOP_SIZE - 1; i >= (1 << l); i--) -// { -// x[i] = x[i] + x[i - (1 << l)]; -// } + for (int i = INNER_LOOP_SIZE - 1; i >= (1 << l); i--) + { + x[i] = x[i] + x[i - (1 << l)]; + } // for (int i = INNER_LOOP_SIZE - 1; i >= 0; i--) // { // x[i] = x[i] + ((i >= (1 << l)) ? x[i - (1 << l)] : 0); // } - uint64 xx[INNER_LOOP_SIZE]; - for (int i = 0; i < INNER_LOOP_SIZE; i++) - { - xx[i] = (i >= (1 << l)) ? x[i - (1 << l)] : 0; - } - for (int i = 0; i < INNER_LOOP_SIZE; i++) - { - x[i] += xx[i]; - } +// ELEMENT_TYPE xx[INNER_LOOP_SIZE]; +// for (int i = 0; i < INNER_LOOP_SIZE; i++) +// { +// xx[i] = (i >= (1 << l)) ? x[i - (1 << l)] : 0; +// } +// for (int i = 0; i < INNER_LOOP_SIZE; i++) +// { +// x[i] += xx[i]; +// } } /* Now element values. 
*/ From 00408446976ead1f74c2239863cf676e3d8f1cf2 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 14 Oct 2024 13:05:47 +0100 Subject: [PATCH 21/58] roll back the delta delta stuff --- .../compression/algorithms/deltadelta_impl.c | 75 +------------------ 1 file changed, 4 insertions(+), 71 deletions(-) diff --git a/tsl/src/compression/algorithms/deltadelta_impl.c b/tsl/src/compression/algorithms/deltadelta_impl.c index ac9d374f4c8..3100c8a8822 100644 --- a/tsl/src/compression/algorithms/deltadelta_impl.c +++ b/tsl/src/compression/algorithms/deltadelta_impl.c @@ -12,7 +12,7 @@ #define FUNCTION_NAME_HELPER(X, Y) X##_##Y #define FUNCTION_NAME(X, Y) FUNCTION_NAME_HELPER(X, Y) -static pg_noinline ArrowArray * +static ArrowArray * FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, MemoryContext dest_mctx) { StringInfoData si = { .data = DatumGetPointer(compressed), .len = VARSIZE(compressed) }; @@ -78,79 +78,12 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory Assert(n_notnull_padded % INNER_LOOP_SIZE == 0); for (uint32 outer = 0; outer < n_notnull_padded; outer += INNER_LOOP_SIZE) { - ELEMENT_TYPE x[INNER_LOOP_SIZE]; for (uint32 inner = 0; inner < INNER_LOOP_SIZE; inner++) { - x[inner] = zig_zag_decode(deltas_zigzag[outer + inner]); + current_delta += zig_zag_decode(deltas_zigzag[outer + inner]); + current_element += current_delta; + decompressed_values[outer + inner] = current_element; } - - x[0] += current_delta; - - /* Now deltas of deltas, will make first-order deltas by prefix summation. */ - for (int l = 0; l < INNER_LOOP_SIZE_LOG2; l++) - { - for (int i = INNER_LOOP_SIZE - 1; i >= (1 << l); i--) - { - x[i] = x[i] + x[i - (1 << l)]; - } -// ELEMENT_TYPE xx[INNER_LOOP_SIZE]; -// for (int i = 0; i < INNER_LOOP_SIZE; i++) -// { -// xx[i] = (i >= (1 << l)) ? 
x[i - (1 << l)] : 0; -// } -// for (int i = 0; i < INNER_LOOP_SIZE; i++) -// { -// x[i] += xx[i]; -// } - } - -// const uint64 new_delta = current_delta + x[INNER_LOOP_SIZE - 1]; - const ELEMENT_TYPE new_delta = x[INNER_LOOP_SIZE - 1]; - - x[0] += current_element; - - /* Now first-order deltas, will make element values by prefix summation. */ - for (int l = 0; l < INNER_LOOP_SIZE_LOG2; l++) - { - for (int i = INNER_LOOP_SIZE - 1; i >= (1 << l); i--) - { - x[i] = x[i] + x[i - (1 << l)]; - } - -// for (int i = INNER_LOOP_SIZE - 1; i >= 0; i--) -// { -// x[i] = x[i] + ((i >= (1 << l)) ? x[i - (1 << l)] : 0); -// } - -// ELEMENT_TYPE xx[INNER_LOOP_SIZE]; -// for (int i = 0; i < INNER_LOOP_SIZE; i++) -// { -// xx[i] = (i >= (1 << l)) ? x[i - (1 << l)] : 0; -// } -// for (int i = 0; i < INNER_LOOP_SIZE; i++) -// { -// x[i] += xx[i]; -// } - } - - /* Now element values. */ -// uint64 xx[INNER_LOOP_SIZE]; -// for (uint32 inner = 0; inner < INNER_LOOP_SIZE; inner++) -// { -// xx[inner] = current_element + (1 + inner) * current_delta; -// } -// for (uint32 inner = 0; inner < INNER_LOOP_SIZE; inner++) -// { -// x[inner] += xx[inner]; -// } - - for (uint32 inner = 0; inner < INNER_LOOP_SIZE; inner++) - { - decompressed_values[outer + inner] = x[inner]; - } - - current_element = x[INNER_LOOP_SIZE - 1]; - current_delta = new_delta; } #undef INNER_LOOP_SIZE_LOG2 #undef INNER_LOOP_SIZE From 694faf68889ff0dccf2a0dd17d36288746d84e5e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 14 Oct 2024 13:09:59 +0100 Subject: [PATCH 22/58] use simplehash --- tsl/src/nodes/vector_agg/grouping_policy_hash.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.c b/tsl/src/nodes/vector_agg/grouping_policy_hash.c index 8bce4e1b2a4..9bac208b090 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.c @@ -25,6 +25,7 @@ typedef 
struct { Datum key; + uint32 status; uint32 agg_state_index; } HashEntry; @@ -58,7 +59,7 @@ hash64(uint64 x) #define SH_DECLARE #define SH_DEFINE #define SH_ENTRY_EMPTY(entry) (entry->agg_state_index == 0) -#include "import/ts_simplehash.h" +#include struct h_hash; From 3d056744e3e694b3a81f9e37d373a49b904a6373 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 14 Oct 2024 13:31:06 +0100 Subject: [PATCH 23/58] cleanup --- .../nodes/vector_agg/grouping_policy_hash.c | 221 ++++++++++-------- 1 file changed, 127 insertions(+), 94 deletions(-) diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.c b/tsl/src/nodes/vector_agg/grouping_policy_hash.c index 9bac208b090..d2aa6870335 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.c @@ -20,16 +20,13 @@ #include "nodes/decompress_chunk/compressed_batch.h" #include "nodes/vector_agg/exec.h" -#include - -typedef struct -{ - Datum key; - uint32 status; - uint32 agg_state_index; -} HashEntry; - +/* + * We can use crc32 as a hash function, it has bad properties but takes only one + * cycle, which is why it is sometimes used in the existing hash table + * implementations. + */ #ifdef USE_SSE42_CRC32C +#include static pg_attribute_always_inline uint64 hash64(uint64 x) { @@ -37,6 +34,9 @@ hash64(uint64 x) } #else +/* + * When we don't have the crc32 instruction, use the SplitMix64 finalizer. + */ static pg_attribute_always_inline uint64 hash64(uint64 x) { @@ -49,6 +49,17 @@ hash64(uint64 x) } #endif +/* + * For the hash table, use the generic Datum key that is mapped to the aggregate + * state index. 
+ */ +typedef struct +{ + Datum key; + uint32 status; + uint32 agg_state_index; +} HashEntry; + #define SH_PREFIX h #define SH_ELEMENT_TYPE HashEntry #define SH_KEY_TYPE Datum @@ -209,11 +220,13 @@ compute_single_aggregate(DecompressBatchState *batch_state, int start_row, int e } } +/* + * Fill the aggregation state offsets for all rows using a hash table. + */ static pg_attribute_always_inline uint32 -fill_offsets_impl_for_real(GroupingPolicyHash *policy, - CompressedColumnValues column, const uint64 *restrict filter, - uint32 next_unused_state_index, int start_row, int end_row, - uint32 *restrict offsets, +fill_offsets_impl(GroupingPolicyHash *policy, CompressedColumnValues column, + const uint64 *restrict filter, uint32 next_unused_state_index, int start_row, + int end_row, uint32 *restrict offsets, void (*get_key)(CompressedColumnValues column, int row, Datum *restrict key, bool *restrict valid)) { @@ -248,36 +261,64 @@ fill_offsets_impl_for_real(GroupingPolicyHash *policy, return next_unused_state_index; } +/* + * This function exists just to nudge the compiler to generate simplified + * implementation for the important case where the entire batch matches and the + * key has no null values. 
+ */ static pg_attribute_always_inline uint32 -fill_offsets_impl(GroupingPolicyHash *policy, DecompressBatchState *batch_state, - int key_column_index, uint32 next_unused_state_index, int start_row, int end_row, - uint32 *restrict offsets, - void (*get_key)(CompressedColumnValues column, int row, Datum *restrict key, - bool *restrict valid)) +fill_offsets_dispatch(GroupingPolicyHash *policy, DecompressBatchState *batch_state, + int key_column_index, uint32 next_unused_state_index, int start_row, + int end_row, uint32 *restrict offsets, + void (*get_key)(CompressedColumnValues column, int row, Datum *restrict key, + bool *restrict valid)) { CompressedColumnValues column = batch_state->compressed_columns[key_column_index]; - // Assert(gv->decompression_type == 8 /* lolwut */); const uint64 *restrict filter = batch_state->vector_qual_result; if (filter == NULL && column.buffers[0] == NULL) { - next_unused_state_index = fill_offsets_impl_for_real(policy, column, - filter, next_unused_state_index, start_row, end_row, offsets, get_key); + next_unused_state_index = fill_offsets_impl(policy, + column, + filter, + next_unused_state_index, + start_row, + end_row, + offsets, + get_key); } else if (filter != NULL && column.buffers[0] == NULL) { - next_unused_state_index = fill_offsets_impl_for_real(policy, column, - filter, next_unused_state_index, start_row, end_row, offsets, get_key); + next_unused_state_index = fill_offsets_impl(policy, + column, + filter, + next_unused_state_index, + start_row, + end_row, + offsets, + get_key); } else if (filter == NULL && column.buffers[0] != NULL) { - next_unused_state_index = fill_offsets_impl_for_real(policy, column, - filter, next_unused_state_index, start_row, end_row, offsets, get_key); + next_unused_state_index = fill_offsets_impl(policy, + column, + filter, + next_unused_state_index, + start_row, + end_row, + offsets, + get_key); } else if (filter != NULL && column.buffers[0] != NULL) { - next_unused_state_index = 
fill_offsets_impl_for_real(policy, column, - filter, next_unused_state_index, start_row, end_row, offsets, get_key); + next_unused_state_index = fill_offsets_impl(policy, + column, + filter, + next_unused_state_index, + start_row, + end_row, + offsets, + get_key); } else { @@ -289,15 +330,17 @@ fill_offsets_impl(GroupingPolicyHash *policy, DecompressBatchState *batch_state, return next_unused_state_index; } -// static pg_attribute_always_inline -// void get_key_generic(CompressedColumnValues *column, int row, Datum *key, bool *valid) -//{ -// Assert(column->decompression_type > 0); -// const void *values = column->arrow->buffers[1]; -// const uint64 *key_validity = column->arrow->buffers[0]; -// *valid = arrow_row_is_valid(key_validity, row); -// memcpy(key, column->decompression_type * row + (char *) values, column->decompression_type); -// } +/* + * Functions to get the key value from the decompressed column, depending on its + * width and whether it's a scalar column. + */ +static pg_attribute_always_inline void +get_key_scalar(CompressedColumnValues column, int row, Datum *restrict key, bool *restrict valid) +{ + Assert(column.decompression_type == DT_Scalar); + *key = *column.output_value; + *valid = !*column.output_isnull; +} static pg_attribute_always_inline void get_key_arrow_fixed(CompressedColumnValues column, int row, int key_bytes, Datum *restrict key, @@ -328,31 +371,33 @@ static pg_attribute_always_inline void get_key_arrow_fixed_8(CompressedColumnValues column, int row, Datum *restrict key, bool *restrict valid) { - /* FIXME for float8 not by value */ - get_key_arrow_fixed(column, row, 8, key, valid); -} +#ifndef USE_FLOAT8_BYVAL + /* + * Shouldn't be called for this configuration, because we only use this + * grouping strategy for by-value types. 
+ */ + Assert(false); +#endif -static pg_attribute_always_inline void -get_key_scalar(CompressedColumnValues column, int row, Datum *restrict key, bool *restrict valid) -{ - Assert(column.decompression_type == DT_Scalar); - *key = *column.output_value; - *valid = !*column.output_isnull; + get_key_arrow_fixed(column, row, 8, key, valid); } +/* + * Implementation of bulk hashing specialized for a given key width. + */ static pg_noinline uint32 fill_offsets_arrow_fixed_8(GroupingPolicyHash *policy, DecompressBatchState *batch_state, int key_column_index, uint32 next_unused_state_index, int start_row, int end_row, uint32 *restrict offsets) { - return fill_offsets_impl(policy, - batch_state, - key_column_index, - next_unused_state_index, - start_row, - end_row, - offsets, - get_key_arrow_fixed_8); + return fill_offsets_dispatch(policy, + batch_state, + key_column_index, + next_unused_state_index, + start_row, + end_row, + offsets, + get_key_arrow_fixed_8); } static pg_noinline uint32 @@ -360,14 +405,14 @@ fill_offsets_arrow_fixed_4(GroupingPolicyHash *policy, DecompressBatchState *bat int key_column_index, uint32 next_unused_state_index, int start_row, int end_row, uint32 *restrict offsets) { - return fill_offsets_impl(policy, - batch_state, - key_column_index, - next_unused_state_index, - start_row, - end_row, - offsets, - get_key_arrow_fixed_4); + return fill_offsets_dispatch(policy, + batch_state, + key_column_index, + next_unused_state_index, + start_row, + end_row, + offsets, + get_key_arrow_fixed_4); } static pg_noinline uint32 @@ -375,14 +420,14 @@ fill_offsets_arrow_fixed_2(GroupingPolicyHash *policy, DecompressBatchState *bat int key_column_index, uint32 next_unused_state_index, int start_row, int end_row, uint32 *restrict offsets) { - return fill_offsets_impl(policy, - batch_state, - key_column_index, - next_unused_state_index, - start_row, - end_row, - offsets, - get_key_arrow_fixed_2); + return fill_offsets_dispatch(policy, + batch_state, + key_column_index, 
+ next_unused_state_index, + start_row, + end_row, + offsets, + get_key_arrow_fixed_2); } static pg_noinline uint32 @@ -390,14 +435,14 @@ fill_offsets_scalar(GroupingPolicyHash *policy, DecompressBatchState *batch_stat int key_column_index, uint32 next_unused_state_index, int start_row, int end_row, uint32 *restrict offsets) { - return fill_offsets_impl(policy, - batch_state, - key_column_index, - next_unused_state_index, - start_row, - end_row, - offsets, - get_key_scalar); + return fill_offsets_dispatch(policy, + batch_state, + key_column_index, + next_unused_state_index, + start_row, + end_row, + offsets, + get_key_scalar); } static void @@ -407,6 +452,9 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) Assert(!policy->returning_results); + const uint64_t *restrict filter = batch_state->vector_qual_result; + const int n = batch_state->total_batch_rows; + /* * For the partial aggregation node, the grouping columns are always in the * output, so we don't have to separately look at the list of the grouping @@ -415,14 +463,8 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) Assert(list_length(policy->output_grouping_columns) == 1); GroupingColumn *g = linitial(policy->output_grouping_columns); CompressedColumnValues *key_column = &batch_state->compressed_columns[g->input_offset]; - // const uint64_t* restrict key_validity = key_column->buffers[0]; - const uint64_t *restrict filter = batch_state->vector_qual_result; - - const int n = batch_state->total_batch_rows; int start_row = 0; int end_row = 0; - - // for (int end_row = MIN(64, n); end_row <= n; end_row += 64) for (start_row = 0; start_row < n; start_row = end_row) { /* @@ -561,11 +603,7 @@ static bool gp_hash_should_emit(GroupingPolicy *gp) { GroupingPolicyHash *policy = (GroupingPolicyHash *) gp; - (void) policy; - // if (policy->table->members + policy->have_null_key > 0) - // { - // return true; - // } + /* * Don't grow the hash table cardinality too much, 
otherwise we become bound * by memory reads. In general, when this first stage of grouping doesn't @@ -573,11 +611,7 @@ gp_hash_should_emit(GroupingPolicy *gp) * work will be done by the final Postgres aggregation, so we should bail * out early here. */ - if (policy->table->members * sizeof(HashEntry) > 128 * 1024) - { - return true; - } - return false; + return policy->table->members * sizeof(HashEntry) > 128 * 1024; } static bool @@ -587,7 +621,6 @@ gp_hash_do_emit(GroupingPolicy *gp, TupleTableSlot *aggregated_slot) if (!policy->returning_results) { - /* FIXME doesn't work on final result emission w/o should_emit. */ policy->returning_results = true; h_start_iterate(policy->table, &policy->iter); // fprintf(stderr, From d90a90f24c9f5bfa3089d9233f836d541ce5ecb5 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 14 Oct 2024 13:31:18 +0100 Subject: [PATCH 24/58] benchmark vectorized hash grouping (simple) (2024-10-14 no. 11) From 4a9354964fcecd533e770f7e2f1d64b9d1c7e660 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 15 Oct 2024 12:26:30 +0100 Subject: [PATCH 25/58] add more tests --- tsl/test/expected/vector_agg_functions.out | 2145 ++++++++++++-------- tsl/test/sql/vector_agg_functions.sql | 49 +- 2 files changed, 1293 insertions(+), 901 deletions(-) diff --git a/tsl/test/expected/vector_agg_functions.out b/tsl/test/expected/vector_agg_functions.out index 9e9f8115e57..c79c667d15a 100644 --- a/tsl/test/expected/vector_agg_functions.out +++ b/tsl/test/expected/vector_agg_functions.out @@ -9,7 +9,7 @@ $$ LANGUAGE SQL; \set CHUNKS 2::int \set CHUNK_ROWS 100000::int \set GROUPING_CARDINALITY 10::int -create table aggfns(t int, s int, ss int, +create table aggfns(t int, s int, cint2 int2, cint4 int4, cint8 int8, cfloat4 float4, cfloat8 float8, cts timestamp, ctstz timestamptz, @@ -22,27 +22,47 @@ NOTICE: adding not-null constraint to column "s" 
(1,public,aggfns,t) (1 row) -insert into aggfns -select s * 10000::int + t, - s, +create view source as +select s * 10000::int + t as t, s, - case when t % 1051 = 0 then null else (mix(s + t + 1) * 32767)::int2 end, - (mix(s + t + 2) * 32767 * 65536)::int4, - (mix(s + t + 3) * 32767 * 65536)::int8, + case when t % 1051 = 0 then null else (mix(s + t + 1) * 32767)::int2 end as cint2, + (mix(s + t + 2) * 32767 * 65536)::int4 as cint4, + (mix(s + t + 3) * 32767 * 65536)::int8 as cint8, case when s = 1 and t = 1061 then 'nan'::float4 when s = 2 and t = 1061 then '+inf'::float4 when s = 3 and t = 1061 then '-inf'::float4 - else (mix(s + t + 4) * 100)::float4 end, - (mix(s + t + 5) * 100)::float8, - '2021-01-01 01:01:01'::timestamp + interval '1 second' * (s * 10000::int + t), - '2021-01-01 01:01:01'::timestamptz + interval '1 second' * (s * 10000::int + t), - '2021-01-01 01:01:01'::timestamptz + interval '1 day' * (s * 10000::int + t) + else (mix(s + t + 4) * 100)::float4 end as cfloat4, + (mix(s + t + 5) * 100)::float8 as cfloat8, + '2021-01-01 01:01:01'::timestamp + interval '1 second' * (s * 10000::int + t) as cts, + '2021-01-01 01:01:01'::timestamptz + interval '1 second' * (s * 10000::int + t) as ctstz, + '2021-01-01'::date + interval '1 day' * (s * 10000::int + t) as cdate from generate_series(1::int, :CHUNK_ROWS * :CHUNKS / :GROUPING_CARDINALITY) t, generate_series(0::int, :GROUPING_CARDINALITY - 1::int) s(s) ; +insert into aggfns select * from source where s = 1; alter table aggfns set (timescaledb.compress, timescaledb.compress_orderby = 't', timescaledb.compress_segmentby = 's'); +select count(compress_chunk(x)) from show_chunks('aggfns') x; + count +------- + 1 +(1 row) + +alter table aggfns add column ss int default 11; +insert into aggfns +select *, + case + -- null in entire batch + when s = 2 then null + -- null for some rows + when s = 3 and t % 1053 = 0 then null + -- for some rows same as default + when s = 4 and t % 1057 = 0 then 11 + -- not null for 
entire batch + else s + end as ss +from source where s != 1; select count(compress_chunk(x)) from show_chunks('aggfns') x; count ------- @@ -75,7 +95,8 @@ from 'cfloat8', 'cts', 'ctstz', - 'cdate']) variable, + 'cdate', + '*']) variable, unnest(array[ 'min', 'max', @@ -102,13 +123,50 @@ where end and case + when variable = '*' then function = 'count' when condition = 'cint2 is null' then variable = 'cint2' - when function = 'count' then variable in ('cfloat4', 's') + when function = 'count' then variable in ('cfloat4', 's', 'ss') when variable = 't' then function in ('min', 'max') when variable in ('cts', 'ctstz', 'cdate') then function in ('min', 'max') else true end order by explain, condition.n, variable, function, grouping.n \gexec +select count(*) from aggfns order by 1; + count +-------- + 200000 +(1 row) + +select s, count(*) from aggfns group by s order by 1; + s | count +---+------- + 0 | 20000 + 1 | 20000 + 2 | 20000 + 3 | 20000 + 4 | 20000 + 5 | 20000 + 6 | 20000 + 7 | 20000 + 8 | 20000 + 9 | 20000 +(10 rows) + +select ss, count(*) from aggfns group by ss order by 1; + ss | count +----+------- + 0 | 20000 + 3 | 19981 + 4 | 19981 + 5 | 20000 + 6 | 20000 + 7 | 20000 + 8 | 20000 + 9 | 20000 + 11 | 40019 + | 19 +(10 rows) + select max(cdate) from aggfns order by 1; max ------------ @@ -134,8 +192,6 @@ select ss, max(cdate) from aggfns group by ss order by 1; ss | max ----+------------ 0 | 10-05-2075 - 1 | 02-21-2103 - 2 | 07-09-2130 3 | 11-24-2157 4 | 04-11-2185 5 | 08-28-2212 @@ -143,6 +199,8 @@ select ss, max(cdate) from aggfns group by ss order by 1; 7 | 06-01-2267 8 | 10-17-2294 9 | 03-05-2322 + 11 | 01-24-2183 + | 07-03-2156 (10 rows) select min(cdate) from aggfns order by 1; @@ -170,8 +228,6 @@ select ss, min(cdate) from aggfns group by ss order by 1; ss | min ----+------------ 0 | 01-02-2021 - 1 | 05-20-2048 - 2 | 10-06-2075 3 | 02-22-2103 4 | 07-10-2130 5 | 11-25-2157 @@ -179,6 +235,8 @@ select ss, min(cdate) from aggfns group by ss order by 1; 7 | 
08-29-2212 8 | 01-15-2240 9 | 06-02-2267 + 11 | 05-20-2048 + | 08-11-2104 (10 rows) select avg(cfloat4) from aggfns order by 1; @@ -206,15 +264,15 @@ select ss, avg(cfloat4) from aggfns group by ss order by 1; ss | avg ----+-------------------- 0 | -0.132126759885764 - 1 | NaN - 2 | Infinity 3 | -Infinity - 4 | -0.13252146150968 + 4 | -0.124977103379316 5 | -0.130611110996222 6 | -0.131984978889441 7 | -0.131050092529273 8 | -0.131313872741675 9 | -0.132765194868064 + 11 | NaN + | -5.91588952823689 (10 rows) select count(cfloat4) from aggfns order by 1; @@ -242,15 +300,15 @@ select ss, count(cfloat4) from aggfns group by ss order by 1; ss | count ----+------- 0 | 20000 - 1 | 20000 - 2 | 20000 - 3 | 20000 - 4 | 20000 + 3 | 19981 + 4 | 19981 5 | 20000 6 | 20000 7 | 20000 8 | 20000 9 | 20000 + 11 | 40019 + | 19 (10 rows) select max(cfloat4) from aggfns order by 1; @@ -275,18 +333,18 @@ select s, max(cfloat4) from aggfns group by s order by 1; (10 rows) select ss, max(cfloat4) from aggfns group by ss order by 1; - ss | max -----+---------- - 0 | 49.9977 - 1 | NaN - 2 | Infinity - 3 | 49.9977 - 4 | 49.9977 - 5 | 49.9977 - 6 | 49.9977 - 7 | 49.9977 - 8 | 49.9977 - 9 | 49.9977 + ss | max +----+--------- + 0 | 49.9977 + 3 | 49.9977 + 4 | 49.9977 + 5 | 49.9977 + 6 | 49.9977 + 7 | 49.9977 + 8 | 49.9977 + 9 | 49.9977 + 11 | NaN + | 48.478 (10 rows) select min(cfloat4) from aggfns order by 1; @@ -314,8 +372,6 @@ select ss, min(cfloat4) from aggfns group by ss order by 1; ss | min ----+----------- 0 | -49.9756 - 1 | NaN - 2 | -49.9756 3 | -Infinity 4 | -49.9756 5 | -49.9756 @@ -323,6 +379,8 @@ select ss, min(cfloat4) from aggfns group by ss order by 1; 7 | -49.9756 8 | -49.9756 9 | -49.9756 + 11 | NaN + | -46.7478 (10 rows) select stddev(cfloat4) from aggfns order by 1; @@ -350,15 +408,15 @@ select ss, stddev(cfloat4) from aggfns group by ss order by 1; ss | stddev ----+------------------ 0 | 28.8941380063427 - 1 | NaN - 2 | NaN 3 | NaN - 4 | 28.8948189281654 + 4 | 
28.89302954083 5 | 28.8951827753267 6 | 28.8960531969495 7 | 28.8959678301628 8 | 28.8963276918371 9 | 28.8968307405966 + 11 | NaN + | 27.2867943876683 (10 rows) select sum(cfloat4) from aggfns order by 1; @@ -386,15 +444,15 @@ select ss, sum(cfloat4) from aggfns group by ss order by 1; ss | sum ----+----------- 0 | -2642.54 - 1 | NaN - 2 | Infinity 3 | -Infinity - 4 | -2650.43 + 4 | -2497.17 5 | -2612.22 6 | -2639.7 7 | -2621 8 | -2626.28 9 | -2655.3 + 11 | NaN + | -112.402 (10 rows) select avg(cfloat8) from aggfns order by 1; @@ -422,15 +480,15 @@ select ss, avg(cfloat8) from aggfns group by ss order by 1; ss | avg ----+-------------------- 0 | -0.131261021163082 - 1 | -0.129096584053477 - 2 | -0.132733892038232 - 3 | -0.132521462687291 - 4 | -0.130611112199258 + 3 | -0.128550004763072 + 4 | -0.129264304572168 5 | -0.131984980024863 6 | -0.131050093692029 7 | -0.13131387403002 8 | -0.132765196124092 9 | -0.134423591727391 + 11 | -0.131587538323368 + | -4.30903203025656 (10 rows) select max(cfloat8) from aggfns order by 1; @@ -455,18 +513,18 @@ select s, max(cfloat8) from aggfns group by s order by 1; (10 rows) select ss, max(cfloat8) from aggfns group by ss order by 1; - ss | max -----+----------------- - 0 | 49.997744965367 - 1 | 49.997744965367 - 2 | 49.997744965367 - 3 | 49.997744965367 - 4 | 49.997744965367 - 5 | 49.997744965367 - 6 | 49.997744965367 - 7 | 49.997744965367 - 8 | 49.997744965367 - 9 | 49.997744965367 + ss | max +----+------------------ + 0 | 49.997744965367 + 3 | 49.997744965367 + 4 | 49.997744965367 + 5 | 49.997744965367 + 6 | 49.997744965367 + 7 | 49.997744965367 + 8 | 49.997744965367 + 9 | 49.997744965367 + 11 | 49.997744965367 + | 49.2015035590157 (10 rows) select min(cfloat8) from aggfns order by 1; @@ -494,8 +552,6 @@ select ss, min(cfloat8) from aggfns group by ss order by 1; ss | min ----+------------------- 0 | -49.9755693599582 - 1 | -49.9755693599582 - 2 | -49.9755693599582 3 | -49.9755693599582 4 | -49.9755693599582 5 | 
-49.9755693599582 @@ -503,6 +559,8 @@ select ss, min(cfloat8) from aggfns group by ss order by 1; 7 | -49.9755693599582 8 | -49.9755693599582 9 | -49.9755693599582 + 11 | -49.9755693599582 + | -38.3512130472809 (10 rows) select stddev(cfloat8) from aggfns order by 1; @@ -530,15 +588,15 @@ select ss, stddev(cfloat8) from aggfns group by ss order by 1; ss | stddev ----+------------------ 0 | 28.893219634188 - 1 | 28.8952055755515 - 2 | 28.8950722121689 - 3 | 28.8948189369737 - 4 | 28.8951827840888 + 3 | 28.8988694216911 + 4 | 28.8937408619903 5 | 28.8960532056265 6 | 28.8959678388464 7 | 28.8963277006942 8 | 28.8968307494196 9 | 28.8953209642426 + 11 | 28.8954978116663 + | 24.556507133269 (10 rows) select sum(cfloat8) from aggfns order by 1; @@ -566,15 +624,15 @@ select ss, sum(cfloat8) from aggfns group by ss order by 1; ss | sum ----+------------------- 0 | -2625.22042326164 - 1 | -2581.93168106955 - 2 | -2654.67784076463 - 3 | -2650.42925374582 - 4 | -2612.22224398516 + 3 | -2568.55764517095 + 4 | -2582.83006965648 5 | -2639.69960049726 6 | -2621.00187384058 7 | -2626.2774806004 8 | -2655.30392248183 9 | -2688.47183454782 + 11 | -5266.00169616286 + | -81.8716085748747 (10 rows) select avg(cint2) from aggfns order by 1; @@ -599,18 +657,18 @@ select s, avg(cint2) from aggfns group by s order by 1; (10 rows) select ss, avg(cint2) from aggfns group by ss order by 1; - ss | avg -----+---------------------- - 0 | -42.2972824182973825 - 1 | -43.0287773384715480 - 2 | -40.9893899204244032 - 3 | -42.8851408838396477 - 4 | -42.0152144537310445 - 5 | -43.5287022671537961 - 6 | -41.7711325759471498 - 7 | -41.3288123717531655 - 8 | -40.6353035383614434 - 9 | -43.1119563585406136 + ss | avg +----+----------------------- + 0 | -42.2972824182973825 + 3 | -42.6071034966436229 + 4 | -40.0837090471896604 + 5 | -43.5287022671537961 + 6 | -41.7711325759471498 + 7 | -41.3288123717531655 + 8 | -40.6353035383614434 + 9 | -43.1119563585406136 + 11 | -42.9734623946374528 + | 
-335.0000000000000000 (10 rows) select max(cint2) from aggfns order by 1; @@ -638,8 +696,6 @@ select ss, max(cint2) from aggfns group by ss order by 1; ss | max ----+------- 0 | 16383 - 1 | 16383 - 2 | 16383 3 | 16383 4 | 16383 5 | 16383 @@ -647,6 +703,8 @@ select ss, max(cint2) from aggfns group by ss order by 1; 7 | 16383 8 | 16383 9 | 16383 + 11 | 16383 + | 13720 (10 rows) select min(cint2) from aggfns order by 1; @@ -674,8 +732,6 @@ select ss, min(cint2) from aggfns group by ss order by 1; ss | min ----+-------- 0 | -16375 - 1 | -16375 - 2 | -16375 3 | -16375 4 | -16375 5 | -16375 @@ -683,6 +739,8 @@ select ss, min(cint2) from aggfns group by ss order by 1; 7 | -16375 8 | -16375 9 | -16375 + 11 | -16375 + | -15206 (10 rows) select stddev(cint2) from aggfns order by 1; @@ -710,15 +768,15 @@ select ss, stddev(cint2) from aggfns group by ss order by 1; ss | stddev ----+------------------- 0 | 9468.854793575036 - 1 | 9468.590431229826 - 2 | 9469.116705177088 - 3 | 9466.421782354268 - 4 | 9467.442985677590 + 3 | 9466.351508971864 + 4 | 9469.021659499615 5 | 9467.599133444078 6 | 9468.362090451302 7 | 9467.745653535755 8 | 9466.743345080951 9 | 9468.145452253715 + 11 | 9467.946206136906 + | 9796.780548277639 (10 rows) select sum(cint2) from aggfns order by 1; @@ -743,18 +801,18 @@ select s, sum(cint2) from aggfns group by s order by 1; (10 rows) select ss, sum(cint2) from aggfns group by ss order by 1; - ss | sum -----+--------- - 0 | -845142 - 1 | -859758 - 2 | -819009 - 3 | -856888 - 4 | -839506 - 5 | -869747 - 6 | -834629 - 7 | -825791 - 8 | -811934 - 9 | -861420 + ss | sum +----+---------- + 0 | -845142 + 3 | -850523 + 4 | -800151 + 5 | -869747 + 6 | -834629 + 7 | -825791 + 8 | -811934 + 9 | -861420 + 11 | -1718122 + | -6365 (10 rows) select avg(cint4) from aggfns order by 1; @@ -782,15 +840,15 @@ select ss, avg(cint4) from aggfns group by ss order by 1; ss | avg ----+----------------------- 0 | -2919248.121000000000 - 1 | -2836378.364750000000 - 2 | 
-2837313.994650000000 - 3 | -2818722.941500000000 - 4 | -2772243.427000000000 + 3 | -2682566.613082428307 + 4 | -2753845.761523447275 5 | -2850351.637450000000 6 | -2845789.891100000000 7 | -2804766.678700000000 8 | -2834269.365200000000 9 | -2814193.446750000000 + 11 | -2846001.238586671331 + | -146005017.57894737 (10 rows) select max(cint4) from aggfns order by 1; @@ -818,8 +876,6 @@ select ss, max(cint4) from aggfns group by ss order by 1; ss | max ----+------------ 0 | 1073660631 - 1 | 1073660631 - 2 | 1073660631 3 | 1073660631 4 | 1073660631 5 | 1073660631 @@ -827,6 +883,8 @@ select ss, max(cint4) from aggfns group by ss order by 1; 7 | 1073660631 8 | 1073660631 9 | 1073660631 + 11 | 1073660631 + | 978482473 (10 rows) select min(cint4) from aggfns order by 1; @@ -854,8 +912,6 @@ select ss, min(cint4) from aggfns group by ss order by 1; ss | min ----+------------- 0 | -1073184428 - 1 | -1073184428 - 2 | -1073184428 3 | -1073184428 4 | -1073184428 5 | -1073184428 @@ -863,6 +919,8 @@ select ss, min(cint4) from aggfns group by ss order by 1; 7 | -1073184428 8 | -1073184428 9 | -1073184428 + 11 | -1073184428 + | -960881516 (10 rows) select stddev(cint4) from aggfns order by 1; @@ -890,15 +948,15 @@ select ss, stddev(cint4) from aggfns group by ss order by 1; ss | stddev ----+----------- 0 | 620497458 - 1 | 620477996 - 2 | 620477953 - 3 | 620458232 - 4 | 620500878 + 3 | 620502162 + 4 | 620460588 5 | 620498014 6 | 620492575 7 | 620500389 8 | 620519080 9 | 620517247 + 11 | 620490348 + | 569369090 (10 rows) select sum(cint4) from aggfns order by 1; @@ -923,18 +981,18 @@ select s, sum(cint4) from aggfns group by s order by 1; (10 rows) select ss, sum(cint4) from aggfns group by ss order by 1; - ss | sum -----+-------------- - 0 | -58384962420 - 1 | -56727567295 - 2 | -56746279893 - 3 | -56374458830 - 4 | -55444868540 - 5 | -57007032749 - 6 | -56915797822 - 7 | -56095333574 - 8 | -56685387304 - 9 | -56283868935 + ss | sum +----+--------------- + 0 | -58384962420 + 3 | 
-53600363496 + 4 | -55024592161 + 5 | -57007032749 + 6 | -56915797822 + 7 | -56095333574 + 8 | -56685387304 + 9 | -56283868935 + 11 | -113894123567 + | -2774095334 (10 rows) select avg(cint8) from aggfns order by 1; @@ -962,15 +1020,15 @@ select ss, avg(cint8) from aggfns group by ss order by 1; ss | avg ----+----------------------- 0 | -2836378.364750000000 - 1 | -2837313.994650000000 - 2 | -2818722.941500000000 - 3 | -2772243.427000000000 - 4 | -2850351.637450000000 + 3 | -2653999.743206045743 + 4 | -3009394.583854661929 5 | -2845789.891100000000 6 | -2804766.678700000000 7 | -2834269.365200000000 8 | -2814193.446750000000 9 | -2819857.913500000000 + 11 | -2748620.862390364577 + | -127121035.31578947 (10 rows) select max(cint8) from aggfns order by 1; @@ -998,8 +1056,6 @@ select ss, max(cint8) from aggfns group by ss order by 1; ss | max ----+------------ 0 | 1073660631 - 1 | 1073660631 - 2 | 1073660631 3 | 1073660631 4 | 1073660631 5 | 1073660631 @@ -1007,6 +1063,8 @@ select ss, max(cint8) from aggfns group by ss order by 1; 7 | 1073660631 8 | 1073660631 9 | 1073660631 + 11 | 1073660631 + | 989667573 (10 rows) select min(cint8) from aggfns order by 1; @@ -1034,8 +1092,6 @@ select ss, min(cint8) from aggfns group by ss order by 1; ss | min ----+------------- 0 | -1073184428 - 1 | -1073184428 - 2 | -1073184428 3 | -1073184428 4 | -1073184428 5 | -1073184428 @@ -1043,6 +1099,8 @@ select ss, min(cint8) from aggfns group by ss order by 1; 7 | -1073184428 8 | -1073184428 9 | -1073184428 + 11 | -1073184428 + | -796158533 (10 rows) select stddev(cint8) from aggfns order by 1; @@ -1070,15 +1128,15 @@ select ss, stddev(cint8) from aggfns group by ss order by 1; ss | stddev ----+----------- 0 | 620477996 - 1 | 620477953 - 2 | 620458232 - 3 | 620500878 - 4 | 620498014 + 3 | 620514804 + 4 | 620462299 5 | 620492575 6 | 620500389 7 | 620519080 8 | 620517247 9 | 620524975 + 11 | 620478168 + | 609007799 (10 rows) select sum(cint8) from aggfns order by 1; @@ -1103,18 +1161,18 @@ 
select s, sum(cint8) from aggfns group by s order by 1; (10 rows) select ss, sum(cint8) from aggfns group by ss order by 1; - ss | sum -----+-------------- - 0 | -56727567295 - 1 | -56746279893 - 2 | -56374458830 - 3 | -55444868540 - 4 | -57007032749 - 5 | -56915797822 - 6 | -56095333574 - 7 | -56685387304 - 8 | -56283868935 - 9 | -56397158270 + ss | sum +----+--------------- + 0 | -56727567295 + 3 | -53029568869 + 4 | -60130713180 + 5 | -56915797822 + 6 | -56095333574 + 7 | -56685387304 + 8 | -56283868935 + 9 | -56397158270 + 11 | -109997058292 + | -2415299671 (10 rows) select max(cts) from aggfns order by 1; @@ -1142,8 +1200,6 @@ select ss, max(cts) from aggfns group by ss order by 1; ss | max ----+-------------------------- 0 | Fri Jan 01 06:34:21 2021 - 1 | Fri Jan 01 09:21:01 2021 - 2 | Fri Jan 01 12:07:41 2021 3 | Fri Jan 01 14:54:21 2021 4 | Fri Jan 01 17:41:01 2021 5 | Fri Jan 01 20:27:41 2021 @@ -1151,6 +1207,8 @@ select ss, max(cts) from aggfns group by ss order by 1; 7 | Sat Jan 02 02:01:01 2021 8 | Sat Jan 02 04:47:41 2021 9 | Sat Jan 02 07:34:21 2021 + 11 | Fri Jan 01 17:27:33 2021 + | Fri Jan 01 14:45:52 2021 (10 rows) select min(cts) from aggfns order by 1; @@ -1178,8 +1236,6 @@ select ss, min(cts) from aggfns group by ss order by 1; ss | min ----+-------------------------- 0 | Fri Jan 01 01:01:02 2021 - 1 | Fri Jan 01 03:47:42 2021 - 2 | Fri Jan 01 06:34:22 2021 3 | Fri Jan 01 09:21:02 2021 4 | Fri Jan 01 12:07:42 2021 5 | Fri Jan 01 14:54:22 2021 @@ -1187,6 +1243,8 @@ select ss, min(cts) from aggfns group by ss order by 1; 7 | Fri Jan 01 20:27:42 2021 8 | Fri Jan 01 23:14:22 2021 9 | Sat Jan 02 02:01:02 2021 + 11 | Fri Jan 01 03:47:42 2021 + | Fri Jan 01 09:29:58 2021 (10 rows) select max(ctstz) from aggfns order by 1; @@ -1214,8 +1272,6 @@ select ss, max(ctstz) from aggfns group by ss order by 1; ss | max ----+------------------------------ 0 | Fri Jan 01 06:34:21 2021 PST - 1 | Fri Jan 01 09:21:01 2021 PST - 2 | Fri Jan 01 12:07:41 2021 PST 3 | 
Fri Jan 01 14:54:21 2021 PST 4 | Fri Jan 01 17:41:01 2021 PST 5 | Fri Jan 01 20:27:41 2021 PST @@ -1223,6 +1279,8 @@ select ss, max(ctstz) from aggfns group by ss order by 1; 7 | Sat Jan 02 02:01:01 2021 PST 8 | Sat Jan 02 04:47:41 2021 PST 9 | Sat Jan 02 07:34:21 2021 PST + 11 | Fri Jan 01 17:27:33 2021 PST + | Fri Jan 01 14:45:52 2021 PST (10 rows) select min(ctstz) from aggfns order by 1; @@ -1250,8 +1308,6 @@ select ss, min(ctstz) from aggfns group by ss order by 1; ss | min ----+------------------------------ 0 | Fri Jan 01 01:01:02 2021 PST - 1 | Fri Jan 01 03:47:42 2021 PST - 2 | Fri Jan 01 06:34:22 2021 PST 3 | Fri Jan 01 09:21:02 2021 PST 4 | Fri Jan 01 12:07:42 2021 PST 5 | Fri Jan 01 14:54:22 2021 PST @@ -1259,6 +1315,8 @@ select ss, min(ctstz) from aggfns group by ss order by 1; 7 | Fri Jan 01 20:27:42 2021 PST 8 | Fri Jan 01 23:14:22 2021 PST 9 | Sat Jan 02 02:01:02 2021 PST + 11 | Fri Jan 01 03:47:42 2021 PST + | Fri Jan 01 09:29:58 2021 PST (10 rows) select avg(s) from aggfns order by 1; @@ -1286,8 +1344,6 @@ select ss, avg(s) from aggfns group by ss order by 1; ss | avg ----+---------------------------- 0 | 0.000000000000000000000000 - 1 | 1.00000000000000000000 - 2 | 2.0000000000000000 3 | 3.0000000000000000 4 | 4.0000000000000000 5 | 5.0000000000000000 @@ -1295,6 +1351,8 @@ select ss, avg(s) from aggfns group by ss order by 1; 7 | 7.0000000000000000 8 | 8.0000000000000000 9 | 9.0000000000000000 + 11 | 1.5011869362053025 + | 3.0000000000000000 (10 rows) select count(s) from aggfns order by 1; @@ -1322,15 +1380,15 @@ select ss, count(s) from aggfns group by ss order by 1; ss | count ----+------- 0 | 20000 - 1 | 20000 - 2 | 20000 - 3 | 20000 - 4 | 20000 + 3 | 19981 + 4 | 19981 5 | 20000 6 | 20000 7 | 20000 8 | 20000 9 | 20000 + 11 | 40019 + | 19 (10 rows) select max(s) from aggfns order by 1; @@ -1358,8 +1416,6 @@ select ss, max(s) from aggfns group by ss order by 1; ss | max ----+----- 0 | 0 - 1 | 1 - 2 | 2 3 | 3 4 | 4 5 | 5 @@ -1367,6 +1423,8 @@ 
select ss, max(s) from aggfns group by ss order by 1; 7 | 7 8 | 8 9 | 9 + 11 | 4 + | 3 (10 rows) select min(s) from aggfns order by 1; @@ -1394,8 +1452,6 @@ select ss, min(s) from aggfns group by ss order by 1; ss | min ----+----- 0 | 0 - 1 | 1 - 2 | 2 3 | 3 4 | 4 5 | 5 @@ -1403,6 +1459,8 @@ select ss, min(s) from aggfns group by ss order by 1; 7 | 7 8 | 8 9 | 9 + 11 | 1 + | 3 (10 rows) select stddev(s) from aggfns order by 1; @@ -1427,18 +1485,18 @@ select s, stddev(s) from aggfns group by s order by 1; (10 rows) select ss, stddev(s) from aggfns group by ss order by 1; - ss | stddev -----+-------- - 0 | 0 - 1 | 0 - 2 | 0 - 3 | 0 - 4 | 0 - 5 | 0 - 6 | 0 - 7 | 0 - 8 | 0 - 9 | 0 + ss | stddev +----+------------------------ + 0 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 + 11 | 0.50284545977155885187 + | 0 (10 rows) select sum(s) from aggfns order by 1; @@ -1466,31 +1524,31 @@ select ss, sum(s) from aggfns group by ss order by 1; ss | sum ----+-------- 0 | 0 - 1 | 20000 - 2 | 40000 - 3 | 60000 - 4 | 80000 + 3 | 59943 + 4 | 79924 5 | 100000 6 | 120000 7 | 140000 8 | 160000 9 | 180000 + 11 | 60076 + | 57 (10 rows) select avg(ss) from aggfns order by 1; avg -------------------- - 4.5000000000000000 + 6.4009880938689175 (1 row) select s, avg(ss) from aggfns group by s order by 1; s | avg ---+---------------------------- 0 | 0.000000000000000000000000 - 1 | 1.00000000000000000000 - 2 | 2.0000000000000000 + 1 | 11.0000000000000000 + 2 | 11.0000000000000000 3 | 3.0000000000000000 - 4 | 4.0000000000000000 + 4 | 4.0066500000000000 5 | 5.0000000000000000 6 | 6.0000000000000000 7 | 7.0000000000000000 @@ -1502,8 +1560,6 @@ select ss, avg(ss) from aggfns group by ss order by 1; ss | avg ----+---------------------------- 0 | 0.000000000000000000000000 - 1 | 1.00000000000000000000 - 2 | 2.0000000000000000 3 | 3.0000000000000000 4 | 4.0000000000000000 5 | 5.0000000000000000 @@ -1511,22 +1567,60 @@ select ss, avg(ss) from aggfns group by ss order by 1; 7 | 
7.0000000000000000 8 | 8.0000000000000000 9 | 9.0000000000000000 + 11 | 11.0000000000000000 + | +(10 rows) + +select count(ss) from aggfns order by 1; + count +-------- + 199981 +(1 row) + +select s, count(ss) from aggfns group by s order by 1; + s | count +---+------- + 0 | 20000 + 1 | 20000 + 2 | 20000 + 3 | 19981 + 4 | 20000 + 5 | 20000 + 6 | 20000 + 7 | 20000 + 8 | 20000 + 9 | 20000 +(10 rows) + +select ss, count(ss) from aggfns group by ss order by 1; + ss | count +----+------- + 0 | 20000 + 3 | 19981 + 4 | 19981 + 5 | 20000 + 6 | 20000 + 7 | 20000 + 8 | 20000 + 9 | 20000 + 11 | 40019 + | 0 (10 rows) select max(ss) from aggfns order by 1; max ----- - 9 + 11 (1 row) select s, max(ss) from aggfns group by s order by 1; s | max ---+----- 0 | 0 - 1 | 1 - 2 | 2 + 1 | 11 + 2 | 11 3 | 3 - 4 | 4 + 4 | 11 5 | 5 6 | 6 7 | 7 @@ -1538,8 +1632,6 @@ select ss, max(ss) from aggfns group by ss order by 1; ss | max ----+----- 0 | 0 - 1 | 1 - 2 | 2 3 | 3 4 | 4 5 | 5 @@ -1547,6 +1639,8 @@ select ss, max(ss) from aggfns group by ss order by 1; 7 | 7 8 | 8 9 | 9 + 11 | 11 + | (10 rows) select min(ss) from aggfns order by 1; @@ -1559,8 +1653,8 @@ select s, min(ss) from aggfns group by s order by 1; s | min ---+----- 0 | 0 - 1 | 1 - 2 | 2 + 1 | 11 + 2 | 11 3 | 3 4 | 4 5 | 5 @@ -1574,8 +1668,6 @@ select ss, min(ss) from aggfns group by ss order by 1; ss | min ----+----- 0 | 0 - 1 | 1 - 2 | 2 3 | 3 4 | 4 5 | 5 @@ -1583,35 +1675,35 @@ select ss, min(ss) from aggfns group by ss order by 1; 7 | 7 8 | 8 9 | 9 + 11 | 11 + | (10 rows) select stddev(ss) from aggfns order by 1; stddev -------------------- - 2.8722885039992502 + 3.3528328280068652 (1 row) select s, stddev(ss) from aggfns group by s order by 1; - s | stddev ----+-------- - 0 | 0 - 1 | 0 - 2 | 0 - 3 | 0 - 4 | 0 - 5 | 0 - 6 | 0 - 7 | 0 - 8 | 0 - 9 | 0 + s | stddev +---+------------------------ + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 0.21565737387148452722 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 (10 rows) select ss, stddev(ss) from 
aggfns group by ss order by 1; ss | stddev ----+-------- 0 | 0 - 1 | 0 - 2 | 0 3 | 0 4 | 0 5 | 0 @@ -1619,22 +1711,24 @@ select ss, stddev(ss) from aggfns group by ss order by 1; 7 | 0 8 | 0 9 | 0 + 11 | 0 + | (10 rows) select sum(ss) from aggfns order by 1; - sum --------- - 900000 + sum +--------- + 1280076 (1 row) select s, sum(ss) from aggfns group by s order by 1; s | sum ---+-------- 0 | 0 - 1 | 20000 - 2 | 40000 - 3 | 60000 - 4 | 80000 + 1 | 220000 + 2 | 220000 + 3 | 59943 + 4 | 80133 5 | 100000 6 | 120000 7 | 140000 @@ -1646,15 +1740,15 @@ select ss, sum(ss) from aggfns group by ss order by 1; ss | sum ----+-------- 0 | 0 - 1 | 20000 - 2 | 40000 - 3 | 60000 - 4 | 80000 + 3 | 59943 + 4 | 79924 5 | 100000 6 | 120000 7 | 140000 8 | 160000 9 | 180000 + 11 | 440209 + | (10 rows) select max(t) from aggfns order by 1; @@ -1682,8 +1776,6 @@ select ss, max(t) from aggfns group by ss order by 1; ss | max ----+-------- 0 | 20000 - 1 | 30000 - 2 | 40000 3 | 50000 4 | 60000 5 | 70000 @@ -1691,6 +1783,8 @@ select ss, max(t) from aggfns group by ss order by 1; 7 | 90000 8 | 100000 9 | 110000 + 11 | 59192 + | 49491 (10 rows) select min(t) from aggfns order by 1; @@ -1718,8 +1812,6 @@ select ss, min(t) from aggfns group by ss order by 1; ss | min ----+------- 0 | 1 - 1 | 10001 - 2 | 20001 3 | 30001 4 | 40001 5 | 50001 @@ -1727,6 +1819,44 @@ select ss, min(t) from aggfns group by ss order by 1; 7 | 70001 8 | 80001 9 | 90001 + 11 | 10001 + | 30537 +(10 rows) + +select count(*) from aggfns where cfloat8 > 0 order by 1; + count +------- + 99430 +(1 row) + +select s, count(*) from aggfns where cfloat8 > 0 group by s order by 1; + s | count +---+------- + 0 | 9943 + 1 | 9943 + 2 | 9942 + 3 | 9942 + 4 | 9943 + 5 | 9943 + 6 | 9944 + 7 | 9944 + 8 | 9943 + 9 | 9943 +(10 rows) + +select ss, count(*) from aggfns where cfloat8 > 0 group by ss order by 1; + ss | count +----+------- + 0 | 9943 + 3 | 9934 + 4 | 9932 + 5 | 9943 + 6 | 9944 + 7 | 9944 + 8 | 9943 + 9 | 9943 + 11 | 19896 + | 8 
(10 rows) select max(cdate) from aggfns where cfloat8 > 0 order by 1; @@ -1754,8 +1884,6 @@ select ss, max(cdate) from aggfns where cfloat8 > 0 group by ss order by 1; ss | max ----+------------ 0 | 10-04-2075 - 1 | 02-21-2103 - 2 | 07-08-2130 3 | 11-22-2157 4 | 04-11-2185 5 | 08-27-2212 @@ -1763,6 +1891,8 @@ select ss, max(cdate) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | 05-31-2267 8 | 10-15-2294 9 | 03-05-2322 + 11 | 03-03-2180 + | 07-03-2156 (10 rows) select min(cdate) from aggfns where cfloat8 > 0 order by 1; @@ -1790,8 +1920,6 @@ select ss, min(cdate) from aggfns where cfloat8 > 0 group by ss order by 1; ss | min ----+------------ 0 | 01-02-2021 - 1 | 05-20-2048 - 2 | 10-11-2075 3 | 02-26-2103 4 | 07-13-2130 5 | 11-27-2157 @@ -1799,6 +1927,8 @@ select ss, min(cdate) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | 08-29-2212 8 | 01-15-2240 9 | 06-02-2267 + 11 | 05-20-2048 + | 05-18-2110 (10 rows) select avg(cfloat4) from aggfns where cfloat8 > 0 order by 1; @@ -1826,15 +1956,15 @@ select ss, avg(cfloat4) from aggfns where cfloat8 > 0 group by ss order by 1; ss | avg ----+-------------------- 0 | -0.542617154225893 - 1 | -0.540875748760701 - 2 | -0.541406464808325 3 | -Infinity - 4 | -0.544616367218129 + 4 | -0.545104747625541 5 | -0.544616367218129 6 | -0.547797322998719 7 | -0.547797322998719 8 | -0.544139963208192 9 | -0.547059247380753 + 11 | -0.540899217374597 + | -1.46971142292023 (10 rows) select count(cfloat4) from aggfns where cfloat8 > 0 order by 1; @@ -1862,15 +1992,15 @@ select ss, count(cfloat4) from aggfns where cfloat8 > 0 group by ss order by 1; ss | count ----+------- 0 | 9943 - 1 | 9943 - 2 | 9942 - 3 | 9942 - 4 | 9943 + 3 | 9934 + 4 | 9932 5 | 9943 6 | 9944 7 | 9944 8 | 9943 9 | 9943 + 11 | 19896 + | 8 (10 rows) select max(cfloat4) from aggfns where cfloat8 > 0 order by 1; @@ -1898,8 +2028,6 @@ select ss, max(cfloat4) from aggfns where cfloat8 > 0 group by ss order by 1; ss | max ----+--------- 0 | 49.9734 - 1 | 
49.9734 - 2 | 49.9734 3 | 49.9734 4 | 49.9734 5 | 49.9734 @@ -1907,6 +2035,8 @@ select ss, max(cfloat4) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | 49.9734 8 | 49.9734 9 | 49.9734 + 11 | 49.9734 + | 24.3202 (10 rows) select min(cfloat4) from aggfns where cfloat8 > 0 order by 1; @@ -1934,8 +2064,6 @@ select ss, min(cfloat4) from aggfns where cfloat8 > 0 group by ss order by 1; ss | min ----+----------- 0 | -49.9722 - 1 | -49.9722 - 2 | -49.9722 3 | -Infinity 4 | -49.9722 5 | -49.9722 @@ -1943,6 +2071,8 @@ select ss, min(cfloat4) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | -49.9722 8 | -49.9722 9 | -49.9722 + 11 | -49.9722 + | -45.111 (10 rows) select stddev(cfloat4) from aggfns where cfloat8 > 0 order by 1; @@ -1970,15 +2100,15 @@ select ss, stddev(cfloat4) from aggfns where cfloat8 > 0 group by ss order by 1; ss | stddev ----+------------------ 0 | 28.889048755135 - 1 | 28.8872257367626 - 2 | 28.8886301576555 3 | NaN - 4 | 28.8889504423898 + 4 | 28.8896204728232 5 | 28.8889504423898 6 | 28.8892391773513 7 | 28.8892391773513 8 | 28.8883896891408 9 | 28.8893600799806 + 11 | 28.886867950284 + | 23.0946953685783 (10 rows) select sum(cfloat4) from aggfns where cfloat8 > 0 order by 1; @@ -2006,15 +2136,15 @@ select ss, sum(cfloat4) from aggfns where cfloat8 > 0 group by ss order by 1; ss | sum ----+----------- 0 | -5395.24 - 1 | -5377.93 - 2 | -5382.66 3 | -Infinity - 4 | -5415.12 + 4 | -5413.98 5 | -5415.12 6 | -5447.3 7 | -5447.3 8 | -5410.38 9 | -5439.41 + 11 | -10761.7 + | -11.7577 (10 rows) select avg(cfloat8) from aggfns where cfloat8 > 0 order by 1; @@ -2042,15 +2172,15 @@ select ss, avg(cfloat8) from aggfns where cfloat8 > 0 group by ss order by 1; ss | avg ----+------------------ 0 | 25.0253254492148 - 1 | 25.0296791394684 - 2 | 25.0284447917954 - 3 | 25.0284447917954 - 4 | 25.0283891332554 + 3 | 25.0326248798677 + 4 | 25.0314280577154 5 | 25.0283891332554 6 | 25.0265337956144 7 | 25.0265337956144 8 | 25.0286117211772 9 | 
25.0252759158804 + 11 | 25.0275446062773 + | 19.8378204280743 (10 rows) select max(cfloat8) from aggfns where cfloat8 > 0 order by 1; @@ -2075,18 +2205,18 @@ select s, max(cfloat8) from aggfns where cfloat8 > 0 group by s order by 1; (10 rows) select ss, max(cfloat8) from aggfns where cfloat8 > 0 group by ss order by 1; - ss | max -----+----------------- - 0 | 49.997744965367 - 1 | 49.997744965367 - 2 | 49.997744965367 - 3 | 49.997744965367 - 4 | 49.997744965367 - 5 | 49.997744965367 - 6 | 49.997744965367 - 7 | 49.997744965367 - 8 | 49.997744965367 - 9 | 49.997744965367 + ss | max +----+------------------ + 0 | 49.997744965367 + 3 | 49.997744965367 + 4 | 49.997744965367 + 5 | 49.997744965367 + 6 | 49.997744965367 + 7 | 49.997744965367 + 8 | 49.997744965367 + 9 | 49.997744965367 + 11 | 49.997744965367 + | 49.2015035590157 (10 rows) select min(cfloat8) from aggfns where cfloat8 > 0 order by 1; @@ -2114,8 +2244,6 @@ select ss, min(cfloat8) from aggfns where cfloat8 > 0 group by ss order by 1; ss | min ----+--------------------- 0 | 0.00456937123090029 - 1 | 0.00456937123090029 - 2 | 0.00456937123090029 3 | 0.00456937123090029 4 | 0.00456937123090029 5 | 0.00456937123090029 @@ -2123,6 +2251,8 @@ select ss, min(cfloat8) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | 0.00456937123090029 8 | 0.00456937123090029 9 | 0.00456937123090029 + 11 | 0.00456937123090029 + | 0.0712367007508874 (10 rows) select stddev(cfloat8) from aggfns where cfloat8 > 0 order by 1; @@ -2150,15 +2280,15 @@ select ss, stddev(cfloat8) from aggfns where cfloat8 > 0 group by ss order by 1; ss | stddev ----+------------------ 0 | 14.4393152148108 - 1 | 14.4397230124184 - 2 | 14.4399246592273 - 3 | 14.4399246592273 - 4 | 14.4391994993402 + 3 | 14.4385675470181 + 4 | 14.4418957208478 5 | 14.4391994993402 6 | 14.4396587086659 7 | 14.4396587086659 8 | 14.4388979969066 9 | 14.4386334818319 + 11 | 14.4381143863288 + | 16.2202029568666 (10 rows) select sum(cfloat8) from aggfns where cfloat8 > 0 
order by 1; @@ -2186,15 +2316,15 @@ select ss, sum(cfloat8) from aggfns where cfloat8 > 0 group by ss order by 1; ss | sum ----+------------------ 0 | 248826.810941542 - 1 | 248870.099683735 - 2 | 248832.79812003 - 3 | 248832.79812003 - 4 | 248857.273151958 + 3 | 248674.095556606 + 4 | 248612.143469229 5 | 248857.273151958 6 | 248863.85206359 7 | 248863.85206359 8 | 248859.486343665 9 | 248826.318431599 + 11 | 497948.027486494 + | 158.702563424595 (10 rows) select avg(cint2) from aggfns where cfloat8 > 0 order by 1; @@ -2219,18 +2349,18 @@ select s, avg(cint2) from aggfns where cfloat8 > 0 group by s order by 1; (10 rows) select ss, avg(cint2) from aggfns where cfloat8 > 0 group by ss order by 1; - ss | avg -----+--------------------- - 0 | 62.5336219045701631 - 1 | 61.1402396053558844 - 2 | 64.7349239907379442 - 3 | 60.4202577527184857 - 4 | 58.4389090177133655 - 5 | 53.6134098459679855 - 6 | 59.4139909411172622 - 7 | 62.9917446894191080 - 8 | 57.2486665995773372 - 9 | 59.3958123615864707 + ss | avg +----+----------------------- + 0 | 62.5336219045701631 + 3 | 59.2794236195082628 + 4 | 60.5569773299748111 + 5 | 53.6134098459679855 + 6 | 59.4139909411172622 + 7 | 62.9917446894191080 + 8 | 57.2486665995773372 + 9 | 59.3958123615864707 + 11 | 61.8774966041153092 + | 1475.6250000000000000 (10 rows) select max(cint2) from aggfns where cfloat8 > 0 order by 1; @@ -2258,8 +2388,6 @@ select ss, max(cint2) from aggfns where cfloat8 > 0 group by ss order by 1; ss | max ----+------- 0 | 16377 - 1 | 16377 - 2 | 16377 3 | 16377 4 | 16377 5 | 16377 @@ -2267,6 +2395,8 @@ select ss, max(cint2) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | 16377 8 | 16377 9 | 16377 + 11 | 16377 + | 13720 (10 rows) select min(cint2) from aggfns where cfloat8 > 0 order by 1; @@ -2294,8 +2424,6 @@ select ss, min(cint2) from aggfns where cfloat8 > 0 group by ss order by 1; ss | min ----+-------- 0 | -16374 - 1 | -16374 - 2 | -16374 3 | -16374 4 | -16374 5 | -16374 @@ -2303,6 +2431,8 @@ 
select ss, min(cint2) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | -16374 8 | -16374 9 | -16374 + 11 | -16374 + | -12426 (10 rows) select stddev(cint2) from aggfns where cfloat8 > 0 order by 1; @@ -2330,15 +2460,15 @@ select ss, stddev(cint2) from aggfns where cfloat8 > 0 group by ss order by 1; ss | stddev ----+------------------- 0 | 9396.868628954375 - 1 | 9397.478004684803 - 2 | 9396.003979674052 - 3 | 9395.588131506675 - 4 | 9395.667525723886 + 3 | 9395.145600532270 + 4 | 9398.818234815872 5 | 9394.622668940645 6 | 9397.336273737402 7 | 9394.449892433625 8 | 9394.534603851403 9 | 9396.374239570388 + 11 | 9394.931149048751 + | 10506.43193392 (10 rows) select sum(cint2) from aggfns where cfloat8 > 0 order by 1; @@ -2363,18 +2493,18 @@ select s, sum(cint2) from aggfns where cfloat8 > 0 group by s order by 1; (10 rows) select ss, sum(cint2) from aggfns where cfloat8 > 0 group by ss order by 1; - ss | sum -----+-------- - 0 | 621209 - 1 | 607306 - 2 | 643012 - 3 | 600094 - 4 | 580649 - 5 | 532542 - 6 | 590278 - 7 | 625697 - 8 | 568880 - 9 | 590038 + ss | sum +----+--------- + 0 | 621209 + 3 | 588289 + 4 | 601028 + 5 | 532542 + 6 | 590278 + 7 | 625697 + 8 | 568880 + 9 | 590038 + 11 | 1229939 + | 11805 (10 rows) select avg(cint4) from aggfns where cfloat8 > 0 order by 1; @@ -2399,18 +2529,18 @@ select s, avg(cint4) from aggfns where cfloat8 > 0 group by s order by 1; (10 rows) select ss, avg(cint4) from aggfns where cfloat8 > 0 group by ss order by 1; - ss | avg -----+----------------------- - 0 | -3230391.667404203963 - 1 | -3063702.023936437695 - 2 | -3067528.135686984510 - 3 | -3067528.135686984510 - 4 | -2963500.289651010761 - 5 | -2963500.289651010761 - 6 | -3033294.541331456154 - 7 | -3033294.541331456154 - 8 | -3023451.730664789299 - 9 | -2983069.716282812029 + ss | avg +----+------------------------ + 0 | -3230391.667404203963 + 3 | -2996690.484799677874 + 4 | -3090966.561316955296 + 5 | -2963500.289651010761 + 6 | -3033294.541331456154 + 7 | 
-3033294.541331456154 + 8 | -3023451.730664789299 + 9 | -2983069.716282812029 + 11 | -3001927.897165259349 + | -91030181.125000000000 (10 rows) select max(cint4) from aggfns where cfloat8 > 0 order by 1; @@ -2438,8 +2568,6 @@ select ss, max(cint4) from aggfns where cfloat8 > 0 group by ss order by 1; ss | max ----+------------ 0 | 1073213373 - 1 | 1073213373 - 2 | 1073213373 3 | 1073213373 4 | 1073213373 5 | 1073213373 @@ -2447,6 +2575,8 @@ select ss, max(cint4) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | 1073213373 8 | 1073213373 9 | 1073213373 + 11 | 1073213373 + | 978482473 (10 rows) select min(cint4) from aggfns where cfloat8 > 0 order by 1; @@ -2474,8 +2604,6 @@ select ss, min(cint4) from aggfns where cfloat8 > 0 group by ss order by 1; ss | min ----+------------- 0 | -1073053412 - 1 | -1073053412 - 2 | -1073053412 3 | -1073053412 4 | -1073053412 5 | -1073053412 @@ -2483,6 +2611,8 @@ select ss, min(cint4) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | -1073053412 8 | -1073053412 9 | -1073053412 + 11 | -1073053412 + | -960881516 (10 rows) select stddev(cint4) from aggfns where cfloat8 > 0 order by 1; @@ -2510,15 +2640,15 @@ select ss, stddev(cint4) from aggfns where cfloat8 > 0 group by ss order by 1; ss | stddev ----+----------- 0 | 622275017 - 1 | 622236051 - 2 | 622267230 - 3 | 622267230 - 4 | 622322391 + 3 | 622183925 + 4 | 622245451 5 | 622322391 6 | 622330015 7 | 622330015 8 | 622360538 9 | 622356873 + 11 | 622274449 + | 762095534 (10 rows) select sum(cint4) from aggfns where cfloat8 > 0 order by 1; @@ -2546,15 +2676,15 @@ select ss, sum(cint4) from aggfns where cfloat8 > 0 group by ss order by 1; ss | sum ----+-------------- 0 | -32119784349 - 1 | -30462389224 - 2 | -30497364725 - 3 | -30497364725 - 4 | -29466083380 + 3 | -29769123276 + 4 | -30699479887 5 | -29466083380 6 | -30163080919 7 | -30163080919 8 | -30062180558 9 | -29660662189 + 11 | -59726357442 + | -728241449 (10 rows) select avg(cint8) from aggfns where cfloat8 > 0 
order by 1; @@ -2582,15 +2712,15 @@ select ss, avg(cint8) from aggfns where cfloat8 > 0 group by ss order by 1; ss | avg ----+----------------------- 0 | -7332793.526098762949 - 1 | -7334675.513225384693 - 2 | -7250229.067592033796 - 3 | -7250229.067592033796 - 4 | -7326050.594790304737 + 3 | -7271023.008153815180 + 4 | -7498519.579943616593 5 | -7326050.594790304737 6 | -7272459.754123089300 7 | -7272459.754123089300 8 | -7247017.818163532133 9 | -7258411.696771598109 + 11 | -7206377.192098914355 + | 18570646.625000000000 (10 rows) select max(cint8) from aggfns where cfloat8 > 0 order by 1; @@ -2618,8 +2748,6 @@ select ss, max(cint8) from aggfns where cfloat8 > 0 group by ss order by 1; ss | max ----+------------ 0 | 1073659785 - 1 | 1073659785 - 2 | 1073659785 3 | 1073659785 4 | 1073659785 5 | 1073659785 @@ -2627,6 +2755,8 @@ select ss, max(cint8) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | 1073659785 8 | 1073659785 9 | 1073659785 + 11 | 1073659785 + | 969157980 (10 rows) select min(cint8) from aggfns where cfloat8 > 0 order by 1; @@ -2654,8 +2784,6 @@ select ss, min(cint8) from aggfns where cfloat8 > 0 group by ss order by 1; ss | min ----+------------- 0 | -1073184428 - 1 | -1073184428 - 2 | -1073184428 3 | -1073184428 4 | -1073184428 5 | -1073184428 @@ -2663,6 +2791,8 @@ select ss, min(cint8) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | -1073184428 8 | -1073184428 9 | -1073184428 + 11 | -1073184428 + | -672677796 (10 rows) select stddev(cint8) from aggfns where cfloat8 > 0 order by 1; @@ -2690,15 +2820,15 @@ select ss, stddev(cint8) from aggfns where cfloat8 > 0 group by ss order by 1; ss | stddev ----+----------- 0 | 623613696 - 1 | 623613596 - 2 | 623588105 - 3 | 623588105 - 4 | 623602577 + 3 | 623600813 + 4 | 623575078 5 | 623602577 6 | 623594116 7 | 623594116 8 | 623620316 9 | 623635702 + 11 | 623598886 + | 648969761 (10 rows) select sum(cint8) from aggfns where cfloat8 > 0 order by 1; @@ -2723,18 +2853,18 @@ select s, sum(cint8) 
from aggfns where cfloat8 > 0 group by s order by 1; (10 rows) select ss, sum(cint8) from aggfns where cfloat8 > 0 group by ss order by 1; - ss | sum -----+-------------- - 0 | -72909966030 - 1 | -72928678628 - 2 | -72081777390 - 3 | -72081777390 - 4 | -72842921064 - 5 | -72842921064 - 6 | -72317339795 - 7 | -72317339795 - 8 | -72057098166 - 9 | -72170387501 + ss | sum +----+--------------- + 0 | -72909966030 + 3 | -72230342563 + 4 | -74475296468 + 5 | -72842921064 + 6 | -72317339795 + 7 | -72317339795 + 8 | -72057098166 + 9 | -72170387501 + 11 | -143378080614 + | 148565173 (10 rows) select max(cts) from aggfns where cfloat8 > 0 order by 1; @@ -2762,8 +2892,6 @@ select ss, max(cts) from aggfns where cfloat8 > 0 group by ss order by 1; ss | max ----+-------------------------- 0 | Fri Jan 01 06:34:20 2021 - 1 | Fri Jan 01 09:21:01 2021 - 2 | Fri Jan 01 12:07:40 2021 3 | Fri Jan 01 14:54:19 2021 4 | Fri Jan 01 17:41:01 2021 5 | Fri Jan 01 20:27:40 2021 @@ -2771,6 +2899,8 @@ select ss, max(cts) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | Sat Jan 02 02:01:00 2021 8 | Sat Jan 02 04:47:39 2021 9 | Sat Jan 02 07:34:21 2021 + 11 | Fri Jan 01 17:09:56 2021 + | Fri Jan 01 14:45:52 2021 (10 rows) select min(cts) from aggfns where cfloat8 > 0 order by 1; @@ -2798,8 +2928,6 @@ select ss, min(cts) from aggfns where cfloat8 > 0 group by ss order by 1; ss | min ----+-------------------------- 0 | Fri Jan 01 01:01:02 2021 - 1 | Fri Jan 01 03:47:42 2021 - 2 | Fri Jan 01 06:34:27 2021 3 | Fri Jan 01 09:21:06 2021 4 | Fri Jan 01 12:07:45 2021 5 | Fri Jan 01 14:54:24 2021 @@ -2807,6 +2935,8 @@ select ss, min(cts) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | Fri Jan 01 20:27:42 2021 8 | Fri Jan 01 23:14:22 2021 9 | Sat Jan 02 02:01:02 2021 + 11 | Fri Jan 01 03:47:42 2021 + | Fri Jan 01 10:05:04 2021 (10 rows) select max(ctstz) from aggfns where cfloat8 > 0 order by 1; @@ -2834,8 +2964,6 @@ select ss, max(ctstz) from aggfns where cfloat8 > 0 group by ss order by 
1; ss | max ----+------------------------------ 0 | Fri Jan 01 06:34:20 2021 PST - 1 | Fri Jan 01 09:21:01 2021 PST - 2 | Fri Jan 01 12:07:40 2021 PST 3 | Fri Jan 01 14:54:19 2021 PST 4 | Fri Jan 01 17:41:01 2021 PST 5 | Fri Jan 01 20:27:40 2021 PST @@ -2843,6 +2971,8 @@ select ss, max(ctstz) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | Sat Jan 02 02:01:00 2021 PST 8 | Sat Jan 02 04:47:39 2021 PST 9 | Sat Jan 02 07:34:21 2021 PST + 11 | Fri Jan 01 17:09:56 2021 PST + | Fri Jan 01 14:45:52 2021 PST (10 rows) select min(ctstz) from aggfns where cfloat8 > 0 order by 1; @@ -2870,8 +3000,6 @@ select ss, min(ctstz) from aggfns where cfloat8 > 0 group by ss order by 1; ss | min ----+------------------------------ 0 | Fri Jan 01 01:01:02 2021 PST - 1 | Fri Jan 01 03:47:42 2021 PST - 2 | Fri Jan 01 06:34:27 2021 PST 3 | Fri Jan 01 09:21:06 2021 PST 4 | Fri Jan 01 12:07:45 2021 PST 5 | Fri Jan 01 14:54:24 2021 PST @@ -2879,6 +3007,8 @@ select ss, min(ctstz) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | Fri Jan 01 20:27:42 2021 PST 8 | Fri Jan 01 23:14:22 2021 PST 9 | Sat Jan 02 02:01:02 2021 PST + 11 | Fri Jan 01 03:47:42 2021 PST + | Fri Jan 01 10:05:04 2021 PST (10 rows) select avg(s) from aggfns where cfloat8 > 0 order by 1; @@ -2906,8 +3036,6 @@ select ss, avg(s) from aggfns where cfloat8 > 0 group by ss order by 1; ss | avg ----+------------------------ 0 | 0.00000000000000000000 - 1 | 1.00000000000000000000 - 2 | 2.0000000000000000 3 | 3.0000000000000000 4 | 4.0000000000000000 5 | 5.0000000000000000 @@ -2915,6 +3043,8 @@ select ss, avg(s) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | 7.0000000000000000 8 | 8.0000000000000000 9 | 9.0000000000000000 + 11 | 1.5013570566948130 + | 3.0000000000000000 (10 rows) select count(s) from aggfns where cfloat8 > 0 order by 1; @@ -2942,15 +3072,15 @@ select ss, count(s) from aggfns where cfloat8 > 0 group by ss order by 1; ss | count ----+------- 0 | 9943 - 1 | 9943 - 2 | 9942 - 3 | 9942 - 4 | 
9943 + 3 | 9934 + 4 | 9932 5 | 9943 6 | 9944 7 | 9944 8 | 9943 9 | 9943 + 11 | 19896 + | 8 (10 rows) select max(s) from aggfns where cfloat8 > 0 order by 1; @@ -2978,8 +3108,6 @@ select ss, max(s) from aggfns where cfloat8 > 0 group by ss order by 1; ss | max ----+----- 0 | 0 - 1 | 1 - 2 | 2 3 | 3 4 | 4 5 | 5 @@ -2987,6 +3115,8 @@ select ss, max(s) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | 7 8 | 8 9 | 9 + 11 | 4 + | 3 (10 rows) select min(s) from aggfns where cfloat8 > 0 order by 1; @@ -3014,8 +3144,6 @@ select ss, min(s) from aggfns where cfloat8 > 0 group by ss order by 1; ss | min ----+----- 0 | 0 - 1 | 1 - 2 | 2 3 | 3 4 | 4 5 | 5 @@ -3023,6 +3151,8 @@ select ss, min(s) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | 7 8 | 8 9 | 9 + 11 | 1 + | 3 (10 rows) select stddev(s) from aggfns where cfloat8 > 0 order by 1; @@ -3047,18 +3177,18 @@ select s, stddev(s) from aggfns where cfloat8 > 0 group by s order by 1; (10 rows) select ss, stddev(s) from aggfns where cfloat8 > 0 group by ss order by 1; - ss | stddev -----+-------- - 0 | 0 - 1 | 0 - 2 | 0 - 3 | 0 - 4 | 0 - 5 | 0 - 6 | 0 - 7 | 0 - 8 | 0 - 9 | 0 + ss | stddev +----+------------------------ + 0 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 + 11 | 0.50331713731225271878 + | 0 (10 rows) select sum(s) from aggfns where cfloat8 > 0 order by 1; @@ -3086,31 +3216,31 @@ select ss, sum(s) from aggfns where cfloat8 > 0 group by ss order by 1; ss | sum ----+------- 0 | 0 - 1 | 9943 - 2 | 19884 - 3 | 29826 - 4 | 39772 + 3 | 29802 + 4 | 39728 5 | 49715 6 | 59664 7 | 69608 8 | 79544 9 | 89487 + 11 | 29871 + | 24 (10 rows) select avg(ss) from aggfns where cfloat8 > 0 order by 1; avg -------------------- - 4.5000804586141004 + 6.4010379996379071 (1 row) select s, avg(ss) from aggfns where cfloat8 > 0 group by s order by 1; s | avg ---+------------------------ 0 | 0.00000000000000000000 - 1 | 1.00000000000000000000 - 2 | 2.0000000000000000 + 1 | 11.0000000000000000 + 2 | 
11.0000000000000000 3 | 3.0000000000000000 - 4 | 4.0000000000000000 + 4 | 4.0077441416071608 5 | 5.0000000000000000 6 | 6.0000000000000000 7 | 7.0000000000000000 @@ -3122,8 +3252,6 @@ select ss, avg(ss) from aggfns where cfloat8 > 0 group by ss order by 1; ss | avg ----+------------------------ 0 | 0.00000000000000000000 - 1 | 1.00000000000000000000 - 2 | 2.0000000000000000 3 | 3.0000000000000000 4 | 4.0000000000000000 5 | 5.0000000000000000 @@ -3131,22 +3259,60 @@ select ss, avg(ss) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | 7.0000000000000000 8 | 8.0000000000000000 9 | 9.0000000000000000 + 11 | 11.0000000000000000 + | +(10 rows) + +select count(ss) from aggfns where cfloat8 > 0 order by 1; + count +------- + 99422 +(1 row) + +select s, count(ss) from aggfns where cfloat8 > 0 group by s order by 1; + s | count +---+------- + 0 | 9943 + 1 | 9943 + 2 | 9942 + 3 | 9934 + 4 | 9943 + 5 | 9943 + 6 | 9944 + 7 | 9944 + 8 | 9943 + 9 | 9943 +(10 rows) + +select ss, count(ss) from aggfns where cfloat8 > 0 group by ss order by 1; + ss | count +----+------- + 0 | 9943 + 3 | 9934 + 4 | 9932 + 5 | 9943 + 6 | 9944 + 7 | 9944 + 8 | 9943 + 9 | 9943 + 11 | 19896 + | 0 (10 rows) select max(ss) from aggfns where cfloat8 > 0 order by 1; max ----- - 9 + 11 (1 row) select s, max(ss) from aggfns where cfloat8 > 0 group by s order by 1; s | max ---+----- 0 | 0 - 1 | 1 - 2 | 2 + 1 | 11 + 2 | 11 3 | 3 - 4 | 4 + 4 | 11 5 | 5 6 | 6 7 | 7 @@ -3158,8 +3324,6 @@ select ss, max(ss) from aggfns where cfloat8 > 0 group by ss order by 1; ss | max ----+----- 0 | 0 - 1 | 1 - 2 | 2 3 | 3 4 | 4 5 | 5 @@ -3167,6 +3331,8 @@ select ss, max(ss) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | 7 8 | 8 9 | 9 + 11 | 11 + | (10 rows) select min(ss) from aggfns where cfloat8 > 0 order by 1; @@ -3179,8 +3345,8 @@ select s, min(ss) from aggfns where cfloat8 > 0 group by s order by 1; s | min ---+----- 0 | 0 - 1 | 1 - 2 | 2 + 1 | 11 + 2 | 11 3 | 3 4 | 4 5 | 5 @@ -3194,8 +3360,6 @@ select ss, 
min(ss) from aggfns where cfloat8 > 0 group by ss order by 1; ss | min ----+----- 0 | 0 - 1 | 1 - 2 | 2 3 | 3 4 | 4 5 | 5 @@ -3203,35 +3367,35 @@ select ss, min(ss) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | 7 8 | 8 9 | 9 + 11 | 11 + | (10 rows) select stddev(ss) from aggfns where cfloat8 > 0 order by 1; stddev -------------------- - 2.8722957659869625 + 3.3528295838273201 (1 row) select s, stddev(ss) from aggfns where cfloat8 > 0 group by s order by 1; - s | stddev ----+-------- - 0 | 0 - 1 | 0 - 2 | 0 - 3 | 0 - 4 | 0 - 5 | 0 - 6 | 0 - 7 | 0 - 8 | 0 - 9 | 0 + s | stddev +---+------------------------ + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 0.23271112137690657933 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 (10 rows) select ss, stddev(ss) from aggfns where cfloat8 > 0 group by ss order by 1; ss | stddev ----+-------- 0 | 0 - 1 | 0 - 2 | 0 3 | 0 4 | 0 5 | 0 @@ -3239,42 +3403,44 @@ select ss, stddev(ss) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | 0 8 | 0 9 | 0 + 11 | 0 + | (10 rows) select sum(ss) from aggfns where cfloat8 > 0 order by 1; sum -------- - 447443 + 636404 (1 row) select s, sum(ss) from aggfns where cfloat8 > 0 group by s order by 1; - s | sum ----+------- - 0 | 0 - 1 | 9943 - 2 | 19884 - 3 | 29826 - 4 | 39772 - 5 | 49715 - 6 | 59664 - 7 | 69608 - 8 | 79544 - 9 | 89487 + s | sum +---+-------- + 0 | 0 + 1 | 109373 + 2 | 109362 + 3 | 29802 + 4 | 39849 + 5 | 49715 + 6 | 59664 + 7 | 69608 + 8 | 79544 + 9 | 89487 (10 rows) select ss, sum(ss) from aggfns where cfloat8 > 0 group by ss order by 1; - ss | sum -----+------- - 0 | 0 - 1 | 9943 - 2 | 19884 - 3 | 29826 - 4 | 39772 - 5 | 49715 - 6 | 59664 - 7 | 69608 - 8 | 79544 - 9 | 89487 + ss | sum +----+-------- + 0 | 0 + 3 | 29802 + 4 | 39728 + 5 | 49715 + 6 | 59664 + 7 | 69608 + 8 | 79544 + 9 | 89487 + 11 | 218856 + | (10 rows) select max(t) from aggfns where cfloat8 > 0 order by 1; @@ -3302,8 +3468,6 @@ select ss, max(t) from aggfns where cfloat8 > 0 group by ss order by 1; ss | max 
----+-------- 0 | 19999 - 1 | 30000 - 2 | 39999 3 | 49998 4 | 60000 5 | 69999 @@ -3311,6 +3475,8 @@ select ss, max(t) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | 89999 8 | 99998 9 | 110000 + 11 | 58135 + | 49491 (10 rows) select min(t) from aggfns where cfloat8 > 0 order by 1; @@ -3338,8 +3504,6 @@ select ss, min(t) from aggfns where cfloat8 > 0 group by ss order by 1; ss | min ----+------- 0 | 1 - 1 | 10001 - 2 | 20006 3 | 30005 4 | 40004 5 | 50003 @@ -3347,6 +3511,44 @@ select ss, min(t) from aggfns where cfloat8 > 0 group by ss order by 1; 7 | 70001 8 | 80001 9 | 90001 + 11 | 10001 + | 32643 +(10 rows) + +select count(*) from aggfns where cfloat8 <= 0 order by 1; + count +-------- + 100570 +(1 row) + +select s, count(*) from aggfns where cfloat8 <= 0 group by s order by 1; + s | count +---+------- + 0 | 10057 + 1 | 10057 + 2 | 10058 + 3 | 10058 + 4 | 10057 + 5 | 10057 + 6 | 10056 + 7 | 10056 + 8 | 10057 + 9 | 10057 +(10 rows) + +select ss, count(*) from aggfns where cfloat8 <= 0 group by ss order by 1; + ss | count +----+------- + 0 | 10057 + 3 | 10047 + 4 | 10049 + 5 | 10057 + 6 | 10056 + 7 | 10056 + 8 | 10057 + 9 | 10057 + 11 | 20123 + | 11 (10 rows) select max(cdate) from aggfns where cfloat8 <= 0 order by 1; @@ -3374,8 +3576,6 @@ select ss, max(cdate) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | max ----+------------ 0 | 10-05-2075 - 1 | 02-20-2103 - 2 | 07-09-2130 3 | 11-24-2157 4 | 04-10-2185 5 | 08-28-2212 @@ -3383,6 +3583,8 @@ select ss, max(cdate) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | 06-01-2267 8 | 10-17-2294 9 | 03-04-2322 + 11 | 01-24-2183 + | 08-15-2153 (10 rows) select min(cdate) from aggfns where cfloat8 <= 0 order by 1; @@ -3410,8 +3612,6 @@ select ss, min(cdate) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | min ----+------------ 0 | 01-04-2021 - 1 | 05-21-2048 - 2 | 10-06-2075 3 | 02-22-2103 4 | 07-10-2130 5 | 11-25-2157 @@ -3419,6 +3619,8 @@ select ss, min(cdate) from aggfns where 
cfloat8 <= 0 group by ss order by 1; 7 | 09-01-2212 8 | 01-17-2240 9 | 06-03-2267 + 11 | 05-21-2048 + | 08-11-2104 (10 rows) select avg(cfloat4) from aggfns where cfloat8 <= 0 order by 1; @@ -3446,15 +3648,15 @@ select ss, avg(cfloat4) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | avg ----+------------------- 0 | 0.273710566446533 - 1 | NaN - 2 | Infinity - 3 | 0.271225418012403 - 4 | 0.274902188431565 + 3 | 0.281539709766287 + 4 | 0.290259015901558 5 | 0.278701234893647 6 | 0.279196201482741 7 | 0.281055561785383 8 | 0.276832673694496 9 | 0.276832673694496 + 11 | NaN + | -9.14947360483083 (10 rows) select count(cfloat4) from aggfns where cfloat8 <= 0 order by 1; @@ -3482,15 +3684,15 @@ select ss, count(cfloat4) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | count ----+------- 0 | 10057 - 1 | 10057 - 2 | 10058 - 3 | 10058 - 4 | 10057 + 3 | 10047 + 4 | 10049 5 | 10057 6 | 10056 7 | 10056 8 | 10057 9 | 10057 + 11 | 20123 + | 11 (10 rows) select max(cfloat4) from aggfns where cfloat8 <= 0 order by 1; @@ -3515,18 +3717,18 @@ select s, max(cfloat4) from aggfns where cfloat8 <= 0 group by s order by 1; (10 rows) select ss, max(cfloat4) from aggfns where cfloat8 <= 0 group by ss order by 1; - ss | max -----+---------- - 0 | 49.9977 - 1 | NaN - 2 | Infinity - 3 | 49.9977 - 4 | 49.9977 - 5 | 49.9977 - 6 | 49.9977 - 7 | 49.9977 - 8 | 49.9977 - 9 | 49.9977 + ss | max +----+--------- + 0 | 49.9977 + 3 | 49.9977 + 4 | 49.9977 + 5 | 49.9977 + 6 | 49.9977 + 7 | 49.9977 + 8 | 49.9977 + 9 | 49.9977 + 11 | NaN + | 48.478 (10 rows) select min(cfloat4) from aggfns where cfloat8 <= 0 order by 1; @@ -3554,8 +3756,6 @@ select ss, min(cfloat4) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | min ----+---------- 0 | -49.9756 - 1 | NaN - 2 | -49.9756 3 | -49.9756 4 | -49.9756 5 | -49.9756 @@ -3563,6 +3763,8 @@ select ss, min(cfloat4) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | -49.9756 8 | -49.9756 9 | -49.9756 + 11 | NaN + | -46.7478 (10 
rows) select stddev(cfloat4) from aggfns where cfloat8 <= 0 order by 1; @@ -3590,15 +3792,15 @@ select ss, stddev(cfloat4) from aggfns where cfloat8 <= 0 group by ss order by 1 ss | stddev ----+------------------ 0 | 28.8948722701767 - 1 | NaN - 2 | NaN - 3 | 28.8971947735822 - 4 | 28.8962786797298 + 3 | 28.8951497341689 + 4 | 28.8918331282339 5 | 28.8969485514879 6 | 28.898342825597 7 | 28.8981465590944 8 | 28.8998126918449 9 | 28.8998126918449 + 11 | NaN + | 30.6521210689863 (10 rows) select sum(cfloat4) from aggfns where cfloat8 <= 0 order by 1; @@ -3626,15 +3828,15 @@ select ss, sum(cfloat4) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | sum ----+---------- 0 | 2752.71 - 1 | NaN - 2 | Infinity - 3 | 2727.99 - 4 | 2764.69 + 3 | 2828.63 + 4 | 2916.81 5 | 2802.9 6 | 2807.6 7 | 2826.29 8 | 2784.11 9 | 2784.11 + 11 | NaN + | -100.644 (10 rows) select avg(cfloat8) from aggfns where cfloat8 <= 0 order by 1; @@ -3662,15 +3864,15 @@ select ss, avg(cfloat8) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | avg ----+------------------- 0 | -25.0026878159296 - 1 | -25.0026878159296 - 2 | -25.0037259853644 - 3 | -25.0033035766331 - 4 | -25.0044243209649 + 3 | -25.0067336719196 + 4 | -24.9970119951125 5 | -25.0071564832908 6 | -25.0084381401581 7 | -25.0089627629465 8 | -25.0089281362381 9 | -25.0089281362381 + 11 | -25.0069089689736 + | -21.870379272679 (10 rows) select max(cfloat8) from aggfns where cfloat8 <= 0 order by 1; @@ -3698,8 +3900,6 @@ select ss, max(cfloat8) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | max ----+----------------------- 0 | -0.000542588531970978 - 1 | -0.000542588531970978 - 2 | -0.000542588531970978 3 | -0.000542588531970978 4 | -0.000542588531970978 5 | -0.000542588531970978 @@ -3707,6 +3907,8 @@ select ss, max(cfloat8) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | -0.000542588531970978 8 | -0.000542588531970978 9 | -0.000542588531970978 + 11 | -0.000542588531970978 + | -4.53631093259901 (10 
rows) select min(cfloat8) from aggfns where cfloat8 <= 0 order by 1; @@ -3734,8 +3936,6 @@ select ss, min(cfloat8) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | min ----+------------------- 0 | -49.9755693599582 - 1 | -49.9755693599582 - 2 | -49.9755693599582 3 | -49.9755693599582 4 | -49.9755693599582 5 | -49.9755693599582 @@ -3743,6 +3943,8 @@ select ss, min(cfloat8) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | -49.9755693599582 8 | -49.9755693599582 9 | -49.9755693599582 + 11 | -49.9755693599582 + | -38.3512130472809 (10 rows) select stddev(cfloat8) from aggfns where cfloat8 <= 0 order by 1; @@ -3770,15 +3972,15 @@ select ss, stddev(cfloat8) from aggfns where cfloat8 <= 0 group by ss order by 1 ss | stddev ----+------------------ 0 | 14.4821730365752 - 1 | 14.4821730365752 - 2 | 14.4818272972739 - 3 | 14.4815478866618 - 4 | 14.48183169158 + 3 | 14.485819356309 + 4 | 14.4810339821535 5 | 14.4805923063393 6 | 14.4807418968606 7 | 14.4812687827813 8 | 14.4805491499824 9 | 14.4805491499824 + 11 | 14.4820374611522 + | 9.77628724519737 (10 rows) select sum(cfloat8) from aggfns where cfloat8 <= 0 order by 1; @@ -3806,15 +4008,15 @@ select ss, sum(cfloat8) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | sum ----+------------------- 0 | -251452.031364804 - 1 | -251452.031364804 - 2 | -251487.475960795 - 3 | -251483.227373776 - 4 | -251469.495395944 + 3 | -251242.653201777 + 4 | -251194.973538886 5 | -251496.972752456 6 | -251484.85393743 7 | -251490.12954419 8 | -251514.790266147 9 | -251514.790266147 + 11 | -503214.029182657 + | -240.574171999469 (10 rows) select avg(cint2) from aggfns where cfloat8 <= 0 order by 1; @@ -3839,18 +4041,18 @@ select s, avg(cint2) from aggfns where cfloat8 <= 0 group by s order by 1; (10 rows) select ss, avg(cint2) from aggfns where cfloat8 <= 0 group by ss order by 1; - ss | avg -----+----------------------- - 0 | -145.9491390464815368 - 1 | -146.0055732484076433 - 2 | -145.5036823248407643 - 3 | 
-144.9877599761170266 - 4 | -141.3792931806869089 - 5 | -139.5590167197452229 - 6 | -141.8382440772446745 - 7 | -144.4554140127388535 - 8 | -137.4765033851055356 - 9 | -144.4668060117447994 + ss | avg +----+------------------------ + 0 | -145.9491390464815368 + 3 | -143.3365212193664077 + 4 | -139.6013749128225565 + 5 | -139.5590167197452229 + 6 | -141.8382440772446745 + 7 | -144.4554140127388535 + 8 | -137.4765033851055356 + 9 | -144.4668060117447994 + 11 | -146.6405192996418623 + | -1651.8181818181818182 (10 rows) select max(cint2) from aggfns where cfloat8 <= 0 order by 1; @@ -3878,8 +4080,6 @@ select ss, max(cint2) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | max ----+------- 0 | 16383 - 1 | 16383 - 2 | 16383 3 | 16383 4 | 16383 5 | 16383 @@ -3887,6 +4087,8 @@ select ss, max(cint2) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | 16383 8 | 16383 9 | 16383 + 11 | 16383 + | 10616 (10 rows) select min(cint2) from aggfns where cfloat8 <= 0 order by 1; @@ -3914,8 +4116,6 @@ select ss, min(cint2) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | min ----+-------- 0 | -16375 - 1 | -16375 - 2 | -16375 3 | -16375 4 | -16375 5 | -16375 @@ -3923,6 +4123,8 @@ select ss, min(cint2) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | -16375 8 | -16375 9 | -16375 + 11 | -16375 + | -15206 (10 rows) select stddev(cint2) from aggfns where cfloat8 <= 0 order by 1; @@ -3950,15 +4152,15 @@ select ss, stddev(cint2) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | stddev ----+------------------- 0 | 9538.832178541630 - 1 | 9537.717255354464 - 2 | 9540.157933140146 - 3 | 9535.281475740805 - 4 | 9537.335027198989 + 3 | 9535.623908566148 + 4 | 9537.357537825379 5 | 9538.686769657499 6 | 9537.494931550558 7 | 9538.995155888144 8 | 9537.123911034158 9 | 9537.962959664725 + 11 | 9538.688016316899 + | 9537.230025727405 (10 rows) select sum(cint2) from aggfns where cfloat8 <= 0 order by 1; @@ -3986,15 +4188,15 @@ select ss, sum(cint2) from 
aggfns where cfloat8 <= 0 group by ss order by 1; ss | sum ----+---------- 0 | -1466351 - 1 | -1467064 - 2 | -1462021 - 3 | -1456982 - 4 | -1420155 + 3 | -1438812 + 4 | -1401179 5 | -1402289 6 | -1424907 7 | -1451488 8 | -1380814 9 | -1451458 + 11 | -2948061 + | -18170 (10 rows) select avg(cint4) from aggfns where cfloat8 <= 0 order by 1; @@ -4022,15 +4224,15 @@ select ss, avg(cint4) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | avg ----+----------------------- 0 | -2611631.507507208909 - 1 | -2611631.507507208909 - 2 | -2609754.938158679658 - 3 | -2572787.244482004375 - 4 | -2583154.535149647012 + 3 | -2371975.736040609137 + 4 | -2420650.042193253060 5 | -2738485.569155811872 6 | -2660373.598150357995 7 | -2578784.074681782021 8 | -2647231.455304762852 9 | -2647231.455304762852 + 11 | -2691833.530040252447 + | -185986716.81818182 (10 rows) select max(cint4) from aggfns where cfloat8 <= 0 order by 1; @@ -4058,8 +4260,6 @@ select ss, max(cint4) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | max ----+------------ 0 | 1073660631 - 1 | 1073660631 - 2 | 1073660631 3 | 1073660631 4 | 1073660631 5 | 1073660631 @@ -4067,6 +4267,8 @@ select ss, max(cint4) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | 1073660631 8 | 1073660631 9 | 1073660631 + 11 | 1073660631 + | 556763469 (10 rows) select min(cint4) from aggfns where cfloat8 <= 0 order by 1; @@ -4094,8 +4296,6 @@ select ss, min(cint4) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | min ----+------------- 0 | -1073184428 - 1 | -1073184428 - 2 | -1073184428 3 | -1073184428 4 | -1073184428 5 | -1073184428 @@ -4103,6 +4303,8 @@ select ss, min(cint4) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | -1073184428 8 | -1073184428 9 | -1073184428 + 11 | -1073184428 + | -888962001 (10 rows) select stddev(cint4) from aggfns where cfloat8 <= 0 order by 1; @@ -4130,15 +4332,15 @@ select ss, stddev(cint4) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | stddev 
----+----------- 0 | 618765816 - 1 | 618765816 - 2 | 618735080 - 3 | 618695738 - 4 | 618725626 + 3 | 618865631 + 4 | 618722231 5 | 618719952 6 | 618701125 7 | 618716683 8 | 618723996 9 | 618723996 + 11 | 618736738 + | 415688814 (10 rows) select sum(cint4) from aggfns where cfloat8 <= 0 order by 1; @@ -4166,15 +4368,15 @@ select ss, sum(cint4) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | sum ----+-------------- 0 | -26265178071 - 1 | -26265178071 - 2 | -26248915168 - 3 | -25877094105 - 4 | -25978785160 + 3 | -23831240220 + 4 | -24325112274 5 | -27540949369 6 | -26752716903 7 | -25932252655 8 | -26623206746 9 | -26623206746 + 11 | -54167766125 + | -2045853885 (10 rows) select avg(cint8) from aggfns where cfloat8 <= 0 order by 1; @@ -4202,15 +4404,15 @@ select ss, avg(cint8) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | avg ----+---------------------- 0 | 1609068.184846375659 - 1 | 1609068.184846375659 - 2 | 1561674.145953469875 - 3 | 1654097.121694173792 - 4 | 1574613.534354181167 + 3 | 1911095.221857270827 + 4 | 1427463.756393671012 5 | 1583685.317888038182 6 | 1613166.887529832936 7 | 1554490.104514717582 8 | 1568383.139206522820 9 | 1568383.139206522820 + 11 | 1658849.193559608408 + | -233078622.18181818 (10 rows) select max(cint8) from aggfns where cfloat8 <= 0 order by 1; @@ -4238,8 +4440,6 @@ select ss, max(cint8) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | max ----+------------ 0 | 1073660631 - 1 | 1073660631 - 2 | 1073660631 3 | 1073660631 4 | 1073660631 5 | 1073660631 @@ -4247,6 +4447,8 @@ select ss, max(cint8) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | 1073660631 8 | 1073660631 9 | 1073660631 + 11 | 1073660631 + | 989667573 (10 rows) select min(cint8) from aggfns where cfloat8 <= 0 order by 1; @@ -4274,8 +4476,6 @@ select ss, min(cint8) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | min ----+------------- 0 | -1073053412 - 1 | -1073053412 - 2 | -1073053412 3 | -1073053412 4 | 
-1073053412 5 | -1073053412 @@ -4283,6 +4483,8 @@ select ss, min(cint8) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | -1073053412 8 | -1073053412 9 | -1073053412 + 11 | -1073053412 + | -796158533 (10 rows) select stddev(cint8) from aggfns where cfloat8 <= 0 order by 1; @@ -4310,15 +4512,15 @@ select ss, stddev(cint8) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | stddev ----+----------- 0 | 617360997 - 1 | 617360997 - 2 | 617348601 - 3 | 617433171 - 4 | 617412408 + 3 | 617445433 + 4 | 617369297 5 | 617401473 6 | 617425094 7 | 617462874 8 | 617433744 9 | 617433744 + 11 | 617360997 + | 586053845 (10 rows) select sum(cint8) from aggfns where cfloat8 <= 0 order by 1; @@ -4346,15 +4548,15 @@ select ss, sum(cint8) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | sum ----+------------- 0 | 16182398735 - 1 | 16182398735 - 2 | 15707318560 - 3 | 16636908850 - 4 | 15835888315 + 3 | 19200773694 + 4 | 14344583288 5 | 15927123242 6 | 16222006221 7 | 15631952491 8 | 15773229231 9 | 15773229231 + 11 | 33381022322 + | -2563864844 (10 rows) select max(cts) from aggfns where cfloat8 <= 0 order by 1; @@ -4382,8 +4584,6 @@ select ss, max(cts) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | max ----+-------------------------- 0 | Fri Jan 01 06:34:21 2021 - 1 | Fri Jan 01 09:21:00 2021 - 2 | Fri Jan 01 12:07:41 2021 3 | Fri Jan 01 14:54:21 2021 4 | Fri Jan 01 17:41:00 2021 5 | Fri Jan 01 20:27:41 2021 @@ -4391,6 +4591,8 @@ select ss, max(cts) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | Sat Jan 02 02:01:01 2021 8 | Sat Jan 02 04:47:41 2021 9 | Sat Jan 02 07:34:20 2021 + 11 | Fri Jan 01 17:27:33 2021 + | Fri Jan 01 14:28:19 2021 (10 rows) select min(cts) from aggfns where cfloat8 <= 0 order by 1; @@ -4418,8 +4620,6 @@ select ss, min(cts) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | min ----+-------------------------- 0 | Fri Jan 01 01:01:04 2021 - 1 | Fri Jan 01 03:47:43 2021 - 2 | Fri Jan 01 06:34:22 2021 3 | 
Fri Jan 01 09:21:02 2021 4 | Fri Jan 01 12:07:42 2021 5 | Fri Jan 01 14:54:22 2021 @@ -4427,6 +4627,8 @@ select ss, min(cts) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | Fri Jan 01 20:27:45 2021 8 | Fri Jan 01 23:14:24 2021 9 | Sat Jan 02 02:01:03 2021 + 11 | Fri Jan 01 03:47:43 2021 + | Fri Jan 01 09:29:58 2021 (10 rows) select max(ctstz) from aggfns where cfloat8 <= 0 order by 1; @@ -4454,8 +4656,6 @@ select ss, max(ctstz) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | max ----+------------------------------ 0 | Fri Jan 01 06:34:21 2021 PST - 1 | Fri Jan 01 09:21:00 2021 PST - 2 | Fri Jan 01 12:07:41 2021 PST 3 | Fri Jan 01 14:54:21 2021 PST 4 | Fri Jan 01 17:41:00 2021 PST 5 | Fri Jan 01 20:27:41 2021 PST @@ -4463,6 +4663,8 @@ select ss, max(ctstz) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | Sat Jan 02 02:01:01 2021 PST 8 | Sat Jan 02 04:47:41 2021 PST 9 | Sat Jan 02 07:34:20 2021 PST + 11 | Fri Jan 01 17:27:33 2021 PST + | Fri Jan 01 14:28:19 2021 PST (10 rows) select min(ctstz) from aggfns where cfloat8 <= 0 order by 1; @@ -4490,8 +4692,6 @@ select ss, min(ctstz) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | min ----+------------------------------ 0 | Fri Jan 01 01:01:04 2021 PST - 1 | Fri Jan 01 03:47:43 2021 PST - 2 | Fri Jan 01 06:34:22 2021 PST 3 | Fri Jan 01 09:21:02 2021 PST 4 | Fri Jan 01 12:07:42 2021 PST 5 | Fri Jan 01 14:54:22 2021 PST @@ -4499,6 +4699,8 @@ select ss, min(ctstz) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | Fri Jan 01 20:27:45 2021 PST 8 | Fri Jan 01 23:14:24 2021 PST 9 | Sat Jan 02 02:01:03 2021 PST + 11 | Fri Jan 01 03:47:43 2021 PST + | Fri Jan 01 09:29:58 2021 PST (10 rows) select avg(s) from aggfns where cfloat8 <= 0 order by 1; @@ -4526,8 +4728,6 @@ select ss, avg(s) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | avg ----+---------------------------- 0 | 0.000000000000000000000000 - 1 | 1.00000000000000000000 - 2 | 2.0000000000000000 3 | 
3.0000000000000000 4 | 4.0000000000000000 5 | 5.0000000000000000 @@ -4535,6 +4735,8 @@ select ss, avg(s) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | 7.0000000000000000 8 | 8.0000000000000000 9 | 9.0000000000000000 + 11 | 1.5010187347810963 + | 3.0000000000000000 (10 rows) select count(s) from aggfns where cfloat8 <= 0 order by 1; @@ -4562,15 +4764,15 @@ select ss, count(s) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | count ----+------- 0 | 10057 - 1 | 10057 - 2 | 10058 - 3 | 10058 - 4 | 10057 + 3 | 10047 + 4 | 10049 5 | 10057 6 | 10056 7 | 10056 8 | 10057 9 | 10057 + 11 | 20123 + | 11 (10 rows) select max(s) from aggfns where cfloat8 <= 0 order by 1; @@ -4598,8 +4800,6 @@ select ss, max(s) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | max ----+----- 0 | 0 - 1 | 1 - 2 | 2 3 | 3 4 | 4 5 | 5 @@ -4607,6 +4807,8 @@ select ss, max(s) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | 7 8 | 8 9 | 9 + 11 | 4 + | 3 (10 rows) select min(s) from aggfns where cfloat8 <= 0 order by 1; @@ -4634,8 +4836,6 @@ select ss, min(s) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | min ----+----- 0 | 0 - 1 | 1 - 2 | 2 3 | 3 4 | 4 5 | 5 @@ -4643,6 +4843,8 @@ select ss, min(s) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | 7 8 | 8 9 | 9 + 11 | 1 + | 3 (10 rows) select stddev(s) from aggfns where cfloat8 <= 0 order by 1; @@ -4667,18 +4869,18 @@ select s, stddev(s) from aggfns where cfloat8 <= 0 group by s order by 1; (10 rows) select ss, stddev(s) from aggfns where cfloat8 <= 0 group by ss order by 1; - ss | stddev -----+-------- - 0 | 0 - 1 | 0 - 2 | 0 - 3 | 0 - 4 | 0 - 5 | 0 - 6 | 0 - 7 | 0 - 8 | 0 - 9 | 0 + ss | stddev +----+------------------------ + 0 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 + 11 | 0.50239111765906328317 + | 0 (10 rows) select sum(s) from aggfns where cfloat8 <= 0 order by 1; @@ -4706,31 +4908,31 @@ select ss, sum(s) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | 
sum ----+------- 0 | 0 - 1 | 10057 - 2 | 20116 - 3 | 30174 - 4 | 40228 + 3 | 30141 + 4 | 40196 5 | 50285 6 | 60336 7 | 70392 8 | 80456 9 | 90513 + 11 | 30205 + | 33 (10 rows) select avg(ss) from aggfns where cfloat8 <= 0 order by 1; avg -------------------- - 4.4999204534155315 + 6.4009387523742281 (1 row) select s, avg(ss) from aggfns where cfloat8 <= 0 group by s order by 1; s | avg ---+---------------------------- 0 | 0.000000000000000000000000 - 1 | 1.00000000000000000000 - 2 | 2.0000000000000000 + 1 | 11.0000000000000000 + 2 | 11.0000000000000000 3 | 3.0000000000000000 - 4 | 4.0000000000000000 + 4 | 4.0055682609127971 5 | 5.0000000000000000 6 | 6.0000000000000000 7 | 7.0000000000000000 @@ -4742,8 +4944,6 @@ select ss, avg(ss) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | avg ----+---------------------------- 0 | 0.000000000000000000000000 - 1 | 1.00000000000000000000 - 2 | 2.0000000000000000 3 | 3.0000000000000000 4 | 4.0000000000000000 5 | 5.0000000000000000 @@ -4751,22 +4951,60 @@ select ss, avg(ss) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | 7.0000000000000000 8 | 8.0000000000000000 9 | 9.0000000000000000 + 11 | 11.0000000000000000 + | +(10 rows) + +select count(ss) from aggfns where cfloat8 <= 0 order by 1; + count +-------- + 100559 +(1 row) + +select s, count(ss) from aggfns where cfloat8 <= 0 group by s order by 1; + s | count +---+------- + 0 | 10057 + 1 | 10057 + 2 | 10058 + 3 | 10047 + 4 | 10057 + 5 | 10057 + 6 | 10056 + 7 | 10056 + 8 | 10057 + 9 | 10057 +(10 rows) + +select ss, count(ss) from aggfns where cfloat8 <= 0 group by ss order by 1; + ss | count +----+------- + 0 | 10057 + 3 | 10047 + 4 | 10049 + 5 | 10057 + 6 | 10056 + 7 | 10056 + 8 | 10057 + 9 | 10057 + 11 | 20123 + | 0 (10 rows) select max(ss) from aggfns where cfloat8 <= 0 order by 1; max ----- - 9 + 11 (1 row) select s, max(ss) from aggfns where cfloat8 <= 0 group by s order by 1; s | max ---+----- 0 | 0 - 1 | 1 - 2 | 2 + 1 | 11 + 2 | 11 3 | 3 - 4 | 4 + 4 | 
11 5 | 5 6 | 6 7 | 7 @@ -4778,8 +5016,6 @@ select ss, max(ss) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | max ----+----- 0 | 0 - 1 | 1 - 2 | 2 3 | 3 4 | 4 5 | 5 @@ -4787,6 +5023,8 @@ select ss, max(ss) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | 7 8 | 8 9 | 9 + 11 | 11 + | (10 rows) select min(ss) from aggfns where cfloat8 <= 0 order by 1; @@ -4799,8 +5037,8 @@ select s, min(ss) from aggfns where cfloat8 <= 0 group by s order by 1; s | min ---+----- 0 | 0 - 1 | 1 - 2 | 2 + 1 | 11 + 2 | 11 3 | 3 4 | 4 5 | 5 @@ -4814,8 +5052,6 @@ select ss, min(ss) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | min ----+----- 0 | 0 - 1 | 1 - 2 | 2 3 | 3 4 | 4 5 | 5 @@ -4823,35 +5059,35 @@ select ss, min(ss) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | 7 8 | 8 9 | 9 + 11 | 11 + | (10 rows) select stddev(ss) from aggfns where cfloat8 <= 0 order by 1; stddev -------------------- - 2.8722956022845549 + 3.3528527058531930 (1 row) select s, stddev(ss) from aggfns where cfloat8 <= 0 group by s order by 1; - s | stddev ----+-------- - 0 | 0 - 1 | 0 - 2 | 0 - 3 | 0 - 4 | 0 - 5 | 0 - 6 | 0 - 7 | 0 - 8 | 0 - 9 | 0 + s | stddev +---+------------------------ + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 0.19735930141067535100 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 (10 rows) select ss, stddev(ss) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | stddev ----+-------- 0 | 0 - 1 | 0 - 2 | 0 3 | 0 4 | 0 5 | 0 @@ -4859,42 +5095,44 @@ select ss, stddev(ss) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | 0 8 | 0 9 | 0 + 11 | 0 + | (10 rows) select sum(ss) from aggfns where cfloat8 <= 0 order by 1; sum -------- - 452557 + 643672 (1 row) select s, sum(ss) from aggfns where cfloat8 <= 0 group by s order by 1; - s | sum ----+------- - 0 | 0 - 1 | 10057 - 2 | 20116 - 3 | 30174 - 4 | 40228 - 5 | 50285 - 6 | 60336 - 7 | 70392 - 8 | 80456 - 9 | 90513 + s | sum +---+-------- + 0 | 0 + 1 | 110627 + 2 | 110638 + 3 | 30141 + 4 | 40284 + 5 | 
50285 + 6 | 60336 + 7 | 70392 + 8 | 80456 + 9 | 90513 (10 rows) select ss, sum(ss) from aggfns where cfloat8 <= 0 group by ss order by 1; - ss | sum -----+------- - 0 | 0 - 1 | 10057 - 2 | 20116 - 3 | 30174 - 4 | 40228 - 5 | 50285 - 6 | 60336 - 7 | 70392 - 8 | 80456 - 9 | 90513 + ss | sum +----+-------- + 0 | 0 + 3 | 30141 + 4 | 40196 + 5 | 50285 + 6 | 60336 + 7 | 70392 + 8 | 80456 + 9 | 90513 + 11 | 221353 + | (10 rows) select max(t) from aggfns where cfloat8 <= 0 order by 1; @@ -4922,8 +5160,6 @@ select ss, max(t) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | max ----+-------- 0 | 20000 - 1 | 29999 - 2 | 40000 3 | 50000 4 | 59999 5 | 70000 @@ -4931,6 +5167,8 @@ select ss, max(t) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | 90000 8 | 100000 9 | 109999 + 11 | 59192 + | 48438 (10 rows) select min(t) from aggfns where cfloat8 <= 0 order by 1; @@ -4958,8 +5196,6 @@ select ss, min(t) from aggfns where cfloat8 <= 0 group by ss order by 1; ss | min ----+------- 0 | 3 - 1 | 10002 - 2 | 20001 3 | 30001 4 | 40001 5 | 50001 @@ -4967,6 +5203,44 @@ select ss, min(t) from aggfns where cfloat8 <= 0 group by ss order by 1; 7 | 70004 8 | 80003 9 | 90002 + 11 | 10002 + | 30537 +(10 rows) + +select count(*) from aggfns where cfloat8 < 1000 order by 1; + count +-------- + 200000 +(1 row) + +select s, count(*) from aggfns where cfloat8 < 1000 group by s order by 1; + s | count +---+------- + 0 | 20000 + 1 | 20000 + 2 | 20000 + 3 | 20000 + 4 | 20000 + 5 | 20000 + 6 | 20000 + 7 | 20000 + 8 | 20000 + 9 | 20000 +(10 rows) + +select ss, count(*) from aggfns where cfloat8 < 1000 group by ss order by 1; + ss | count +----+------- + 0 | 20000 + 3 | 19981 + 4 | 19981 + 5 | 20000 + 6 | 20000 + 7 | 20000 + 8 | 20000 + 9 | 20000 + 11 | 40019 + | 19 (10 rows) select max(cdate) from aggfns where cfloat8 < 1000 order by 1; @@ -4994,8 +5268,6 @@ select ss, max(cdate) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | max ----+------------ 0 | 10-05-2075 - 1 | 
02-21-2103 - 2 | 07-09-2130 3 | 11-24-2157 4 | 04-11-2185 5 | 08-28-2212 @@ -5003,6 +5275,8 @@ select ss, max(cdate) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | 06-01-2267 8 | 10-17-2294 9 | 03-05-2322 + 11 | 01-24-2183 + | 07-03-2156 (10 rows) select min(cdate) from aggfns where cfloat8 < 1000 order by 1; @@ -5030,8 +5304,6 @@ select ss, min(cdate) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | min ----+------------ 0 | 01-02-2021 - 1 | 05-20-2048 - 2 | 10-06-2075 3 | 02-22-2103 4 | 07-10-2130 5 | 11-25-2157 @@ -5039,6 +5311,8 @@ select ss, min(cdate) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | 08-29-2212 8 | 01-15-2240 9 | 06-02-2267 + 11 | 05-20-2048 + | 08-11-2104 (10 rows) select avg(cfloat4) from aggfns where cfloat8 < 1000 order by 1; @@ -5066,15 +5340,15 @@ select ss, avg(cfloat4) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | avg ----+-------------------- 0 | -0.132126759885764 - 1 | NaN - 2 | Infinity 3 | -Infinity - 4 | -0.13252146150968 + 4 | -0.124977103379316 5 | -0.130611110996222 6 | -0.131984978889441 7 | -0.131050092529273 8 | -0.131313872741675 9 | -0.132765194868064 + 11 | NaN + | -5.91588952823689 (10 rows) select count(cfloat4) from aggfns where cfloat8 < 1000 order by 1; @@ -5102,15 +5376,15 @@ select ss, count(cfloat4) from aggfns where cfloat8 < 1000 group by ss order by ss | count ----+------- 0 | 20000 - 1 | 20000 - 2 | 20000 - 3 | 20000 - 4 | 20000 + 3 | 19981 + 4 | 19981 5 | 20000 6 | 20000 7 | 20000 8 | 20000 9 | 20000 + 11 | 40019 + | 19 (10 rows) select max(cfloat4) from aggfns where cfloat8 < 1000 order by 1; @@ -5135,18 +5409,18 @@ select s, max(cfloat4) from aggfns where cfloat8 < 1000 group by s order by 1; (10 rows) select ss, max(cfloat4) from aggfns where cfloat8 < 1000 group by ss order by 1; - ss | max -----+---------- - 0 | 49.9977 - 1 | NaN - 2 | Infinity - 3 | 49.9977 - 4 | 49.9977 - 5 | 49.9977 - 6 | 49.9977 - 7 | 49.9977 - 8 | 49.9977 - 9 | 49.9977 + ss | 
max +----+--------- + 0 | 49.9977 + 3 | 49.9977 + 4 | 49.9977 + 5 | 49.9977 + 6 | 49.9977 + 7 | 49.9977 + 8 | 49.9977 + 9 | 49.9977 + 11 | NaN + | 48.478 (10 rows) select min(cfloat4) from aggfns where cfloat8 < 1000 order by 1; @@ -5174,8 +5448,6 @@ select ss, min(cfloat4) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | min ----+----------- 0 | -49.9756 - 1 | NaN - 2 | -49.9756 3 | -Infinity 4 | -49.9756 5 | -49.9756 @@ -5183,6 +5455,8 @@ select ss, min(cfloat4) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | -49.9756 8 | -49.9756 9 | -49.9756 + 11 | NaN + | -46.7478 (10 rows) select stddev(cfloat4) from aggfns where cfloat8 < 1000 order by 1; @@ -5210,15 +5484,15 @@ select ss, stddev(cfloat4) from aggfns where cfloat8 < 1000 group by ss order by ss | stddev ----+------------------ 0 | 28.8941380063427 - 1 | NaN - 2 | NaN 3 | NaN - 4 | 28.8948189281654 + 4 | 28.89302954083 5 | 28.8951827753267 6 | 28.8960531969495 7 | 28.8959678301628 8 | 28.8963276918371 9 | 28.8968307405966 + 11 | NaN + | 27.2867943876683 (10 rows) select sum(cfloat4) from aggfns where cfloat8 < 1000 order by 1; @@ -5246,15 +5520,15 @@ select ss, sum(cfloat4) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | sum ----+----------- 0 | -2642.54 - 1 | NaN - 2 | Infinity 3 | -Infinity - 4 | -2650.43 + 4 | -2497.17 5 | -2612.22 6 | -2639.7 7 | -2621 8 | -2626.28 9 | -2655.3 + 11 | NaN + | -112.402 (10 rows) select avg(cfloat8) from aggfns where cfloat8 < 1000 order by 1; @@ -5282,15 +5556,15 @@ select ss, avg(cfloat8) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | avg ----+-------------------- 0 | -0.131261021163082 - 1 | -0.129096584053477 - 2 | -0.132733892038232 - 3 | -0.132521462687291 - 4 | -0.130611112199258 + 3 | -0.128550004763072 + 4 | -0.129264304572168 5 | -0.131984980024863 6 | -0.131050093692029 7 | -0.13131387403002 8 | -0.132765196124092 9 | -0.134423591727391 + 11 | -0.131587538323368 + | -4.30903203025656 (10 rows) select 
max(cfloat8) from aggfns where cfloat8 < 1000 order by 1; @@ -5315,18 +5589,18 @@ select s, max(cfloat8) from aggfns where cfloat8 < 1000 group by s order by 1; (10 rows) select ss, max(cfloat8) from aggfns where cfloat8 < 1000 group by ss order by 1; - ss | max -----+----------------- - 0 | 49.997744965367 - 1 | 49.997744965367 - 2 | 49.997744965367 - 3 | 49.997744965367 - 4 | 49.997744965367 - 5 | 49.997744965367 - 6 | 49.997744965367 - 7 | 49.997744965367 - 8 | 49.997744965367 - 9 | 49.997744965367 + ss | max +----+------------------ + 0 | 49.997744965367 + 3 | 49.997744965367 + 4 | 49.997744965367 + 5 | 49.997744965367 + 6 | 49.997744965367 + 7 | 49.997744965367 + 8 | 49.997744965367 + 9 | 49.997744965367 + 11 | 49.997744965367 + | 49.2015035590157 (10 rows) select min(cfloat8) from aggfns where cfloat8 < 1000 order by 1; @@ -5354,8 +5628,6 @@ select ss, min(cfloat8) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | min ----+------------------- 0 | -49.9755693599582 - 1 | -49.9755693599582 - 2 | -49.9755693599582 3 | -49.9755693599582 4 | -49.9755693599582 5 | -49.9755693599582 @@ -5363,6 +5635,8 @@ select ss, min(cfloat8) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | -49.9755693599582 8 | -49.9755693599582 9 | -49.9755693599582 + 11 | -49.9755693599582 + | -38.3512130472809 (10 rows) select stddev(cfloat8) from aggfns where cfloat8 < 1000 order by 1; @@ -5390,15 +5664,15 @@ select ss, stddev(cfloat8) from aggfns where cfloat8 < 1000 group by ss order by ss | stddev ----+------------------ 0 | 28.893219634188 - 1 | 28.8952055755515 - 2 | 28.8950722121689 - 3 | 28.8948189369737 - 4 | 28.8951827840888 + 3 | 28.8988694216911 + 4 | 28.8937408619903 5 | 28.8960532056265 6 | 28.8959678388464 7 | 28.8963277006942 8 | 28.8968307494196 9 | 28.8953209642426 + 11 | 28.8954978116663 + | 24.556507133269 (10 rows) select sum(cfloat8) from aggfns where cfloat8 < 1000 order by 1; @@ -5426,15 +5700,15 @@ select ss, sum(cfloat8) from aggfns where 
cfloat8 < 1000 group by ss order by 1; ss | sum ----+------------------- 0 | -2625.22042326164 - 1 | -2581.93168106955 - 2 | -2654.67784076463 - 3 | -2650.42925374582 - 4 | -2612.22224398516 + 3 | -2568.55764517095 + 4 | -2582.83006965648 5 | -2639.69960049726 6 | -2621.00187384058 7 | -2626.2774806004 8 | -2655.30392248183 9 | -2688.47183454782 + 11 | -5266.00169616286 + | -81.8716085748747 (10 rows) select avg(cint2) from aggfns where cfloat8 < 1000 order by 1; @@ -5459,18 +5733,18 @@ select s, avg(cint2) from aggfns where cfloat8 < 1000 group by s order by 1; (10 rows) select ss, avg(cint2) from aggfns where cfloat8 < 1000 group by ss order by 1; - ss | avg -----+---------------------- - 0 | -42.2972824182973825 - 1 | -43.0287773384715480 - 2 | -40.9893899204244032 - 3 | -42.8851408838396477 - 4 | -42.0152144537310445 - 5 | -43.5287022671537961 - 6 | -41.7711325759471498 - 7 | -41.3288123717531655 - 8 | -40.6353035383614434 - 9 | -43.1119563585406136 + ss | avg +----+----------------------- + 0 | -42.2972824182973825 + 3 | -42.6071034966436229 + 4 | -40.0837090471896604 + 5 | -43.5287022671537961 + 6 | -41.7711325759471498 + 7 | -41.3288123717531655 + 8 | -40.6353035383614434 + 9 | -43.1119563585406136 + 11 | -42.9734623946374528 + | -335.0000000000000000 (10 rows) select max(cint2) from aggfns where cfloat8 < 1000 order by 1; @@ -5498,8 +5772,6 @@ select ss, max(cint2) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | max ----+------- 0 | 16383 - 1 | 16383 - 2 | 16383 3 | 16383 4 | 16383 5 | 16383 @@ -5507,6 +5779,8 @@ select ss, max(cint2) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | 16383 8 | 16383 9 | 16383 + 11 | 16383 + | 13720 (10 rows) select min(cint2) from aggfns where cfloat8 < 1000 order by 1; @@ -5534,8 +5808,6 @@ select ss, min(cint2) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | min ----+-------- 0 | -16375 - 1 | -16375 - 2 | -16375 3 | -16375 4 | -16375 5 | -16375 @@ -5543,6 +5815,8 @@ select ss, 
min(cint2) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | -16375 8 | -16375 9 | -16375 + 11 | -16375 + | -15206 (10 rows) select stddev(cint2) from aggfns where cfloat8 < 1000 order by 1; @@ -5570,15 +5844,15 @@ select ss, stddev(cint2) from aggfns where cfloat8 < 1000 group by ss order by 1 ss | stddev ----+------------------- 0 | 9468.854793575036 - 1 | 9468.590431229826 - 2 | 9469.116705177088 - 3 | 9466.421782354268 - 4 | 9467.442985677590 + 3 | 9466.351508971864 + 4 | 9469.021659499615 5 | 9467.599133444078 6 | 9468.362090451302 7 | 9467.745653535755 8 | 9466.743345080951 9 | 9468.145452253715 + 11 | 9467.946206136906 + | 9796.780548277639 (10 rows) select sum(cint2) from aggfns where cfloat8 < 1000 order by 1; @@ -5603,18 +5877,18 @@ select s, sum(cint2) from aggfns where cfloat8 < 1000 group by s order by 1; (10 rows) select ss, sum(cint2) from aggfns where cfloat8 < 1000 group by ss order by 1; - ss | sum -----+--------- - 0 | -845142 - 1 | -859758 - 2 | -819009 - 3 | -856888 - 4 | -839506 - 5 | -869747 - 6 | -834629 - 7 | -825791 - 8 | -811934 - 9 | -861420 + ss | sum +----+---------- + 0 | -845142 + 3 | -850523 + 4 | -800151 + 5 | -869747 + 6 | -834629 + 7 | -825791 + 8 | -811934 + 9 | -861420 + 11 | -1718122 + | -6365 (10 rows) select avg(cint4) from aggfns where cfloat8 < 1000 order by 1; @@ -5642,15 +5916,15 @@ select ss, avg(cint4) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | avg ----+----------------------- 0 | -2919248.121000000000 - 1 | -2836378.364750000000 - 2 | -2837313.994650000000 - 3 | -2818722.941500000000 - 4 | -2772243.427000000000 + 3 | -2682566.613082428307 + 4 | -2753845.761523447275 5 | -2850351.637450000000 6 | -2845789.891100000000 7 | -2804766.678700000000 8 | -2834269.365200000000 9 | -2814193.446750000000 + 11 | -2846001.238586671331 + | -146005017.57894737 (10 rows) select max(cint4) from aggfns where cfloat8 < 1000 order by 1; @@ -5678,8 +5952,6 @@ select ss, max(cint4) from aggfns where cfloat8 < 
1000 group by ss order by 1; ss | max ----+------------ 0 | 1073660631 - 1 | 1073660631 - 2 | 1073660631 3 | 1073660631 4 | 1073660631 5 | 1073660631 @@ -5687,6 +5959,8 @@ select ss, max(cint4) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | 1073660631 8 | 1073660631 9 | 1073660631 + 11 | 1073660631 + | 978482473 (10 rows) select min(cint4) from aggfns where cfloat8 < 1000 order by 1; @@ -5714,8 +5988,6 @@ select ss, min(cint4) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | min ----+------------- 0 | -1073184428 - 1 | -1073184428 - 2 | -1073184428 3 | -1073184428 4 | -1073184428 5 | -1073184428 @@ -5723,6 +5995,8 @@ select ss, min(cint4) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | -1073184428 8 | -1073184428 9 | -1073184428 + 11 | -1073184428 + | -960881516 (10 rows) select stddev(cint4) from aggfns where cfloat8 < 1000 order by 1; @@ -5750,15 +6024,15 @@ select ss, stddev(cint4) from aggfns where cfloat8 < 1000 group by ss order by 1 ss | stddev ----+----------- 0 | 620497458 - 1 | 620477996 - 2 | 620477953 - 3 | 620458232 - 4 | 620500878 + 3 | 620502162 + 4 | 620460588 5 | 620498014 6 | 620492575 7 | 620500389 8 | 620519080 9 | 620517247 + 11 | 620490348 + | 569369090 (10 rows) select sum(cint4) from aggfns where cfloat8 < 1000 order by 1; @@ -5783,18 +6057,18 @@ select s, sum(cint4) from aggfns where cfloat8 < 1000 group by s order by 1; (10 rows) select ss, sum(cint4) from aggfns where cfloat8 < 1000 group by ss order by 1; - ss | sum -----+-------------- - 0 | -58384962420 - 1 | -56727567295 - 2 | -56746279893 - 3 | -56374458830 - 4 | -55444868540 - 5 | -57007032749 - 6 | -56915797822 - 7 | -56095333574 - 8 | -56685387304 - 9 | -56283868935 + ss | sum +----+--------------- + 0 | -58384962420 + 3 | -53600363496 + 4 | -55024592161 + 5 | -57007032749 + 6 | -56915797822 + 7 | -56095333574 + 8 | -56685387304 + 9 | -56283868935 + 11 | -113894123567 + | -2774095334 (10 rows) select avg(cint8) from aggfns where cfloat8 < 
1000 order by 1; @@ -5822,15 +6096,15 @@ select ss, avg(cint8) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | avg ----+----------------------- 0 | -2836378.364750000000 - 1 | -2837313.994650000000 - 2 | -2818722.941500000000 - 3 | -2772243.427000000000 - 4 | -2850351.637450000000 + 3 | -2653999.743206045743 + 4 | -3009394.583854661929 5 | -2845789.891100000000 6 | -2804766.678700000000 7 | -2834269.365200000000 8 | -2814193.446750000000 9 | -2819857.913500000000 + 11 | -2748620.862390364577 + | -127121035.31578947 (10 rows) select max(cint8) from aggfns where cfloat8 < 1000 order by 1; @@ -5858,8 +6132,6 @@ select ss, max(cint8) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | max ----+------------ 0 | 1073660631 - 1 | 1073660631 - 2 | 1073660631 3 | 1073660631 4 | 1073660631 5 | 1073660631 @@ -5867,6 +6139,8 @@ select ss, max(cint8) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | 1073660631 8 | 1073660631 9 | 1073660631 + 11 | 1073660631 + | 989667573 (10 rows) select min(cint8) from aggfns where cfloat8 < 1000 order by 1; @@ -5894,8 +6168,6 @@ select ss, min(cint8) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | min ----+------------- 0 | -1073184428 - 1 | -1073184428 - 2 | -1073184428 3 | -1073184428 4 | -1073184428 5 | -1073184428 @@ -5903,6 +6175,8 @@ select ss, min(cint8) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | -1073184428 8 | -1073184428 9 | -1073184428 + 11 | -1073184428 + | -796158533 (10 rows) select stddev(cint8) from aggfns where cfloat8 < 1000 order by 1; @@ -5930,15 +6204,15 @@ select ss, stddev(cint8) from aggfns where cfloat8 < 1000 group by ss order by 1 ss | stddev ----+----------- 0 | 620477996 - 1 | 620477953 - 2 | 620458232 - 3 | 620500878 - 4 | 620498014 + 3 | 620514804 + 4 | 620462299 5 | 620492575 6 | 620500389 7 | 620519080 8 | 620517247 9 | 620524975 + 11 | 620478168 + | 609007799 (10 rows) select sum(cint8) from aggfns where cfloat8 < 1000 order by 1; @@ -5963,18 
+6237,18 @@ select s, sum(cint8) from aggfns where cfloat8 < 1000 group by s order by 1; (10 rows) select ss, sum(cint8) from aggfns where cfloat8 < 1000 group by ss order by 1; - ss | sum -----+-------------- - 0 | -56727567295 - 1 | -56746279893 - 2 | -56374458830 - 3 | -55444868540 - 4 | -57007032749 - 5 | -56915797822 - 6 | -56095333574 - 7 | -56685387304 - 8 | -56283868935 - 9 | -56397158270 + ss | sum +----+--------------- + 0 | -56727567295 + 3 | -53029568869 + 4 | -60130713180 + 5 | -56915797822 + 6 | -56095333574 + 7 | -56685387304 + 8 | -56283868935 + 9 | -56397158270 + 11 | -109997058292 + | -2415299671 (10 rows) select max(cts) from aggfns where cfloat8 < 1000 order by 1; @@ -6002,8 +6276,6 @@ select ss, max(cts) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | max ----+-------------------------- 0 | Fri Jan 01 06:34:21 2021 - 1 | Fri Jan 01 09:21:01 2021 - 2 | Fri Jan 01 12:07:41 2021 3 | Fri Jan 01 14:54:21 2021 4 | Fri Jan 01 17:41:01 2021 5 | Fri Jan 01 20:27:41 2021 @@ -6011,6 +6283,8 @@ select ss, max(cts) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | Sat Jan 02 02:01:01 2021 8 | Sat Jan 02 04:47:41 2021 9 | Sat Jan 02 07:34:21 2021 + 11 | Fri Jan 01 17:27:33 2021 + | Fri Jan 01 14:45:52 2021 (10 rows) select min(cts) from aggfns where cfloat8 < 1000 order by 1; @@ -6038,8 +6312,6 @@ select ss, min(cts) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | min ----+-------------------------- 0 | Fri Jan 01 01:01:02 2021 - 1 | Fri Jan 01 03:47:42 2021 - 2 | Fri Jan 01 06:34:22 2021 3 | Fri Jan 01 09:21:02 2021 4 | Fri Jan 01 12:07:42 2021 5 | Fri Jan 01 14:54:22 2021 @@ -6047,6 +6319,8 @@ select ss, min(cts) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | Fri Jan 01 20:27:42 2021 8 | Fri Jan 01 23:14:22 2021 9 | Sat Jan 02 02:01:02 2021 + 11 | Fri Jan 01 03:47:42 2021 + | Fri Jan 01 09:29:58 2021 (10 rows) select max(ctstz) from aggfns where cfloat8 < 1000 order by 1; @@ -6074,8 +6348,6 @@ select ss, 
max(ctstz) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | max ----+------------------------------ 0 | Fri Jan 01 06:34:21 2021 PST - 1 | Fri Jan 01 09:21:01 2021 PST - 2 | Fri Jan 01 12:07:41 2021 PST 3 | Fri Jan 01 14:54:21 2021 PST 4 | Fri Jan 01 17:41:01 2021 PST 5 | Fri Jan 01 20:27:41 2021 PST @@ -6083,6 +6355,8 @@ select ss, max(ctstz) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | Sat Jan 02 02:01:01 2021 PST 8 | Sat Jan 02 04:47:41 2021 PST 9 | Sat Jan 02 07:34:21 2021 PST + 11 | Fri Jan 01 17:27:33 2021 PST + | Fri Jan 01 14:45:52 2021 PST (10 rows) select min(ctstz) from aggfns where cfloat8 < 1000 order by 1; @@ -6110,8 +6384,6 @@ select ss, min(ctstz) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | min ----+------------------------------ 0 | Fri Jan 01 01:01:02 2021 PST - 1 | Fri Jan 01 03:47:42 2021 PST - 2 | Fri Jan 01 06:34:22 2021 PST 3 | Fri Jan 01 09:21:02 2021 PST 4 | Fri Jan 01 12:07:42 2021 PST 5 | Fri Jan 01 14:54:22 2021 PST @@ -6119,6 +6391,8 @@ select ss, min(ctstz) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | Fri Jan 01 20:27:42 2021 PST 8 | Fri Jan 01 23:14:22 2021 PST 9 | Sat Jan 02 02:01:02 2021 PST + 11 | Fri Jan 01 03:47:42 2021 PST + | Fri Jan 01 09:29:58 2021 PST (10 rows) select avg(s) from aggfns where cfloat8 < 1000 order by 1; @@ -6146,8 +6420,6 @@ select ss, avg(s) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | avg ----+---------------------------- 0 | 0.000000000000000000000000 - 1 | 1.00000000000000000000 - 2 | 2.0000000000000000 3 | 3.0000000000000000 4 | 4.0000000000000000 5 | 5.0000000000000000 @@ -6155,6 +6427,8 @@ select ss, avg(s) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | 7.0000000000000000 8 | 8.0000000000000000 9 | 9.0000000000000000 + 11 | 1.5011869362053025 + | 3.0000000000000000 (10 rows) select count(s) from aggfns where cfloat8 < 1000 order by 1; @@ -6182,15 +6456,15 @@ select ss, count(s) from aggfns where cfloat8 < 1000 
group by ss order by 1; ss | count ----+------- 0 | 20000 - 1 | 20000 - 2 | 20000 - 3 | 20000 - 4 | 20000 + 3 | 19981 + 4 | 19981 5 | 20000 6 | 20000 7 | 20000 8 | 20000 9 | 20000 + 11 | 40019 + | 19 (10 rows) select max(s) from aggfns where cfloat8 < 1000 order by 1; @@ -6218,8 +6492,6 @@ select ss, max(s) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | max ----+----- 0 | 0 - 1 | 1 - 2 | 2 3 | 3 4 | 4 5 | 5 @@ -6227,6 +6499,8 @@ select ss, max(s) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | 7 8 | 8 9 | 9 + 11 | 4 + | 3 (10 rows) select min(s) from aggfns where cfloat8 < 1000 order by 1; @@ -6254,8 +6528,6 @@ select ss, min(s) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | min ----+----- 0 | 0 - 1 | 1 - 2 | 2 3 | 3 4 | 4 5 | 5 @@ -6263,6 +6535,8 @@ select ss, min(s) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | 7 8 | 8 9 | 9 + 11 | 1 + | 3 (10 rows) select stddev(s) from aggfns where cfloat8 < 1000 order by 1; @@ -6287,18 +6561,18 @@ select s, stddev(s) from aggfns where cfloat8 < 1000 group by s order by 1; (10 rows) select ss, stddev(s) from aggfns where cfloat8 < 1000 group by ss order by 1; - ss | stddev -----+-------- - 0 | 0 - 1 | 0 - 2 | 0 - 3 | 0 - 4 | 0 - 5 | 0 - 6 | 0 - 7 | 0 - 8 | 0 - 9 | 0 + ss | stddev +----+------------------------ + 0 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 + 11 | 0.50284545977155885187 + | 0 (10 rows) select sum(s) from aggfns where cfloat8 < 1000 order by 1; @@ -6326,31 +6600,31 @@ select ss, sum(s) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | sum ----+-------- 0 | 0 - 1 | 20000 - 2 | 40000 - 3 | 60000 - 4 | 80000 + 3 | 59943 + 4 | 79924 5 | 100000 6 | 120000 7 | 140000 8 | 160000 9 | 180000 + 11 | 60076 + | 57 (10 rows) select avg(ss) from aggfns where cfloat8 < 1000 order by 1; avg -------------------- - 4.5000000000000000 + 6.4009880938689175 (1 row) select s, avg(ss) from aggfns where cfloat8 < 1000 group by s order by 1; s | avg 
---+---------------------------- 0 | 0.000000000000000000000000 - 1 | 1.00000000000000000000 - 2 | 2.0000000000000000 + 1 | 11.0000000000000000 + 2 | 11.0000000000000000 3 | 3.0000000000000000 - 4 | 4.0000000000000000 + 4 | 4.0066500000000000 5 | 5.0000000000000000 6 | 6.0000000000000000 7 | 7.0000000000000000 @@ -6362,8 +6636,6 @@ select ss, avg(ss) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | avg ----+---------------------------- 0 | 0.000000000000000000000000 - 1 | 1.00000000000000000000 - 2 | 2.0000000000000000 3 | 3.0000000000000000 4 | 4.0000000000000000 5 | 5.0000000000000000 @@ -6371,22 +6643,60 @@ select ss, avg(ss) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | 7.0000000000000000 8 | 8.0000000000000000 9 | 9.0000000000000000 + 11 | 11.0000000000000000 + | +(10 rows) + +select count(ss) from aggfns where cfloat8 < 1000 order by 1; + count +-------- + 199981 +(1 row) + +select s, count(ss) from aggfns where cfloat8 < 1000 group by s order by 1; + s | count +---+------- + 0 | 20000 + 1 | 20000 + 2 | 20000 + 3 | 19981 + 4 | 20000 + 5 | 20000 + 6 | 20000 + 7 | 20000 + 8 | 20000 + 9 | 20000 +(10 rows) + +select ss, count(ss) from aggfns where cfloat8 < 1000 group by ss order by 1; + ss | count +----+------- + 0 | 20000 + 3 | 19981 + 4 | 19981 + 5 | 20000 + 6 | 20000 + 7 | 20000 + 8 | 20000 + 9 | 20000 + 11 | 40019 + | 0 (10 rows) select max(ss) from aggfns where cfloat8 < 1000 order by 1; max ----- - 9 + 11 (1 row) select s, max(ss) from aggfns where cfloat8 < 1000 group by s order by 1; s | max ---+----- 0 | 0 - 1 | 1 - 2 | 2 + 1 | 11 + 2 | 11 3 | 3 - 4 | 4 + 4 | 11 5 | 5 6 | 6 7 | 7 @@ -6398,8 +6708,6 @@ select ss, max(ss) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | max ----+----- 0 | 0 - 1 | 1 - 2 | 2 3 | 3 4 | 4 5 | 5 @@ -6407,6 +6715,8 @@ select ss, max(ss) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | 7 8 | 8 9 | 9 + 11 | 11 + | (10 rows) select min(ss) from aggfns where cfloat8 < 1000 order 
by 1; @@ -6419,8 +6729,8 @@ select s, min(ss) from aggfns where cfloat8 < 1000 group by s order by 1; s | min ---+----- 0 | 0 - 1 | 1 - 2 | 2 + 1 | 11 + 2 | 11 3 | 3 4 | 4 5 | 5 @@ -6434,8 +6744,6 @@ select ss, min(ss) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | min ----+----- 0 | 0 - 1 | 1 - 2 | 2 3 | 3 4 | 4 5 | 5 @@ -6443,35 +6751,35 @@ select ss, min(ss) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | 7 8 | 8 9 | 9 + 11 | 11 + | (10 rows) select stddev(ss) from aggfns where cfloat8 < 1000 order by 1; stddev -------------------- - 2.8722885039992502 + 3.3528328280068652 (1 row) select s, stddev(ss) from aggfns where cfloat8 < 1000 group by s order by 1; - s | stddev ----+-------- - 0 | 0 - 1 | 0 - 2 | 0 - 3 | 0 - 4 | 0 - 5 | 0 - 6 | 0 - 7 | 0 - 8 | 0 - 9 | 0 + s | stddev +---+------------------------ + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 0.21565737387148452722 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 (10 rows) select ss, stddev(ss) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | stddev ----+-------- 0 | 0 - 1 | 0 - 2 | 0 3 | 0 4 | 0 5 | 0 @@ -6479,22 +6787,24 @@ select ss, stddev(ss) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | 0 8 | 0 9 | 0 + 11 | 0 + | (10 rows) select sum(ss) from aggfns where cfloat8 < 1000 order by 1; - sum --------- - 900000 + sum +--------- + 1280076 (1 row) select s, sum(ss) from aggfns where cfloat8 < 1000 group by s order by 1; s | sum ---+-------- 0 | 0 - 1 | 20000 - 2 | 40000 - 3 | 60000 - 4 | 80000 + 1 | 220000 + 2 | 220000 + 3 | 59943 + 4 | 80133 5 | 100000 6 | 120000 7 | 140000 @@ -6506,15 +6816,15 @@ select ss, sum(ss) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | sum ----+-------- 0 | 0 - 1 | 20000 - 2 | 40000 - 3 | 60000 - 4 | 80000 + 3 | 59943 + 4 | 79924 5 | 100000 6 | 120000 7 | 140000 8 | 160000 9 | 180000 + 11 | 440209 + | (10 rows) select max(t) from aggfns where cfloat8 < 1000 order by 1; @@ -6542,8 +6852,6 @@ select ss, max(t) from aggfns where 
cfloat8 < 1000 group by ss order by 1; ss | max ----+-------- 0 | 20000 - 1 | 30000 - 2 | 40000 3 | 50000 4 | 60000 5 | 70000 @@ -6551,6 +6859,8 @@ select ss, max(t) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | 90000 8 | 100000 9 | 110000 + 11 | 59192 + | 49491 (10 rows) select min(t) from aggfns where cfloat8 < 1000 order by 1; @@ -6578,8 +6888,6 @@ select ss, min(t) from aggfns where cfloat8 < 1000 group by ss order by 1; ss | min ----+------- 0 | 1 - 1 | 10001 - 2 | 20001 3 | 30001 4 | 40001 5 | 50001 @@ -6587,8 +6895,26 @@ select ss, min(t) from aggfns where cfloat8 < 1000 group by ss order by 1; 7 | 70001 8 | 80001 9 | 90001 + 11 | 10001 + | 30537 (10 rows) +select count(*) from aggfns where cfloat8 > 1000 order by 1; + count +------- + 0 +(1 row) + +select s, count(*) from aggfns where cfloat8 > 1000 group by s order by 1; + s | count +---+------- +(0 rows) + +select ss, count(*) from aggfns where cfloat8 > 1000 group by ss order by 1; + ss | count +----+------- +(0 rows) + select max(cdate) from aggfns where cfloat8 > 1000 order by 1; max ----- @@ -7207,6 +7533,22 @@ select ss, avg(ss) from aggfns where cfloat8 > 1000 group by ss order by 1; ----+----- (0 rows) +select count(ss) from aggfns where cfloat8 > 1000 order by 1; + count +------- + 0 +(1 row) + +select s, count(ss) from aggfns where cfloat8 > 1000 group by s order by 1; + s | count +---+------- +(0 rows) + +select ss, count(ss) from aggfns where cfloat8 > 1000 group by ss order by 1; + ss | count +----+------- +(0 rows) + select max(ss) from aggfns where cfloat8 > 1000 order by 1; max ----- @@ -7302,6 +7644,41 @@ select ss, min(t) from aggfns where cfloat8 > 1000 group by ss order by 1; ----+----- (0 rows) +select count(*) from aggfns where cint2 is null order by 1; + count +------- + 190 +(1 row) + +select s, count(*) from aggfns where cint2 is null group by s order by 1; + s | count +---+------- + 0 | 19 + 1 | 19 + 2 | 19 + 3 | 19 + 4 | 19 + 5 | 19 + 6 | 19 + 7 | 19 + 8 | 19 + 9 | 
19 +(10 rows) + +select ss, count(*) from aggfns where cint2 is null group by ss order by 1; + ss | count +----+------- + 0 | 19 + 3 | 19 + 4 | 19 + 5 | 19 + 6 | 19 + 7 | 19 + 8 | 19 + 9 | 19 + 11 | 38 +(9 rows) + select avg(cint2) from aggfns where cint2 is null order by 1; avg ----- @@ -7327,8 +7704,6 @@ select ss, avg(cint2) from aggfns where cint2 is null group by ss order by 1; ss | avg ----+----- 0 | - 1 | - 2 | 3 | 4 | 5 | @@ -7336,7 +7711,8 @@ select ss, avg(cint2) from aggfns where cint2 is null group by ss order by 1; 7 | 8 | 9 | -(10 rows) + 11 | +(9 rows) select count(cint2) from aggfns where cint2 is null order by 1; count @@ -7363,8 +7739,6 @@ select ss, count(cint2) from aggfns where cint2 is null group by ss order by 1; ss | count ----+------- 0 | 0 - 1 | 0 - 2 | 0 3 | 0 4 | 0 5 | 0 @@ -7372,7 +7746,8 @@ select ss, count(cint2) from aggfns where cint2 is null group by ss order by 1; 7 | 0 8 | 0 9 | 0 -(10 rows) + 11 | 0 +(9 rows) select max(cint2) from aggfns where cint2 is null order by 1; max @@ -7399,8 +7774,6 @@ select ss, max(cint2) from aggfns where cint2 is null group by ss order by 1; ss | max ----+----- 0 | - 1 | - 2 | 3 | 4 | 5 | @@ -7408,7 +7781,8 @@ select ss, max(cint2) from aggfns where cint2 is null group by ss order by 1; 7 | 8 | 9 | -(10 rows) + 11 | +(9 rows) select min(cint2) from aggfns where cint2 is null order by 1; min @@ -7435,8 +7809,6 @@ select ss, min(cint2) from aggfns where cint2 is null group by ss order by 1; ss | min ----+----- 0 | - 1 | - 2 | 3 | 4 | 5 | @@ -7444,7 +7816,8 @@ select ss, min(cint2) from aggfns where cint2 is null group by ss order by 1; 7 | 8 | 9 | -(10 rows) + 11 | +(9 rows) select stddev(cint2) from aggfns where cint2 is null order by 1; stddev @@ -7471,8 +7844,6 @@ select ss, stddev(cint2) from aggfns where cint2 is null group by ss order by 1; ss | stddev ----+-------- 0 | - 1 | - 2 | 3 | 4 | 5 | @@ -7480,7 +7851,8 @@ select ss, stddev(cint2) from aggfns where cint2 is null group by ss order by 1; 
7 | 8 | 9 | -(10 rows) + 11 | +(9 rows) select sum(cint2) from aggfns where cint2 is null order by 1; sum @@ -7507,8 +7879,6 @@ select ss, sum(cint2) from aggfns where cint2 is null group by ss order by 1; ss | sum ----+----- 0 | - 1 | - 2 | 3 | 4 | 5 | @@ -7516,5 +7886,6 @@ select ss, sum(cint2) from aggfns where cint2 is null group by ss order by 1; 7 | 8 | 9 | -(10 rows) + 11 | +(9 rows) diff --git a/tsl/test/sql/vector_agg_functions.sql b/tsl/test/sql/vector_agg_functions.sql index 35ec690bf07..b3bad3f812f 100644 --- a/tsl/test/sql/vector_agg_functions.sql +++ b/tsl/test/sql/vector_agg_functions.sql @@ -12,38 +12,57 @@ $$ LANGUAGE SQL; \set CHUNK_ROWS 100000::int \set GROUPING_CARDINALITY 10::int -create table aggfns(t int, s int, ss int, +create table aggfns(t int, s int, cint2 int2, cint4 int4, cint8 int8, cfloat4 float4, cfloat8 float8, cts timestamp, ctstz timestamptz, cdate date); select create_hypertable('aggfns', 's', chunk_time_interval => :GROUPING_CARDINALITY / :CHUNKS); -insert into aggfns -select s * 10000::int + t, - s, +create view source as +select s * 10000::int + t as t, s, - case when t % 1051 = 0 then null else (mix(s + t + 1) * 32767)::int2 end, - (mix(s + t + 2) * 32767 * 65536)::int4, - (mix(s + t + 3) * 32767 * 65536)::int8, + case when t % 1051 = 0 then null else (mix(s + t + 1) * 32767)::int2 end as cint2, + (mix(s + t + 2) * 32767 * 65536)::int4 as cint4, + (mix(s + t + 3) * 32767 * 65536)::int8 as cint8, case when s = 1 and t = 1061 then 'nan'::float4 when s = 2 and t = 1061 then '+inf'::float4 when s = 3 and t = 1061 then '-inf'::float4 - else (mix(s + t + 4) * 100)::float4 end, - (mix(s + t + 5) * 100)::float8, - '2021-01-01 01:01:01'::timestamp + interval '1 second' * (s * 10000::int + t), - '2021-01-01 01:01:01'::timestamptz + interval '1 second' * (s * 10000::int + t), - '2021-01-01 01:01:01'::timestamptz + interval '1 day' * (s * 10000::int + t) + else (mix(s + t + 4) * 100)::float4 end as cfloat4, + (mix(s + t + 5) * 
100)::float8 as cfloat8, + '2021-01-01 01:01:01'::timestamp + interval '1 second' * (s * 10000::int + t) as cts, + '2021-01-01 01:01:01'::timestamptz + interval '1 second' * (s * 10000::int + t) as ctstz, + '2021-01-01'::date + interval '1 day' * (s * 10000::int + t) as cdate from generate_series(1::int, :CHUNK_ROWS * :CHUNKS / :GROUPING_CARDINALITY) t, generate_series(0::int, :GROUPING_CARDINALITY - 1::int) s(s) ; +insert into aggfns select * from source where s = 1; + alter table aggfns set (timescaledb.compress, timescaledb.compress_orderby = 't', timescaledb.compress_segmentby = 's'); select count(compress_chunk(x)) from show_chunks('aggfns') x; +alter table aggfns add column ss int default 11; + +insert into aggfns +select *, + case + -- null in entire batch + when s = 2 then null + -- null for some rows + when s = 3 and t % 1053 = 0 then null + -- for some rows same as default + when s = 4 and t % 1057 = 0 then 11 + -- not null for entire batch + else s + end as ss +from source where s != 1; + +select count(compress_chunk(x)) from show_chunks('aggfns') x; + analyze aggfns; ---- Uncomment to generate reference. 
Note that there are minor discrepancies @@ -72,7 +91,8 @@ from 'cfloat8', 'cts', 'ctstz', - 'cdate']) variable, + 'cdate', + '*']) variable, unnest(array[ 'min', 'max', @@ -99,8 +119,9 @@ where end and case + when variable = '*' then function = 'count' when condition = 'cint2 is null' then variable = 'cint2' - when function = 'count' then variable in ('cfloat4', 's') + when function = 'count' then variable in ('cfloat4', 's', 'ss') when variable = 't' then function in ('min', 'max') when variable in ('cts', 'ctstz', 'cdate') then function in ('min', 'max') else true end From 3e06b9224643f3b14f88e284bb019c6c96959016 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 15 Oct 2024 12:26:57 +0100 Subject: [PATCH 26/58] remove modified simplehash --- tsl/src/import/ts_simplehash.h | 1038 -------------------------------- 1 file changed, 1038 deletions(-) delete mode 100644 tsl/src/import/ts_simplehash.h diff --git a/tsl/src/import/ts_simplehash.h b/tsl/src/import/ts_simplehash.h deleted file mode 100644 index 18c3f73bb6a..00000000000 --- a/tsl/src/import/ts_simplehash.h +++ /dev/null @@ -1,1038 +0,0 @@ -/* - * This file and its contents are licensed under the Apache License 2.0. - * Please see the included NOTICE for copyright information and - * LICENSE-APACHE for a copy of the license. - */ - -/* - * simplehash.h - * - * When included this file generates a "templated" (by way of macros) - * open-addressing hash table implementation specialized to user-defined - * types. - * - * It's probably not worthwhile to generate such a specialized implementation - * for hash tables that aren't performance or space sensitive. - * - * Compared to dynahash, simplehash has the following benefits: - * - * - Due to the "templated" code generation has known structure sizes and no - * indirect function calls (which show up substantially in dynahash - * profiles). These features considerably increase speed for small - * entries. 
- * - Open addressing has better CPU cache behavior than dynahash's chained - * hashtables. - * - The generated interface is type-safe and easier to use than dynahash, - * though at the cost of more complex setup. - * - Allocates memory in a MemoryContext or another allocator with a - * malloc/free style interface (which isn't easily usable in a shared - * memory context) - * - Does not require the overhead of a separate memory context. - * - * Usage notes: - * - * To generate a hash-table and associated functions for a use case several - * macros have to be #define'ed before this file is included. Including - * the file #undef's all those, so a new hash table can be generated - * afterwards. - * The relevant parameters are: - * - SH_PREFIX - prefix for all symbol names generated. A prefix of 'foo' - * will result in hash table type 'foo_hash' and functions like - * 'foo_insert'/'foo_lookup' and so forth. - * - SH_ELEMENT_TYPE - type of the contained elements - * - SH_KEY_TYPE - type of the hashtable's key - * - SH_DECLARE - if defined function prototypes and type declarations are - * generated - * - SH_DEFINE - if defined function definitions are generated - * - SH_SCOPE - in which scope (e.g. extern, static inline) do function - * declarations reside - * - SH_RAW_ALLOCATOR - if defined, memory contexts are not used; instead, - * use this to allocate bytes. The allocator must zero the returned space. 
- * - SH_USE_NONDEFAULT_ALLOCATOR - if defined no element allocator functions - * are defined, so you can supply your own - * The following parameters are only relevant when SH_DEFINE is defined: - * - SH_KEY - name of the element in SH_ELEMENT_TYPE containing the hash key - * - SH_EQUAL(table, a, b) - compare two table keys - * - SH_HASH_KEY(table, key) - generate hash for the key - * - SH_STORE_HASH - if defined the hash is stored in the elements - * - SH_GET_HASH(tb, a) - return the field to store the hash in - * - * While SH_STORE_HASH (and subsequently SH_GET_HASH) are optional, because - * the hash table implementation needs to compare hashes to move elements - * (particularly when growing the hash), it's preferable, if possible, to - * store the element's hash in the element's data type. If the hash is so - * stored, the hash table will also compare hashes before calling SH_EQUAL - * when comparing two keys. - * - * For convenience the hash table create functions accept a void pointer - * that will be stored in the hash table type's member private_data. This - * allows callbacks to reference caller provided data. - * - * For examples of usage look at tidbitmap.c (file local definition) and - * execnodes.h/execGrouping.c (exposed declaration, file local - * implementation). - * - * Hash table design: - * - * The hash table design chosen is a variant of linear open-addressing. The - * reason for doing so is that linear addressing is CPU cache & pipeline - * friendly. The biggest disadvantage of simple linear addressing schemes - * are highly variable lookup times due to clustering, and deletions - * leaving a lot of tombstones around. To address these issues a variant - * of "robin hood" hashing is employed. Robin hood hashing optimizes - * chaining lengths by moving elements close to their optimal bucket - * ("rich" elements), out of the way if a to-be-inserted element is further - * away from its optimal position (i.e. it's "poor"). 
While that can make - * insertions slower, the average lookup performance is a lot better, and - * higher fill factors can be used in a still performant manner. To avoid - * tombstones - which normally solve the issue that a deleted node's - * presence is relevant to determine whether a lookup needs to continue - * looking or is done - buckets following a deleted element are shifted - * backwards, unless they're empty or already at their optimal position. - * - * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * src/include/lib/simplehash.h - */ - -#include "port/pg_bitutils.h" - -/* helpers */ -#define SH_MAKE_PREFIX(a) CppConcat(a, _) -#define SH_MAKE_NAME(name) SH_MAKE_NAME_(SH_MAKE_PREFIX(SH_PREFIX), name) -#define SH_MAKE_NAME_(a, b) CppConcat(a, b) - -/* name macros for: */ - -/* type declarations */ -#define SH_TYPE SH_MAKE_NAME(hash) -#define SH_ITERATOR SH_MAKE_NAME(iterator) - -/* function declarations */ -#define SH_CREATE SH_MAKE_NAME(create) -#define SH_DESTROY SH_MAKE_NAME(destroy) -#define SH_RESET SH_MAKE_NAME(reset) -#define SH_INSERT SH_MAKE_NAME(insert) -#define SH_INSERT_HASH SH_MAKE_NAME(insert_hash) -#define SH_LOOKUP SH_MAKE_NAME(lookup) -#define SH_LOOKUP_HASH SH_MAKE_NAME(lookup_hash) -#define SH_GROW SH_MAKE_NAME(grow) -#define SH_START_ITERATE SH_MAKE_NAME(start_iterate) -#define SH_START_ITERATE_AT SH_MAKE_NAME(start_iterate_at) -#define SH_ITERATE SH_MAKE_NAME(iterate) -#define SH_ALLOCATE SH_MAKE_NAME(allocate) -#define SH_FREE SH_MAKE_NAME(free) -#define SH_STAT SH_MAKE_NAME(stat) - -/* internal helper functions (no externally visible prototypes) */ -#define SH_COMPUTE_PARAMETERS SH_MAKE_NAME(compute_parameters) -#define SH_NEXT SH_MAKE_NAME(next) -#define SH_PREV SH_MAKE_NAME(prev) -#define SH_DISTANCE_FROM_OPTIMAL SH_MAKE_NAME(distance) -#define SH_INITIAL_BUCKET SH_MAKE_NAME(initial_bucket) -#define SH_ENTRY_HASH 
SH_MAKE_NAME(entry_hash) -#define SH_INSERT_HASH_INTERNAL SH_MAKE_NAME(insert_hash_internal) -#define SH_LOOKUP_HASH_INTERNAL SH_MAKE_NAME(lookup_hash_internal) - -/* generate forward declarations necessary to use the hash table */ -#ifdef SH_DECLARE - -/* type definitions */ -typedef struct SH_TYPE -{ - /* - * Size of data / bucket array, 64 bits to handle UINT32_MAX sized hash - * tables. Note that the maximum number of elements is lower - * (SH_MAX_FILLFACTOR) - */ - uint64 size; - - /* how many elements have valid contents */ - uint32 members; - - /* mask for bucket and size calculations, based on size */ - uint32 sizemask; - - /* boundary after which to grow hashtable */ - uint32 grow_threshold; - - /* hash buckets */ - SH_ELEMENT_TYPE *restrict data; - -#ifndef SH_RAW_ALLOCATOR - /* memory context to use for allocations */ - MemoryContext ctx; -#endif - - /* user defined data, useful for callbacks */ - void *private_data; -} SH_TYPE; - -typedef struct SH_ITERATOR -{ - uint32 cur; /* current element */ - uint32 end; - bool done; /* iterator exhausted? 
*/ -} SH_ITERATOR; - -/* externally visible function prototypes */ -#ifdef SH_RAW_ALLOCATOR -/* _hash _create(uint32 nelements, void *private_data) */ -SH_SCOPE SH_TYPE *SH_CREATE(uint32 nelements, void *private_data); -#else -/* - * _hash _create(MemoryContext ctx, uint32 nelements, - * void *private_data) - */ -SH_SCOPE SH_TYPE *SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data); -#endif - -/* void _destroy(_hash *tb) */ -SH_SCOPE void SH_DESTROY(SH_TYPE *tb); - -/* void _reset(_hash *tb) */ -SH_SCOPE void SH_RESET(SH_TYPE *tb); - -/* void _grow(_hash *tb, uint64 newsize) */ -SH_SCOPE void SH_GROW(SH_TYPE *tb, uint64 newsize); - -/* *_insert(_hash *tb, key, bool *found) */ -SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE *tb, SH_KEY_TYPE key, bool *found); - -/* - * *_insert_hash(_hash *tb, key, uint32 hash, - * bool *found) - */ -SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT_HASH(SH_TYPE *tb, SH_KEY_TYPE key, uint32 hash, bool *found); - -/* *_lookup(_hash *tb, key) */ -SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE *tb, SH_KEY_TYPE key); - -/* *_lookup_hash(_hash *tb, key, uint32 hash) */ -SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP_HASH(SH_TYPE *tb, SH_KEY_TYPE key, uint32 hash); - -/* void _start_iterate(_hash *tb, _iterator *iter) */ -SH_SCOPE void SH_START_ITERATE(SH_TYPE *tb, SH_ITERATOR *iter); - -/* - * void _start_iterate_at(_hash *tb, _iterator *iter, - * uint32 at) - */ -SH_SCOPE void SH_START_ITERATE_AT(SH_TYPE *tb, SH_ITERATOR *iter, uint32 at); - -/* *_iterate(_hash *tb, _iterator *iter) */ -SH_SCOPE SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE *tb, SH_ITERATOR *iter); - -/* void _stat(_hash *tb */ -SH_SCOPE void SH_STAT(SH_TYPE *tb); - -#endif /* SH_DECLARE */ - -/* generate implementation of the hash table */ -#ifdef SH_DEFINE - -#ifndef SH_RAW_ALLOCATOR -#include "utils/memutils.h" -#endif - -/* max data array size,we allow up to PG_UINT32_MAX buckets, including 0 */ -#define SH_MAX_SIZE (((uint64) PG_UINT32_MAX) + 1) - -/* normal fillfactor, unless already close 
to maximum */ -#ifndef SH_FILLFACTOR -#define SH_FILLFACTOR (0.9) -#endif -/* increase fillfactor if we otherwise would error out */ -#define SH_MAX_FILLFACTOR (0.98) -/* grow if actual and optimal location bigger than */ -#ifndef SH_GROW_MAX_DIB -#define SH_GROW_MAX_DIB 25 -#endif -/* grow if more than elements to move when inserting */ -#ifndef SH_GROW_MAX_MOVE -#define SH_GROW_MAX_MOVE 150 -#endif -#ifndef SH_GROW_MIN_FILLFACTOR -/* but do not grow due to SH_GROW_MAX_* if below */ -#define SH_GROW_MIN_FILLFACTOR 0.1 -#endif - -#ifdef SH_STORE_HASH -#define SH_COMPARE_KEYS(tb, ahash, akey, b) \ - (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey)) -#else -#define SH_COMPARE_KEYS(tb, ahash, akey, b) (SH_EQUAL(tb, b->SH_KEY, akey)) -#endif - -/* - * Wrap the following definitions in include guards, to avoid multiple - * definition errors if this header is included more than once. The rest of - * the file deliberately has no include guards, because it can be included - * with different parameters to define functions and types with non-colliding - * names. - */ -#ifndef SIMPLEHASH_H -#define SIMPLEHASH_H - -#ifdef FRONTEND -#define sh_error(...) pg_fatal(__VA_ARGS__) -#define sh_log(...) pg_log_info(__VA_ARGS__) -#else -#define sh_error(...) elog(ERROR, __VA_ARGS__) -#define sh_log(...) elog(LOG, __VA_ARGS__) -#endif - -#endif - -/* - * Compute sizing parameters for hashtable. Called when creating and growing - * the hashtable. - */ -static inline void -SH_COMPUTE_PARAMETERS(SH_TYPE *tb, uint64 newsize) -{ - uint64 size; - - /* supporting zero sized hashes would complicate matters */ - size = Max(newsize, 2); - - /* round up size to the next power of 2, that's how bucketing works */ - size = pg_nextpower2_64(size); - Assert(size <= SH_MAX_SIZE); - - /* - * Verify that allocation of ->data is possible on this platform, without - * overflowing Size. 
- */ - if (unlikely((((uint64) sizeof(SH_ELEMENT_TYPE)) * size) >= SIZE_MAX / 2)) - sh_error("hash table too large"); - - /* now set size */ - tb->size = size; - tb->sizemask = (uint32) (size - 1); - - /* - * Compute the next threshold at which we need to grow the hash table - * again. - */ - if (tb->size == SH_MAX_SIZE) - tb->grow_threshold = ((double) tb->size) * SH_MAX_FILLFACTOR; - else - tb->grow_threshold = ((double) tb->size) * SH_FILLFACTOR; -} - -/* return the optimal bucket for the hash */ -static pg_attribute_always_inline uint32 -SH_INITIAL_BUCKET(SH_TYPE *tb, uint32 hash) -{ - return hash & tb->sizemask; -} - -/* return next bucket after the current, handling wraparound */ -static inline uint32 -SH_NEXT(SH_TYPE *tb, uint32 curelem, uint32 startelem) -{ - curelem = (curelem + 1) & tb->sizemask; - - Assert(curelem != startelem); - - return curelem; -} - -/* return bucket before the current, handling wraparound */ -static inline uint32 -SH_PREV(SH_TYPE *tb, uint32 curelem, uint32 startelem) -{ - curelem = (curelem - 1) & tb->sizemask; - - Assert(curelem != startelem); - - return curelem; -} - -/* return distance between bucket and its optimal position */ -static inline uint32 -SH_DISTANCE_FROM_OPTIMAL(SH_TYPE *tb, uint32 optimal, uint32 bucket) -{ - if (optimal <= bucket) - return bucket - optimal; - else - return (tb->size + bucket) - optimal; -} - -static inline uint32 -SH_ENTRY_HASH(SH_TYPE *tb, SH_ELEMENT_TYPE *entry) -{ -#ifdef SH_STORE_HASH - return SH_GET_HASH(tb, entry); -#else - return SH_HASH_KEY(tb, entry->SH_KEY); -#endif -} - -/* default memory allocator function */ -static inline void *SH_ALLOCATE(SH_TYPE *type, Size size); -static inline void SH_FREE(SH_TYPE *type, void *pointer); - -#ifndef SH_USE_NONDEFAULT_ALLOCATOR - -/* default memory allocator function */ -static inline void * -SH_ALLOCATE(SH_TYPE *type, Size size) -{ -#ifdef SH_RAW_ALLOCATOR - return SH_RAW_ALLOCATOR(size); -#else - return MemoryContextAllocExtended(type->ctx, size, 
MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO); -#endif -} - -/* default memory free function */ -static inline void -SH_FREE(SH_TYPE *type, void *pointer) -{ - pfree(pointer); -} - -#endif - -/* - * Create a hash table with enough space for `nelements` distinct members. - * Memory for the hash table is allocated from the passed-in context. If - * desired, the array of elements can be allocated using a passed-in allocator; - * this could be useful in order to place the array of elements in a shared - * memory, or in a context that will outlive the rest of the hash table. - * Memory other than for the array of elements will still be allocated from - * the passed-in context. - */ -#ifdef SH_RAW_ALLOCATOR -SH_SCOPE SH_TYPE * -SH_CREATE(uint32 nelements, void *private_data) -#else -SH_SCOPE SH_TYPE * -SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data) -#endif -{ - SH_TYPE *tb; - uint64 size; - -#ifdef SH_RAW_ALLOCATOR - tb = (SH_TYPE *) SH_RAW_ALLOCATOR(sizeof(SH_TYPE)); -#else - tb = (SH_TYPE *) MemoryContextAllocZero(ctx, sizeof(SH_TYPE)); - tb->ctx = ctx; -#endif - tb->private_data = private_data; - - /* increase nelements by fillfactor, want to store nelements elements */ - size = Min((double) SH_MAX_SIZE, ((double) nelements) / SH_FILLFACTOR); - - SH_COMPUTE_PARAMETERS(tb, size); - - tb->data = (SH_ELEMENT_TYPE *) SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size); - - return tb; -} - -/* destroy a previously created hash table */ -SH_SCOPE void -SH_DESTROY(SH_TYPE *tb) -{ - SH_FREE(tb, tb->data); - pfree(tb); -} - -/* reset the contents of a previously created hash table */ -SH_SCOPE void -SH_RESET(SH_TYPE *tb) -{ - memset(tb->data, 0, sizeof(SH_ELEMENT_TYPE) * tb->size); - tb->members = 0; -} - -/* - * Grow a hash table to at least `newsize` buckets. - * - * Usually this will automatically be called by insertions/deletions, when - * necessary. But resizing to the exact input size can be advantageous - * performance-wise, when known at some point. 
- */ -SH_SCOPE void -SH_GROW(SH_TYPE *tb, uint64 newsize) -{ - uint64 oldsize = tb->size; - SH_ELEMENT_TYPE *olddata = tb->data; - SH_ELEMENT_TYPE *newdata; - uint32 i; - uint32 startelem = 0; - uint32 copyelem; - - Assert(oldsize == pg_nextpower2_64(oldsize)); - Assert(oldsize != SH_MAX_SIZE); - Assert(oldsize < newsize); - - /* compute parameters for new table */ - SH_COMPUTE_PARAMETERS(tb, newsize); - - tb->data = (SH_ELEMENT_TYPE *) SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size); - - newdata = tb->data; - - /* - * Copy entries from the old data to newdata. We theoretically could use - * SH_INSERT here, to avoid code duplication, but that's more general than - * we need. We neither want tb->members increased, nor do we need to do - * deal with deleted elements, nor do we need to compare keys. So a - * special-cased implementation is lot faster. As resizing can be time - * consuming and frequent, that's worthwhile to optimize. - * - * To be able to simply move entries over, we have to start not at the - * first bucket (i.e olddata[0]), but find the first bucket that's either - * empty, or is occupied by an entry at its optimal position. Such a - * bucket has to exist in any table with a load factor under 1, as not all - * buckets are occupied, i.e. there always has to be an empty bucket. By - * starting at such a bucket we can move the entries to the larger table, - * without having to deal with conflicts. 
- */ - - /* search for the first element in the hash that's not wrapped around */ - for (i = 0; i < oldsize; i++) - { - SH_ELEMENT_TYPE *oldentry = &olddata[i]; - uint32 hash; - uint32 optimal; - - if (SH_ENTRY_EMPTY(oldentry)) - { - startelem = i; - break; - } - - hash = SH_ENTRY_HASH(tb, oldentry); - optimal = SH_INITIAL_BUCKET(tb, hash); - - if (optimal == i) - { - startelem = i; - break; - } - } - - /* and copy all elements in the old table */ - copyelem = startelem; - for (i = 0; i < oldsize; i++) - { - SH_ELEMENT_TYPE *oldentry = &olddata[copyelem]; - - if (!SH_ENTRY_EMPTY(oldentry)) - { - uint32 hash; - uint32 startelem; - uint32 curelem; - SH_ELEMENT_TYPE *newentry; - - hash = SH_ENTRY_HASH(tb, oldentry); - startelem = SH_INITIAL_BUCKET(tb, hash); - curelem = startelem; - - /* find empty element to put data into */ - while (true) - { - newentry = &newdata[curelem]; - - if (SH_ENTRY_EMPTY(newentry)) - { - break; - } - - curelem = SH_NEXT(tb, curelem, startelem); - } - - /* copy entry to new slot */ - memcpy(newentry, oldentry, sizeof(SH_ELEMENT_TYPE)); - } - - /* can't use SH_NEXT here, would use new size */ - copyelem++; - if (copyelem >= oldsize) - { - copyelem = 0; - } - } - - SH_FREE(tb, olddata); -} - -/* - * This is a separate static inline function, so it can be reliably be inlined - * into its wrapper functions even if SH_SCOPE is extern. - */ -static pg_attribute_always_inline SH_ELEMENT_TYPE * -SH_INSERT_HASH_INTERNAL(SH_TYPE *restrict tb, SH_KEY_TYPE key, uint32 hash, bool *found) -{ - /* - * We do the grow check even if the key is actually present, to avoid - * doing the check inside the loop. This also lets us avoid having to - * re-find our position in the hashtable after resizing. - * - * Note that this also reached when resizing the table due to - * SH_GROW_MAX_DIB / SH_GROW_MAX_MOVE. 
- */ - if (unlikely(tb->members >= tb->grow_threshold)) - { - if (unlikely(tb->size == SH_MAX_SIZE)) - sh_error("hash table size exceeded"); - - /* - * When optimizing, it can be very useful to print these out. - */ - /* SH_STAT(tb); */ - SH_GROW(tb, tb->size * 2); - /* SH_STAT(tb); */ - } - - SH_ELEMENT_TYPE *restrict data = tb->data; - - /* perform insert, start bucket search at optimal location */ - const uint32 startelem = SH_INITIAL_BUCKET(tb, hash); - uint32 curelem = startelem; - uint32 insertdist = 0; - while (true) - { - SH_ELEMENT_TYPE *entry = &data[curelem]; - - /* any empty bucket can directly be used */ - if (SH_ENTRY_EMPTY(entry)) - { - tb->members++; - entry->SH_KEY = key; -#ifdef SH_STORE_HASH - SH_GET_HASH(tb, entry) = hash; -#endif - *found = false; - return entry; - } - - /* - * If the bucket is not empty, we either found a match (in which case - * we're done), or we have to decide whether to skip over or move the - * colliding entry. When the colliding element's distance to its - * optimal position is smaller than the to-be-inserted entry's, we - * shift the colliding entry (and its followers) forward by one. - */ - - if (SH_COMPARE_KEYS(tb, hash, key, entry)) - { - Assert(!SH_ENTRY_EMPTY(entry)); - *found = true; - return entry; - } - - const uint32 curhash = SH_ENTRY_HASH(tb, entry); - const uint32 curoptimal = SH_INITIAL_BUCKET(tb, curhash); - const uint32 curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem); - - if (insertdist > curdist) - { - /* We're going to insert at this position. */ - break; - } - - curelem = SH_NEXT(tb, curelem, startelem); - insertdist++; - - /* - * To avoid negative consequences from overly imbalanced hashtables, - * grow the hashtable if collisions lead to large runs. The most - * likely cause of such imbalance is filling a (currently) small - * table, from a currently big one, in hash-table order. 
Don't grow - * if the hashtable would be too empty, to prevent quick space - * explosion for some weird edge cases. - */ - if (unlikely(insertdist > SH_GROW_MAX_DIB) && - ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR) - { - SH_GROW(tb, tb->size * 2); - return SH_INSERT_HASH_INTERNAL(tb, key, hash, found); - } - } - - /* Actually insert. */ - SH_ELEMENT_TYPE *entry = &data[curelem]; - SH_ELEMENT_TYPE *lastentry = entry; - uint32 emptyelem = curelem; - int32 emptydist = 0; - - /* find next empty bucket */ - while (true) - { - SH_ELEMENT_TYPE *emptyentry; - - emptyelem = SH_NEXT(tb, emptyelem, startelem); - emptyentry = &data[emptyelem]; - - if (SH_ENTRY_EMPTY(emptyentry)) - { - lastentry = emptyentry; - break; - } - - /* - * To avoid negative consequences from overly imbalanced - * hashtables, grow the hashtable if collisions would require - * us to move a lot of entries. The most likely cause of such - * imbalance is filling a (currently) small table, from a - * currently big one, in hash-table order. Don't grow if the - * hashtable would be too empty, to prevent quick space - * explosion for some weird edge cases. - */ - if (unlikely(++emptydist > SH_GROW_MAX_MOVE) && - ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR) - { - SH_GROW(tb, tb->size * 2); - return SH_INSERT_HASH_INTERNAL(tb, key, hash, found); - } - } - - /* shift forward, starting at last occupied element */ - - /* - * TODO: This could be optimized to be one memcpy in many cases, - * excepting wrapping around at the end of ->data. Hasn't shown up - * in profiles so far though. 
- */ - uint32 moveelem = emptyelem; - while (moveelem != curelem) - { - SH_ELEMENT_TYPE *moveentry; - - moveelem = SH_PREV(tb, moveelem, startelem); - moveentry = &data[moveelem]; - - memcpy(lastentry, moveentry, sizeof(SH_ELEMENT_TYPE)); - lastentry = moveentry; - } - - /* and fill the now empty spot */ - tb->members++; - - entry->SH_KEY = key; -#ifdef SH_STORE_HASH - SH_GET_HASH(tb, entry) = hash; -#endif - *found = false; - return entry; -} - -/* - * Insert the key key into the hash-table, set *found to true if the key - * already exists, false otherwise. Returns the hash-table entry in either - * case. - */ -static pg_attribute_always_inline SH_ELEMENT_TYPE * -SH_INSERT(SH_TYPE *tb, SH_KEY_TYPE key, bool *found) -{ - uint32 hash = SH_HASH_KEY(tb, key); - - return SH_INSERT_HASH_INTERNAL(tb, key, hash, found); -} - -/* - * Insert the key key into the hash-table using an already-calculated - * hash. Set *found to true if the key already exists, false - * otherwise. Returns the hash-table entry in either case. - */ -SH_SCOPE SH_ELEMENT_TYPE * -SH_INSERT_HASH(SH_TYPE *tb, SH_KEY_TYPE key, uint32 hash, bool *found) -{ - return SH_INSERT_HASH_INTERNAL(tb, key, hash, found); -} - -/* - * This is a separate static inline function, so it can be reliably be inlined - * into its wrapper functions even if SH_SCOPE is extern. - */ -static inline SH_ELEMENT_TYPE * -SH_LOOKUP_HASH_INTERNAL(SH_TYPE *tb, SH_KEY_TYPE key, uint32 hash) -{ - const uint32 startelem = SH_INITIAL_BUCKET(tb, hash); - uint32 curelem = startelem; - - while (true) - { - SH_ELEMENT_TYPE *entry = &tb->data[curelem]; - - if (SH_ENTRY_EMPTY(entry)) - { - return NULL; - } - - Assert(!SH_ENTRY_EMPTY(entry)); - - if (SH_COMPARE_KEYS(tb, hash, key, entry)) - return entry; - - /* - * TODO: we could stop search based on distance. If the current - * buckets's distance-from-optimal is smaller than what we've skipped - * already, the entry doesn't exist. 
Probably only do so if - * SH_STORE_HASH is defined, to avoid re-computing hashes? - */ - - curelem = SH_NEXT(tb, curelem, startelem); - } -} - -/* - * Lookup up entry in hash table. Returns NULL if key not present. - */ -SH_SCOPE SH_ELEMENT_TYPE * -SH_LOOKUP(SH_TYPE *tb, SH_KEY_TYPE key) -{ - uint32 hash = SH_HASH_KEY(tb, key); - - return SH_LOOKUP_HASH_INTERNAL(tb, key, hash); -} - -/* - * Lookup up entry in hash table using an already-calculated hash. - * - * Returns NULL if key not present. - */ -SH_SCOPE SH_ELEMENT_TYPE * -SH_LOOKUP_HASH(SH_TYPE *tb, SH_KEY_TYPE key, uint32 hash) -{ - return SH_LOOKUP_HASH_INTERNAL(tb, key, hash); -} - -/* - * Initialize iterator. - */ -SH_SCOPE void -SH_START_ITERATE(SH_TYPE *tb, SH_ITERATOR *iter) -{ - uint64 startelem = PG_UINT64_MAX; - - /* - * Search for the first empty element. As deletions during iterations are - * supported, we want to start/end at an element that cannot be affected - * by elements being shifted. - */ - for (uint32 i = 0; i < tb->size; i++) - { - SH_ELEMENT_TYPE *entry = &tb->data[i]; - - if (SH_ENTRY_EMPTY(entry)) - { - startelem = i; - break; - } - } - - /* we should have found an empty element */ - Assert(startelem < SH_MAX_SIZE); - - /* - * Iterate backwards, that allows the current element to be deleted, even - * if there are backward shifts - */ - iter->cur = startelem; - iter->end = iter->cur; - iter->done = false; -} - -/* - * Initialize iterator to a specific bucket. That's really only useful for - * cases where callers are partially iterating over the hashspace, and that - * iteration deletes and inserts elements based on visited entries. Doing that - * repeatedly could lead to an unbalanced keyspace when always starting at the - * same position. - */ -SH_SCOPE void -SH_START_ITERATE_AT(SH_TYPE *tb, SH_ITERATOR *iter, uint32 at) -{ - /* - * Iterate backwards, that allows the current element to be deleted, even - * if there are backward shifts. 
- */ - iter->cur = at & tb->sizemask; /* ensure at is within a valid range */ - iter->end = iter->cur; - iter->done = false; -} - -/* - * Iterate over all entries in the hash-table. Return the next occupied entry, - * or NULL if done. - * - * During iteration the current entry in the hash table may be deleted, - * without leading to elements being skipped or returned twice. Additionally - * the rest of the table may be modified (i.e. there can be insertions or - * deletions), but if so, there's neither a guarantee that all nodes are - * visited at least once, nor a guarantee that a node is visited at most once. - */ -SH_SCOPE SH_ELEMENT_TYPE * -SH_ITERATE(SH_TYPE *tb, SH_ITERATOR *iter) -{ - while (!iter->done) - { - SH_ELEMENT_TYPE *elem; - - elem = &tb->data[iter->cur]; - - /* next element in backward direction */ - iter->cur = (iter->cur - 1) & tb->sizemask; - - if ((iter->cur & tb->sizemask) == (iter->end & tb->sizemask)) - iter->done = true; - if (!SH_ENTRY_EMPTY(elem)) - { - return elem; - } - } - - return NULL; -} - -/* - * Report some statistics about the state of the hashtable. For - * debugging/profiling purposes only. 
- */ -SH_SCOPE void -SH_STAT(SH_TYPE *tb) -{ - uint32 max_chain_length = 0; - uint32 total_chain_length = 0; - double avg_chain_length; - double fillfactor; - uint32 i; - - uint32 *collisions = (uint32 *) palloc0(tb->size * sizeof(uint32)); - uint32 total_collisions = 0; - uint32 max_collisions = 0; - double avg_collisions; - - for (i = 0; i < tb->size; i++) - { - uint32 hash; - uint32 optimal; - uint32 dist; - SH_ELEMENT_TYPE *elem; - - elem = &tb->data[i]; - - if (SH_ENTRY_EMPTY(elem)) - continue; - - hash = SH_ENTRY_HASH(tb, elem); - optimal = SH_INITIAL_BUCKET(tb, hash); - dist = SH_DISTANCE_FROM_OPTIMAL(tb, optimal, i); - - if (dist > max_chain_length) - max_chain_length = dist; - total_chain_length += dist; - - collisions[optimal]++; - } - - for (i = 0; i < tb->size; i++) - { - uint32 curcoll = collisions[i]; - - if (curcoll == 0) - continue; - - /* single contained element is not a collision */ - curcoll--; - total_collisions += curcoll; - if (curcoll > max_collisions) - max_collisions = curcoll; - } - - /* large enough to be worth freeing, even if just used for debugging */ - pfree(collisions); - - if (tb->members > 0) - { - fillfactor = tb->members / ((double) tb->size); - avg_chain_length = ((double) total_chain_length) / tb->members; - avg_collisions = ((double) total_collisions) / tb->members; - } - else - { - fillfactor = 0; - avg_chain_length = 0; - avg_collisions = 0; - } - - sh_log("size: " UINT64_FORMAT - ", members: %u, filled: %f, total chain: %u, max chain: %u, avg chain: %f, " - "total_collisions: %u, max_collisions: %u, avg_collisions: %f", - tb->size, - tb->members, - fillfactor, - total_chain_length, - max_chain_length, - avg_chain_length, - total_collisions, - max_collisions, - avg_collisions); -} - -#endif /* SH_DEFINE */ - -/* undefine external parameters, so next hash table can be defined */ -#undef SH_PREFIX -#undef SH_KEY_TYPE -#undef SH_KEY -#undef SH_ELEMENT_TYPE -#undef SH_HASH_KEY -#undef SH_SCOPE -#undef SH_DECLARE -#undef 
SH_DEFINE -#undef SH_GET_HASH -#undef SH_STORE_HASH -#undef SH_USE_NONDEFAULT_ALLOCATOR -#undef SH_EQUAL - -/* undefine locally declared macros */ -#undef SH_MAKE_PREFIX -#undef SH_MAKE_NAME -#undef SH_MAKE_NAME_ -#undef SH_FILLFACTOR -#undef SH_MAX_FILLFACTOR -#undef SH_GROW_MAX_DIB -#undef SH_GROW_MAX_MOVE -#undef SH_GROW_MIN_FILLFACTOR -#undef SH_MAX_SIZE - -/* types */ -#undef SH_TYPE -#undef SH_ITERATOR - -/* external function names */ -#undef SH_CREATE -#undef SH_DESTROY -#undef SH_RESET -#undef SH_INSERT -#undef SH_INSERT_HASH -#undef SH_LOOKUP -#undef SH_LOOKUP_HASH -#undef SH_GROW -#undef SH_START_ITERATE -#undef SH_START_ITERATE_AT -#undef SH_ITERATE -#undef SH_ALLOCATE -#undef SH_FREE -#undef SH_STAT - -/* internal function names */ -#undef SH_COMPUTE_PARAMETERS -#undef SH_COMPARE_KEYS -#undef SH_INITIAL_BUCKET -#undef SH_NEXT -#undef SH_PREV -#undef SH_DISTANCE_FROM_OPTIMAL -#undef SH_ENTRY_HASH -#undef SH_INSERT_HASH_INTERNAL -#undef SH_LOOKUP_HASH_INTERNAL From a7942ed5b2636920b1bb2decfcd80d41ba2b2b65 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 15 Oct 2024 12:59:02 +0100 Subject: [PATCH 27/58] offsets --- .../nodes/vector_agg/grouping_policy_hash.c | 44 +++++++++++++------ 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.c b/tsl/src/nodes/vector_agg/grouping_policy_hash.c index d2aa6870335..693ec753f79 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.c @@ -69,7 +69,6 @@ typedef struct #define SH_SCOPE static inline #define SH_DECLARE #define SH_DEFINE -#define SH_ENTRY_EMPTY(entry) (entry->agg_state_index == 0) #include struct h_hash; @@ -92,9 +91,20 @@ typedef struct */ MemoryContext agg_extra_mctx; - uint64 aggstate_bytes_per_key; - uint64 allocated_aggstate_rows; + /* + * Temporary storage of aggregate state offsets for a given batch. 
We keep + * it in the policy because it is too big to keep on stack, and we don't + * want to reallocate it each batch. + */ + uint32 *offsets; + uint64 num_allocated_offsets; + + /* + * Storage of aggregate function states, each List entry is the array of + * states for the respective function from agg_defs. + */ List *per_agg_states; + uint64 allocated_aggstate_rows; uint64 stat_input_total_rows; uint64 stat_input_valid_rows; @@ -112,13 +122,11 @@ create_grouping_policy_hash(List *agg_defs, List *output_grouping_columns) policy->agg_defs = agg_defs; policy->agg_extra_mctx = AllocSetContextCreate(CurrentMemoryContext, "agg extra", ALLOCSET_DEFAULT_SIZES); - policy->allocated_aggstate_rows = 1000; + policy->allocated_aggstate_rows = TARGET_COMPRESSED_BATCH_SIZE; ListCell *lc; foreach (lc, agg_defs) { VectorAggDef *agg_def = lfirst(lc); - policy->aggstate_bytes_per_key += agg_def->func.state_bytes; - policy->per_agg_states = lappend(policy->per_agg_states, palloc0(agg_def->func.state_bytes * policy->allocated_aggstate_rows)); @@ -455,6 +463,17 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) const uint64_t *restrict filter = batch_state->vector_qual_result; const int n = batch_state->total_batch_rows; + /* + * Initialize the array for storing the aggregate state offsets corresponding + * to a given batch row. + */ + if ((size_t) n > policy->num_allocated_offsets) + { + policy->num_allocated_offsets = n; + policy->offsets = palloc(sizeof(policy->offsets[0]) * policy->num_allocated_offsets); + } + memset(policy->offsets, 0, n * sizeof(policy->offsets[0])); + /* * For the partial aggregation node, the grouping columns are always in the * output, so we don't have to separately look at the list of the grouping @@ -508,8 +527,7 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) /* * Match rows to aggregation states using a hash table. 
*/ - uint32 offsets[1000] = { 0 }; - Assert((size_t) end_row <= sizeof(offsets) / sizeof(*offsets)); + Assert((size_t) end_row <= policy->num_allocated_offsets); switch ((int) key_column->decompression_type) { case DT_Scalar: @@ -519,7 +537,7 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) next_unused_state_index, start_row, end_row, - offsets); + policy->offsets); break; case 8: next_unused_state_index = fill_offsets_arrow_fixed_8(policy, @@ -528,7 +546,7 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) next_unused_state_index, start_row, end_row, - offsets); + policy->offsets); break; case 4: next_unused_state_index = fill_offsets_arrow_fixed_4(policy, @@ -537,7 +555,7 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) next_unused_state_index, start_row, end_row, - offsets); + policy->offsets); break; case 2: next_unused_state_index = fill_offsets_arrow_fixed_2(policy, @@ -546,7 +564,7 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) next_unused_state_index, start_row, end_row, - offsets); + policy->offsets); break; default: Assert(false); @@ -592,7 +610,7 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) end_row, lfirst(aggdeflc), lfirst(aggstatelc), - offsets, + policy->offsets, policy->agg_extra_mctx); } } From 6fb517f01a9d1bd6591d422f6415d6fbc3359433 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 15 Oct 2024 18:02:45 +0100 Subject: [PATCH 28/58] cleanup --- .../workflows/linux-32bit-build-and-test.yaml | 4 +- tsl/src/nodes/vector_agg/exec.c | 7 +- .../nodes/vector_agg/grouping_policy_hash.c | 70 +++++++++++++------ tsl/src/nodes/vector_agg/plan.c | 15 +++- 4 files changed, 72 insertions(+), 24 deletions(-) diff --git a/.github/workflows/linux-32bit-build-and-test.yaml b/.github/workflows/linux-32bit-build-and-test.yaml index 7a261a1445d..ed3afc0be1e 100644 --- 
a/.github/workflows/linux-32bit-build-and-test.yaml +++ b/.github/workflows/linux-32bit-build-and-test.yaml @@ -47,7 +47,9 @@ jobs: CC: clang-14 CXX: clang++-14 DEBIAN_FRONTEND: noninteractive - IGNORES: "append-* transparent_decompression-* transparent_decompress_chunk-* pg_dump telemetry bgw_db_scheduler*" + # vectorized_aggregation has different output on i386 because int8 is by + # reference and currently it cannot be used for vectorized hash grouping. + IGNORES: "append-* transparent_decompression-* transparent_decompress_chunk-* pg_dump telemetry bgw_db_scheduler* vectorized_aggregation" SKIPS: chunk_adaptive histogram_test-* EXTENSIONS: "postgres_fdw test_decoding" strategy: diff --git a/tsl/src/nodes/vector_agg/exec.c b/tsl/src/nodes/vector_agg/exec.c index fa4dc703d42..a6ebae5de51 100644 --- a/tsl/src/nodes/vector_agg/exec.c +++ b/tsl/src/nodes/vector_agg/exec.c @@ -122,7 +122,6 @@ vector_agg_begin(CustomScanState *node, EState *estate, int eflags) } } - /// List *grouping_child_output_offsets = linitial(cscan->custom_private); if (list_length(vector_agg_state->output_grouping_columns) == 1) { GroupingColumn *col = @@ -132,6 +131,9 @@ vector_agg_begin(CustomScanState *node, EState *estate, int eflags) if (desc->type == COMPRESSED_COLUMN && desc->by_value && desc->value_bytes > 0 && (size_t) desc->value_bytes <= sizeof(Datum)) { + /* + * Hash grouping by a single fixed-size by-value compressed column. + */ vector_agg_state->grouping = create_grouping_policy_hash(vector_agg_state->agg_defs, vector_agg_state->output_grouping_columns); @@ -140,6 +142,9 @@ vector_agg_begin(CustomScanState *node, EState *estate, int eflags) if (vector_agg_state->grouping == NULL) { + /* + * Per-batch grouping. 
+ */ vector_agg_state->grouping = create_grouping_policy_batch(vector_agg_state->agg_defs, vector_agg_state->output_grouping_columns); diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.c b/tsl/src/nodes/vector_agg/grouping_policy_hash.c index 693ec753f79..1381755b932 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.c @@ -5,9 +5,8 @@ */ /* - * This grouping policy aggregates entire compressed batches. It can be used to - * aggregate with no grouping, or to produce partial aggregates per each batch - * to group by segmentby columns. + * This grouping policy groups the rows using a hash table. Currently it only + * supports a single fixed-size by-value compressed column that fits into a Datum. */ #include @@ -20,6 +19,8 @@ #include "nodes/decompress_chunk/compressed_batch.h" #include "nodes/vector_agg/exec.h" +#define DEBUG_LOG(MSG, ...) elog(DEBUG3, MSG, __VA_ARGS__) + /* * We can use crc32 as a hash function, it has bad properties but takes only one * cycle, which is why it is sometimes used in the existing hash table @@ -75,14 +76,33 @@ struct h_hash; typedef struct { + /* + * We're using data inheritance from the GroupingPolicy. + */ GroupingPolicy funcs; + List *agg_defs; List *output_grouping_columns; - bool partial_per_batch; + + /* + * The hash table we use for grouping. + */ struct h_hash *table; - bool have_null_key; - struct h_iterator iter; + + /* + * We have to track whether we are in the mode of returning the partial + * aggregation results, and also use a hash table iterator to track our + * progress between emit() calls. + */ bool returning_results; + struct h_iterator iter; + + /* + * In single-column grouping, we store the null key outside of the hash + * table, and it has a reserved aggregate state index 1. We also reset this + * flag after we output the null key during iteration. 
+ */ + bool have_null_key; /* * A memory context for aggregate functions to allocate additional data, @@ -93,19 +113,23 @@ typedef struct /* * Temporary storage of aggregate state offsets for a given batch. We keep - * it in the policy because it is too big to keep on stack, and we don't - * want to reallocate it each batch. + * it in the policy because it is potentially too big to keep on stack, and + * we don't want to reallocate it each batch. */ uint32 *offsets; uint64 num_allocated_offsets; /* * Storage of aggregate function states, each List entry is the array of - * states for the respective function from agg_defs. + * states for the respective function from agg_defs. The state index 0 is + * invalid, and the state index 1 is reserved for a null key. */ List *per_agg_states; uint64 allocated_aggstate_rows; + /* + * Some statistics for debugging. + */ uint64 stat_input_total_rows; uint64 stat_input_valid_rows; uint64 stat_bulk_filtered_rows; @@ -200,7 +224,7 @@ compute_single_aggregate(DecompressBatchState *batch_state, int start_row, int e { /* * Scalar argument, or count(*). The latter has an optimized - * implementation for this case. + * implementation. */ if (agg_def->func.agg_many_scalar != NULL) { @@ -266,6 +290,7 @@ fill_offsets_impl(GroupingPolicyHash *policy, CompressedColumnValues column, offsets[row] = 1; } } + return next_unused_state_index; } @@ -488,7 +513,7 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) { /* * If we have a highly selective filter, it's easy to skip the rows for - * which the entire filter bitmap words are zero. + * which the entire words of the filter bitmap are zero. 
*/ if (filter) { @@ -641,15 +666,20 @@ gp_hash_do_emit(GroupingPolicy *gp, TupleTableSlot *aggregated_slot) { policy->returning_results = true; h_start_iterate(policy->table, &policy->iter); - // fprintf(stderr, - // "spill after %ld input %ld valid %ld bulk filtered, %d keys, %f ratio, %ld - // aggctx bytes, %ld aggstate bytes\n", policy->stat_input_total_rows, - // policy->stat_input_valid_rows, policy->stat_bulk_filtered_rows, - // policy->table->members - // + policy->have_null_key, policy->stat_input_valid_rows / - //(float) (policy->table->members + policy->have_null_key), - // MemoryContextMemAllocated(policy->table->ctx, - // false), MemoryContextMemAllocated(policy->agg_extra_mctx, false)); + + const float keys = policy->table->members + policy->have_null_key; + if (keys > 0) + { + DEBUG_LOG("spill after %ld input %ld valid %ld bulk filtered %.0f keys %f ratio %ld " + "aggctx bytes %ld aggstate bytes", + policy->stat_input_total_rows, + policy->stat_input_valid_rows, + policy->stat_bulk_filtered_rows, + keys, + policy->stat_input_valid_rows / keys, + MemoryContextMemAllocated(policy->table->ctx, false), + MemoryContextMemAllocated(policy->agg_extra_mctx, false)); + } } HashEntry null_key_entry = { .agg_state_index = 1 }; diff --git a/tsl/src/nodes/vector_agg/plan.c b/tsl/src/nodes/vector_agg/plan.c index f358d7a5a28..8c42cb43906 100644 --- a/tsl/src/nodes/vector_agg/plan.c +++ b/tsl/src/nodes/vector_agg/plan.c @@ -313,17 +313,25 @@ can_vectorize_aggref(Aggref *aggref, CustomScan *custom) /* * Whether we can perform vectorized aggregation with a given grouping. - * Currently supports either no grouping or grouping by segmentby columns. */ static bool can_vectorize_grouping(Agg *agg, CustomScan *custom) { + /* + * We support vectorized aggregation without grouping. + */ if (agg->numCols == 0) { return true; } - if (agg->numCols == 1) + /* + * We support hashed vectorized grouping by one fixed-size by-value + * compressed column. 
+ * We cannot use it when the plan has GroupAggregate because the + * latter requires sorted output. + */ + if (agg->numCols == 1 && agg->aggstrategy == AGG_HASHED) { int offset = AttrNumberGetAttrOffset(agg->grpColIdx[0]); TargetEntry *entry = list_nth(agg->plan.targetlist, offset); @@ -342,6 +350,9 @@ can_vectorize_grouping(Agg *agg, CustomScan *custom) } } + /* + * We support grouping by any number of columns if all of them are segmentby. + */ for (int i = 0; i < agg->numCols; i++) { int offset = AttrNumberGetAttrOffset(agg->grpColIdx[i]); From ffb28cf7143530a0089c762696daa7c7dbb3459b Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 15 Oct 2024 18:05:04 +0100 Subject: [PATCH 29/58] changelog --- .unreleased/vectorized-grouping-one-fixed | 1 + 1 file changed, 1 insertion(+) create mode 100644 .unreleased/vectorized-grouping-one-fixed diff --git a/.unreleased/vectorized-grouping-one-fixed b/.unreleased/vectorized-grouping-one-fixed new file mode 100644 index 00000000000..47f74a45210 --- /dev/null +++ b/.unreleased/vectorized-grouping-one-fixed @@ -0,0 +1 @@ +Implements: #7341 Vectorized aggregation with grouping by one fixed-size by-value compressed column (such as arithmetic types). 
From 778ca97d906341304ef0934b73c8f2f8537167f5 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 15 Oct 2024 18:18:10 +0100 Subject: [PATCH 30/58] cleanup --- .../nodes/vector_agg/function/agg_const_helper.c | 16 +++++++++++----- .../nodes/vector_agg/function/agg_many_helper.c | 6 ++++++ .../vector_agg/function/float48_accum_single.c | 2 +- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/tsl/src/nodes/vector_agg/function/agg_const_helper.c b/tsl/src/nodes/vector_agg/function/agg_const_helper.c index c83d38526be..4823a789096 100644 --- a/tsl/src/nodes/vector_agg/function/agg_const_helper.c +++ b/tsl/src/nodes/vector_agg/function/agg_const_helper.c @@ -11,14 +11,20 @@ * implementation otherwise. */ static void -FUNCTION_NAME(const)(void *agg_state, Datum constvalue, bool constisnull, int nn, +FUNCTION_NAME(const)(void *agg_state, Datum constvalue, bool constisnull, int n, MemoryContext agg_extra_mctx) { - const uint64 valid = constisnull ? 0 : 1; - const CTYPE value = valid ? DATUM_TO_CTYPE(constvalue) : 0; + if (constisnull) + { + return; + } + + const CTYPE value = DATUM_TO_CTYPE(constvalue); - for (int i = 0; i < nn; i++) + MemoryContext old = MemoryContextSwitchTo(agg_extra_mctx); + for (int i = 0; i < n; i++) { - FUNCTION_NAME(vector_impl)(agg_state, 1, &value, &valid, NULL, agg_extra_mctx); + FUNCTION_NAME(one)(agg_state, value); } + MemoryContextSwitchTo(old); } diff --git a/tsl/src/nodes/vector_agg/function/agg_many_helper.c b/tsl/src/nodes/vector_agg/function/agg_many_helper.c index f8517c4328a..dcf75622e4a 100644 --- a/tsl/src/nodes/vector_agg/function/agg_many_helper.c +++ b/tsl/src/nodes/vector_agg/function/agg_many_helper.c @@ -4,6 +4,12 @@ * LICENSE-TIMESCALE for a copy of the license. */ +/* + * A generic implementation of adding the given batch to many aggregate function + * states with given offsets. 
Used for hash aggregation, and builds on the + * FUNCTION_NAME(one) function, which adds one passing non-null row to the given + * aggregate function state. + */ static void FUNCTION_NAME(many)(void *restrict agg_states, uint32 *restrict offsets, int start_row, int end_row, const ArrowArray *vector, MemoryContext agg_extra_mctx) diff --git a/tsl/src/nodes/vector_agg/function/float48_accum_single.c b/tsl/src/nodes/vector_agg/function/float48_accum_single.c index 6a07c28e238..bb411d9d5c2 100644 --- a/tsl/src/nodes/vector_agg/function/float48_accum_single.c +++ b/tsl/src/nodes/vector_agg/function/float48_accum_single.c @@ -203,7 +203,7 @@ FUNCTION_NAME(vector_impl)(void *agg_state, size_t n, const CTYPE *values, const /* * Vector registers can be up to 512 bits wide. */ -#define UNROLL_SIZE ((int) ((512 / 8) / sizeof(CTYPE))) +#define UNROLL_SIZE ((int) (512 / 8 / sizeof(CTYPE))) /* * Each inner iteration works with its own accumulators to avoid data From ef3847af40c15998eb595223c751ec69288a4789 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 15 Oct 2024 18:18:23 +0100 Subject: [PATCH 31/58] benchmark vectorized hash grouping (simple) (2024-10-15 no. 12) From 1409c741d8c28fd2aaa94778b798ff50d4ffbc83 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 15 Oct 2024 18:27:47 +0100 Subject: [PATCH 32/58] 32-bit --- tsl/src/nodes/vector_agg/grouping_policy_hash.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.c b/tsl/src/nodes/vector_agg/grouping_policy_hash.c index 1381755b932..8b387e2e900 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.c @@ -19,7 +19,15 @@ #include "nodes/decompress_chunk/compressed_batch.h" #include "nodes/vector_agg/exec.h" +#ifdef USE_FLOAT8_BYVAL #define DEBUG_LOG(MSG, ...) 
elog(DEBUG3, MSG, __VA_ARGS__) +#else +/* + * On 32-bit platforms we'd have to use the cross-platform int width printf + * specifiers which are really unreadable. + */ +#define DEBUG_LOG(...) +#endif /* * We can use crc32 as a hash function, it has bad properties but takes only one From 514ae96b12709a0d3a702257cb5f6c2bb4b5097f Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 15 Oct 2024 19:30:22 +0100 Subject: [PATCH 33/58] some renames --- ...many_helper.c => agg_many_vector_helper.c} | 4 ++-- ...agg_const_helper.c => agg_scalar_helper.c} | 4 ++-- .../function/float48_accum_single.c | 8 +++---- tsl/src/nodes/vector_agg/function/functions.c | 6 ++--- tsl/src/nodes/vector_agg/function/functions.h | 22 +++++++++++++------ .../vector_agg/function/int128_accum_single.c | 8 +++---- .../function/int24_avg_accum_single.c | 8 +++---- .../vector_agg/function/int24_sum_single.c | 8 +++---- .../function/minmax_arithmetic_single.c | 8 +++---- .../vector_agg/function/sum_float_single.c | 8 +++---- .../nodes/vector_agg/grouping_policy_batch.c | 2 +- .../nodes/vector_agg/grouping_policy_hash.c | 5 +++-- 12 files changed, 50 insertions(+), 41 deletions(-) rename tsl/src/nodes/vector_agg/function/{agg_many_helper.c => agg_many_vector_helper.c} (85%) rename tsl/src/nodes/vector_agg/function/{agg_const_helper.c => agg_scalar_helper.c} (85%) diff --git a/tsl/src/nodes/vector_agg/function/agg_many_helper.c b/tsl/src/nodes/vector_agg/function/agg_many_vector_helper.c similarity index 85% rename from tsl/src/nodes/vector_agg/function/agg_many_helper.c rename to tsl/src/nodes/vector_agg/function/agg_many_vector_helper.c index dcf75622e4a..47916e29131 100644 --- a/tsl/src/nodes/vector_agg/function/agg_many_helper.c +++ b/tsl/src/nodes/vector_agg/function/agg_many_vector_helper.c @@ -11,8 +11,8 @@ * aggregate function state. 
*/ static void -FUNCTION_NAME(many)(void *restrict agg_states, uint32 *restrict offsets, int start_row, int end_row, - const ArrowArray *vector, MemoryContext agg_extra_mctx) +FUNCTION_NAME(many_vector)(void *restrict agg_states, uint32 *restrict offsets, int start_row, + int end_row, const ArrowArray *vector, MemoryContext agg_extra_mctx) { MemoryContext old = MemoryContextSwitchTo(agg_extra_mctx); const CTYPE *values = vector->buffers[1]; diff --git a/tsl/src/nodes/vector_agg/function/agg_const_helper.c b/tsl/src/nodes/vector_agg/function/agg_scalar_helper.c similarity index 85% rename from tsl/src/nodes/vector_agg/function/agg_const_helper.c rename to tsl/src/nodes/vector_agg/function/agg_scalar_helper.c index 4823a789096..0d81abfcf65 100644 --- a/tsl/src/nodes/vector_agg/function/agg_const_helper.c +++ b/tsl/src/nodes/vector_agg/function/agg_scalar_helper.c @@ -11,8 +11,8 @@ * implementation otherwise. */ static void -FUNCTION_NAME(const)(void *agg_state, Datum constvalue, bool constisnull, int n, - MemoryContext agg_extra_mctx) +FUNCTION_NAME(scalar)(void *agg_state, Datum constvalue, bool constisnull, int n, + MemoryContext agg_extra_mctx) { if (constisnull) { diff --git a/tsl/src/nodes/vector_agg/function/float48_accum_single.c b/tsl/src/nodes/vector_agg/function/float48_accum_single.c index bb411d9d5c2..73743c5aa1a 100644 --- a/tsl/src/nodes/vector_agg/function/float48_accum_single.c +++ b/tsl/src/nodes/vector_agg/function/float48_accum_single.c @@ -319,17 +319,17 @@ FUNCTION_NAME(one)(void *restrict agg_state, const CTYPE value) state->Sx = newSx; } -#include "agg_const_helper.c" -#include "agg_many_helper.c" +#include "agg_many_vector_helper.c" +#include "agg_scalar_helper.c" #include "agg_vector_validity_helper.c" VectorAggFunctions FUNCTION_NAME(argdef) = { .state_bytes = sizeof(FUNCTION_NAME(state)), .agg_init = FUNCTION_NAME(init), .agg_emit = FUNCTION_NAME(emit), - .agg_const = FUNCTION_NAME(const), + .agg_scalar = FUNCTION_NAME(scalar), .agg_vector 
= FUNCTION_NAME(vector), - .agg_many = FUNCTION_NAME(many), + .agg_many_vector = FUNCTION_NAME(many_vector), }; #undef UPDATE #undef COMBINE diff --git a/tsl/src/nodes/vector_agg/function/functions.c b/tsl/src/nodes/vector_agg/function/functions.c index 4dea3c8c90e..826dd02bfa9 100644 --- a/tsl/src/nodes/vector_agg/function/functions.c +++ b/tsl/src/nodes/vector_agg/function/functions.c @@ -72,7 +72,7 @@ count_star_many_scalar(void *restrict agg_states, uint32 *restrict offsets, int VectorAggFunctions count_star_agg = { .state_bytes = sizeof(CountState), .agg_init = count_init, - .agg_const = count_star_const, + .agg_scalar = count_star_const, .agg_emit = count_emit, .agg_many_scalar = count_star_many_scalar, }; @@ -152,9 +152,9 @@ VectorAggFunctions count_any_agg = { .state_bytes = sizeof(CountState), .agg_init = count_init, .agg_emit = count_emit, - .agg_const = count_any_const, + .agg_scalar = count_any_const, .agg_vector = count_any_vector, - .agg_many = count_any_many, + .agg_many_vector = count_any_many, }; /* diff --git a/tsl/src/nodes/vector_agg/function/functions.h b/tsl/src/nodes/vector_agg/function/functions.h index 9d5539fb244..70785a5d802 100644 --- a/tsl/src/nodes/vector_agg/function/functions.h +++ b/tsl/src/nodes/vector_agg/function/functions.h @@ -23,18 +23,26 @@ typedef struct void (*agg_vector)(void *restrict agg_state, const ArrowArray *vector, const uint64 *filter, MemoryContext agg_extra_mctx); - /* Aggregate a constant (like segmentby or column with default value). */ - void (*agg_const)(void *restrict agg_state, Datum constvalue, bool constisnull, int n, - MemoryContext agg_extra_mctx); - - void (*agg_many)(void *restrict agg_states, uint32 *restrict offsets, int start_row, - int end_row, const ArrowArray *vector, MemoryContext agg_extra_mctx); + /* Aggregate a scalar value, like segmentby or column with default value. 
*/ + void (*agg_scalar)(void *restrict agg_state, Datum constvalue, bool constisnull, int n, + MemoryContext agg_extra_mctx); + /* + * Add the rows of the given arrow array to aggregate function states given + * by the respecitve offsets. + */ + void (*agg_many_vector)(void *restrict agg_states, uint32 *restrict offsets, int start_row, + int end_row, const ArrowArray *vector, MemoryContext agg_extra_mctx); + + /* + * Same as above, but for a scalar argument. This is mostly important for + * count(*) and can be NULL. + */ void (*agg_many_scalar)(void *restrict agg_states, uint32 *restrict offsets, int start_row, int end_row, Datum constvalue, bool constisnull, MemoryContext agg_extra_mctx); - /* Emit a partial result. */ + /* Emit a partial aggregation result. */ void (*agg_emit)(void *restrict agg_state, Datum *out_result, bool *out_isnull); } VectorAggFunctions; diff --git a/tsl/src/nodes/vector_agg/function/int128_accum_single.c b/tsl/src/nodes/vector_agg/function/int128_accum_single.c index bf0f90e5044..bf60e64cd12 100644 --- a/tsl/src/nodes/vector_agg/function/int128_accum_single.c +++ b/tsl/src/nodes/vector_agg/function/int128_accum_single.c @@ -110,17 +110,17 @@ FUNCTION_NAME(one)(void *restrict agg_state, const CTYPE value) #endif } -#include "agg_const_helper.c" -#include "agg_many_helper.c" +#include "agg_many_vector_helper.c" +#include "agg_scalar_helper.c" #include "agg_vector_validity_helper.c" VectorAggFunctions FUNCTION_NAME(argdef) = { .state_bytes = sizeof(FUNCTION_NAME(state)), .agg_init = FUNCTION_NAME(init), .agg_emit = FUNCTION_NAME(emit), - .agg_const = FUNCTION_NAME(const), + .agg_scalar = FUNCTION_NAME(scalar), .agg_vector = FUNCTION_NAME(vector), - .agg_many = FUNCTION_NAME(many), + .agg_many_vector = FUNCTION_NAME(many_vector), }; #endif diff --git a/tsl/src/nodes/vector_agg/function/int24_avg_accum_single.c b/tsl/src/nodes/vector_agg/function/int24_avg_accum_single.c index 2051fe006d5..62ebb5ad52b 100644 --- 
a/tsl/src/nodes/vector_agg/function/int24_avg_accum_single.c +++ b/tsl/src/nodes/vector_agg/function/int24_avg_accum_single.c @@ -38,17 +38,17 @@ FUNCTION_NAME(one)(void *restrict agg_state, const CTYPE value) state->sum += value; } -#include "agg_const_helper.c" -#include "agg_many_helper.c" +#include "agg_many_vector_helper.c" +#include "agg_scalar_helper.c" #include "agg_vector_validity_helper.c" VectorAggFunctions FUNCTION_NAME(argdef) = { .state_bytes = sizeof(Int24AvgAccumState), .agg_init = int24_avg_accum_init, .agg_emit = int24_avg_accum_emit, - .agg_const = FUNCTION_NAME(const), + .agg_scalar = FUNCTION_NAME(scalar), .agg_vector = FUNCTION_NAME(vector), - .agg_many = FUNCTION_NAME(many), + .agg_many_vector = FUNCTION_NAME(many_vector), }; #endif diff --git a/tsl/src/nodes/vector_agg/function/int24_sum_single.c b/tsl/src/nodes/vector_agg/function/int24_sum_single.c index c96a45fc96f..8bfb15676ae 100644 --- a/tsl/src/nodes/vector_agg/function/int24_sum_single.c +++ b/tsl/src/nodes/vector_agg/function/int24_sum_single.c @@ -61,17 +61,17 @@ FUNCTION_NAME(one)(void *restrict agg_state, const CTYPE value) typedef Int24SumState FUNCTION_NAME(state); -#include "agg_const_helper.c" -#include "agg_many_helper.c" +#include "agg_many_vector_helper.c" +#include "agg_scalar_helper.c" #include "agg_vector_validity_helper.c" VectorAggFunctions FUNCTION_NAME(argdef) = { .state_bytes = sizeof(Int24SumState), .agg_init = int_sum_init, .agg_emit = int_sum_emit, - .agg_const = FUNCTION_NAME(const), + .agg_scalar = FUNCTION_NAME(scalar), .agg_vector = FUNCTION_NAME(vector), - .agg_many = FUNCTION_NAME(many), + .agg_many_vector = FUNCTION_NAME(many_vector), }; #endif diff --git a/tsl/src/nodes/vector_agg/function/minmax_arithmetic_single.c b/tsl/src/nodes/vector_agg/function/minmax_arithmetic_single.c index 127b5176291..6d110fcdc34 100644 --- a/tsl/src/nodes/vector_agg/function/minmax_arithmetic_single.c +++ b/tsl/src/nodes/vector_agg/function/minmax_arithmetic_single.c @@ 
-58,17 +58,17 @@ FUNCTION_NAME(one)(void *restrict agg_state, const CTYPE value) } } -#include "agg_const_helper.c" -#include "agg_many_helper.c" +#include "agg_many_vector_helper.c" +#include "agg_scalar_helper.c" #include "agg_vector_validity_helper.c" VectorAggFunctions FUNCTION_NAME(argdef) = { .state_bytes = sizeof(MinMaxState), .agg_init = minmax_init, .agg_emit = minmax_emit, - .agg_const = FUNCTION_NAME(const), + .agg_scalar = FUNCTION_NAME(scalar), .agg_vector = FUNCTION_NAME(vector), - .agg_many = FUNCTION_NAME(many), + .agg_many_vector = FUNCTION_NAME(many_vector), }; #endif diff --git a/tsl/src/nodes/vector_agg/function/sum_float_single.c b/tsl/src/nodes/vector_agg/function/sum_float_single.c index 41565feabfc..6b56b433c89 100644 --- a/tsl/src/nodes/vector_agg/function/sum_float_single.c +++ b/tsl/src/nodes/vector_agg/function/sum_float_single.c @@ -90,17 +90,17 @@ FUNCTION_NAME(one)(void *restrict agg_state, const CTYPE value) state->result += value; } -#include "agg_const_helper.c" -#include "agg_many_helper.c" +#include "agg_many_vector_helper.c" +#include "agg_scalar_helper.c" #include "agg_vector_validity_helper.c" VectorAggFunctions FUNCTION_NAME(argdef) = { .state_bytes = sizeof(FloatSumState), .agg_init = float_sum_init, .agg_emit = FUNCTION_NAME(emit), - .agg_const = FUNCTION_NAME(const), + .agg_scalar = FUNCTION_NAME(scalar), .agg_vector = FUNCTION_NAME(vector), - .agg_many = FUNCTION_NAME(many), + .agg_many_vector = FUNCTION_NAME(many_vector), }; #endif diff --git a/tsl/src/nodes/vector_agg/grouping_policy_batch.c b/tsl/src/nodes/vector_agg/grouping_policy_batch.c index 23d695cedc1..f7c7db9ee46 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_batch.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_batch.c @@ -146,7 +146,7 @@ compute_single_aggregate(DecompressBatchState *batch_state, VectorAggDef *agg_de */ Assert(n > 0); - agg_def->func.agg_const(agg_state, arg_datum, arg_isnull, n, agg_extra_mctx); + agg_def->func.agg_scalar(agg_state, 
arg_datum, arg_isnull, n, agg_extra_mctx); } } diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.c b/tsl/src/nodes/vector_agg/grouping_policy_hash.c index 8b387e2e900..fdcf50e41cd 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.c @@ -226,7 +226,8 @@ compute_single_aggregate(DecompressBatchState *batch_state, int start_row, int e if (arg_arrow != NULL) { /* Arrow argument. */ - agg_def->func.agg_many(agg_states, offsets, start_row, end_row, arg_arrow, agg_extra_mctx); + agg_def->func + .agg_many_vector(agg_states, offsets, start_row, end_row, arg_arrow, agg_extra_mctx); } else { @@ -254,7 +255,7 @@ compute_single_aggregate(DecompressBatchState *batch_state, int start_row, int e } void *state = (offsets[i] * agg_def->func.state_bytes + (char *) agg_states); - agg_def->func.agg_const(state, arg_datum, arg_isnull, 1, agg_extra_mctx); + agg_def->func.agg_scalar(state, arg_datum, arg_isnull, 1, agg_extra_mctx); } } } From 22d23b344c185b2ffb3950b80419efa0f443cbec Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 15 Oct 2024 19:47:25 +0100 Subject: [PATCH 34/58] cleanup --- tsl/src/nodes/vector_agg/function/functions.c | 18 +++++++++--------- .../nodes/vector_agg/grouping_policy_hash.c | 5 +++-- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/tsl/src/nodes/vector_agg/function/functions.c b/tsl/src/nodes/vector_agg/function/functions.c index 826dd02bfa9..fc141762497 100644 --- a/tsl/src/nodes/vector_agg/function/functions.c +++ b/tsl/src/nodes/vector_agg/function/functions.c @@ -45,8 +45,8 @@ count_emit(void *agg_state, Datum *out_result, bool *out_isnull) } static void -count_star_const(void *agg_state, Datum constvalue, bool constisnull, int n, - MemoryContext agg_extra_mctx) +count_star_scalar(void *agg_state, Datum constvalue, bool constisnull, int n, + MemoryContext agg_extra_mctx) { CountState *state = (CountState *) 
agg_state; state->count += n; @@ -72,7 +72,7 @@ count_star_many_scalar(void *restrict agg_states, uint32 *restrict offsets, int VectorAggFunctions count_star_agg = { .state_bytes = sizeof(CountState), .agg_init = count_init, - .agg_scalar = count_star_const, + .agg_scalar = count_star_scalar, .agg_emit = count_emit, .agg_many_scalar = count_star_many_scalar, }; @@ -81,8 +81,8 @@ VectorAggFunctions count_star_agg = { * Aggregate function count(x). */ static void -count_any_const(void *agg_state, Datum constvalue, bool constisnull, int n, - MemoryContext agg_extra_mctx) +count_any_scalar(void *agg_state, Datum constvalue, bool constisnull, int n, + MemoryContext agg_extra_mctx) { if (constisnull) { @@ -94,8 +94,8 @@ count_any_const(void *agg_state, Datum constvalue, bool constisnull, int n, } static void -count_any_vector(void *agg_state, const ArrowArray *vector, const uint64 *filter, - MemoryContext agg_extra_mctx) +count_any_many_vector(void *agg_state, const ArrowArray *vector, const uint64 *filter, + MemoryContext agg_extra_mctx) { CountState *state = (CountState *) agg_state; const int n = vector->length; @@ -152,8 +152,8 @@ VectorAggFunctions count_any_agg = { .state_bytes = sizeof(CountState), .agg_init = count_init, .agg_emit = count_emit, - .agg_scalar = count_any_const, - .agg_vector = count_any_vector, + .agg_scalar = count_any_scalar, + .agg_vector = count_any_many_vector, .agg_many_vector = count_any_many, }; diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.c b/tsl/src/nodes/vector_agg/grouping_policy_hash.c index fdcf50e41cd..0a8258efcb2 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.c @@ -80,8 +80,9 @@ typedef struct #define SH_DEFINE #include -struct h_hash; - +/* + * Hash grouping policy. 
+ */ typedef struct { /* From cd7a1dce4bb2ac6798954493eb90b9feb1be9fc8 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 15 Oct 2024 20:06:42 +0100 Subject: [PATCH 35/58] spelling --- tsl/src/nodes/vector_agg/function/functions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsl/src/nodes/vector_agg/function/functions.h b/tsl/src/nodes/vector_agg/function/functions.h index 70785a5d802..18c43c163f1 100644 --- a/tsl/src/nodes/vector_agg/function/functions.h +++ b/tsl/src/nodes/vector_agg/function/functions.h @@ -29,7 +29,7 @@ typedef struct /* * Add the rows of the given arrow array to aggregate function states given - * by the respecitve offsets. + * by the respective offsets. */ void (*agg_many_vector)(void *restrict agg_states, uint32 *restrict offsets, int start_row, int end_row, const ArrowArray *vector, MemoryContext agg_extra_mctx); From 9e51c1987e7819be529ce1cf30e4f3fe16de5306 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Nov 2024 12:00:06 +0100 Subject: [PATCH 36/58] Vectorize aggregate FILTER clause --- tsl/src/compression/arrow_c_data_interface.h | 47 +- .../nodes/decompress_chunk/compressed_batch.c | 16 +- .../nodes/decompress_chunk/compressed_batch.h | 16 + tsl/src/nodes/decompress_chunk/planner.c | 6 - tsl/src/nodes/vector_agg/exec.c | 37 + tsl/src/nodes/vector_agg/exec.h | 2 + .../nodes/vector_agg/grouping_policy_batch.c | 18 +- tsl/src/nodes/vector_agg/plan.c | 305 +- tsl/test/expected/vector_agg_filter.out | 2944 +++++++++++++++++ tsl/test/expected/vector_agg_param.out | 85 +- tsl/test/expected/vectorized_aggregation.out | 59 +- tsl/test/sql/CMakeLists.txt | 1 + tsl/test/sql/vector_agg_filter.sql | 106 + tsl/test/sql/vector_agg_param.sql | 14 +- tsl/test/sql/vectorized_aggregation.sql | 4 + 15 files changed, 3512 insertions(+), 148 deletions(-) create mode 100644 tsl/test/expected/vector_agg_filter.out create mode 
100644 tsl/test/sql/vector_agg_filter.sql diff --git a/tsl/src/compression/arrow_c_data_interface.h b/tsl/src/compression/arrow_c_data_interface.h index 1473e24945b..077217b14af 100644 --- a/tsl/src/compression/arrow_c_data_interface.h +++ b/tsl/src/compression/arrow_c_data_interface.h @@ -184,25 +184,62 @@ arrow_set_row_validity(uint64 *bitmap, size_t row_number, bool value) } /* - * AND two optional arrow validity bitmaps into the given storage. + * Combine the validity bitmaps into the given storage. */ static inline const uint64 * arrow_combine_validity(size_t num_words, uint64 *restrict storage, const uint64 *filter1, - const uint64 *filter2) + const uint64 *filter2, const uint64 *filter3) { + /* + * Any and all of the filters can be null. For simplicity, move the non-null + * filters to the front. + */ + const uint64 *tmp; +#define SWAP(X, Y) \ + tmp = (X); \ + (X) = (Y); \ + (Y) = tmp; + + if (filter2 == NULL) + { + SWAP(filter2, filter3); + } + if (filter1 == NULL) { - return filter2; + SWAP(filter1, filter2); + + if (filter2 == NULL) + { + SWAP(filter2, filter3); + } } +#undef SWAP + + Assert(filter2 == NULL || filter1 != NULL); + Assert(filter3 == NULL || filter2 != NULL); if (filter2 == NULL) { + /* Either have one non-null filter, or all of them are null. */ return filter1; } - for (size_t i = 0; i < num_words; i++) + if (filter3 == NULL) + { + /* Have two non-null filters. */ + for (size_t i = 0; i < num_words; i++) + { + storage[i] = filter1[i] & filter2[i]; + } + } + else { - storage[i] = filter1[i] & filter2[i]; + /* Have three non-null filters. 
*/ + for (size_t i = 0; i < num_words; i++) + { + storage[i] = filter1[i] & filter2[i] & filter3[i]; + } } return storage; diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 81ed77fa8e0..582df8cbc1b 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -20,18 +20,6 @@ #include "nodes/decompress_chunk/vector_predicates.h" #include "nodes/decompress_chunk/vector_quals.h" -/* - * VectorQualState for a compressed batch used to pass - * DecompressChunk-specific data to vector qual functions that are shared - * across scan nodes. - */ -typedef struct CompressedBatchVectorQualState -{ - VectorQualState vqstate; - DecompressBatchState *batch_state; - DecompressContext *dcontext; -} CompressedBatchVectorQualState; - /* * Create a single-value ArrowArray of an arithmetic type. This is a specialized * function because arithmetic types have a particular layout of ArrowArrays. @@ -312,7 +300,7 @@ decompress_column(DecompressContext *dcontext, DecompressBatchState *batch_state * VectorQualState->get_arrow_array() function used to interface with the * vector qual code across different scan nodes. 
*/ -static const ArrowArray * +const ArrowArray * compressed_batch_get_arrow_array(VectorQualState *vqstate, Expr *expr, bool *is_default_value) { CompressedBatchVectorQualState *cbvqstate = (CompressedBatchVectorQualState *) vqstate; @@ -360,8 +348,6 @@ compressed_batch_get_arrow_array(VectorQualState *vqstate, Expr *expr, bool *is_ var->varattno); Assert(column_description != NULL); Assert(column_description->typid == var->vartype); - Ensure(column_description->type == COMPRESSED_COLUMN, - "only compressed columns are supported in vectorized quals"); CompressedColumnValues *column_values = &batch_state->compressed_columns[column_index]; diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.h b/tsl/src/nodes/decompress_chunk/compressed_batch.h index 3131c2c7fd2..9871503e37c 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.h +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.h @@ -7,6 +7,7 @@ #include "compression/compression.h" #include "nodes/decompress_chunk/decompress_context.h" +#include "nodes/decompress_chunk/vector_quals.h" #include typedef struct ArrowArray ArrowArray; @@ -172,3 +173,18 @@ compressed_batch_current_tuple(DecompressBatchState *batch_state) Assert(batch_state->per_batch_context != NULL); return &batch_state->decompressed_scan_slot_data.base; } + +/* + * VectorQualState for a compressed batch used to pass + * DecompressChunk-specific data to vector qual functions that are shared + * across scan nodes. 
+ */ +typedef struct CompressedBatchVectorQualState +{ + VectorQualState vqstate; + DecompressBatchState *batch_state; + DecompressContext *dcontext; +} CompressedBatchVectorQualState; + +const ArrowArray *compressed_batch_get_arrow_array(VectorQualState *vqstate, Expr *expr, + bool *is_default_value); diff --git a/tsl/src/nodes/decompress_chunk/planner.c b/tsl/src/nodes/decompress_chunk/planner.c index 8048fc2ceac..785b846e583 100644 --- a/tsl/src/nodes/decompress_chunk/planner.c +++ b/tsl/src/nodes/decompress_chunk/planner.c @@ -148,12 +148,6 @@ typedef struct } DecompressionMapContext; -typedef struct VectorQualInfoDecompressChunk -{ - VectorQualInfo vqinfo; - const UncompressedColumnInfo *colinfo; -} VectorQualInfoDecompressChunk; - static bool * build_vector_attrs_array(const UncompressedColumnInfo *colinfo, const CompressionInfo *info) { diff --git a/tsl/src/nodes/vector_agg/exec.c b/tsl/src/nodes/vector_agg/exec.c index 1f198d799a9..0bb932bce1a 100644 --- a/tsl/src/nodes/vector_agg/exec.c +++ b/tsl/src/nodes/vector_agg/exec.c @@ -18,6 +18,7 @@ #include "guc.h" #include "nodes/decompress_chunk/compressed_batch.h" #include "nodes/decompress_chunk/exec.h" +#include "nodes/decompress_chunk/vector_quals.h" #include "nodes/vector_agg.h" static int @@ -146,6 +147,11 @@ vector_agg_begin(CustomScanState *node, EState *estate, int eflags) { def->input_offset = -1; } + + if (aggref->aggfilter != NULL) + { + def->filter_clauses = list_make1(aggref->aggfilter); + } } else { @@ -282,6 +288,37 @@ vector_agg_exec(CustomScanState *node) dcontext->ps->instrument->tuplecount += not_filtered_rows; } + /* + * Compute the vectorized filters for the aggregate function FILTER + * clauses. 
+ */ + const int naggs = vector_agg_state->num_agg_defs; + for (int i = 0; i < naggs; i++) + { + VectorAggDef *agg_def = &vector_agg_state->agg_defs[i]; + if (agg_def->filter_clauses == NIL) + { + continue; + } + CompressedBatchVectorQualState cbvqstate = { + .vqstate = { + .vectorized_quals_constified = agg_def->filter_clauses, + .num_results = batch_state->total_batch_rows, + .per_vector_mcxt = batch_state->per_batch_context, + .slot = compressed_slot, + .get_arrow_array = compressed_batch_get_arrow_array, + }, + .batch_state = batch_state, + .dcontext = dcontext, + }; + VectorQualState *vqstate = &cbvqstate.vqstate; + vector_qual_compute(vqstate); + agg_def->filter_result = vqstate->vector_qual_result; + } + + /* + * Finally, pass the compressed batch to the grouping policy. + */ grouping->gp_add_batch(grouping, batch_state); } diff --git a/tsl/src/nodes/vector_agg/exec.h b/tsl/src/nodes/vector_agg/exec.h index 26f832b3548..e5e9775ebb9 100644 --- a/tsl/src/nodes/vector_agg/exec.h +++ b/tsl/src/nodes/vector_agg/exec.h @@ -18,6 +18,8 @@ typedef struct VectorAggDef VectorAggFunctions func; int input_offset; int output_offset; + List *filter_clauses; + uint64 *filter_result; } VectorAggDef; typedef struct GroupingColumn diff --git a/tsl/src/nodes/vector_agg/grouping_policy_batch.c b/tsl/src/nodes/vector_agg/grouping_policy_batch.c index c9fa8f66709..c9b4249c3f4 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_batch.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_batch.c @@ -151,6 +151,7 @@ compute_single_aggregate(GroupingPolicyBatch *policy, DecompressBatchState *batc const uint64 *filter = arrow_combine_validity(num_words, policy->tmp_filter, batch_state->vector_qual_result, + agg_def->filter_result, arg_validity_bitmap); /* @@ -166,15 +167,16 @@ compute_single_aggregate(GroupingPolicyBatch *policy, DecompressBatchState *batc /* * Scalar argument, or count(*). Have to also count the valid rows in * the batch. 
- */ - const int n = arrow_num_valid(filter, batch_state->total_batch_rows); - - /* + * * The batches that are fully filtered out by vectorized quals should - * have been skipped by the caller. + * have been skipped by the caller, but we also have to check for the + * case when no rows match the aggregate FILTER clause. */ - Assert(n > 0); - agg_def->func.agg_scalar(agg_state, arg_datum, arg_isnull, n, agg_extra_mctx); + const int n = arrow_num_valid(filter, batch_state->total_batch_rows); + if (n > 0) + { + agg_def->func.agg_scalar(agg_state, arg_datum, arg_isnull, n, agg_extra_mctx); + } } } @@ -185,7 +187,7 @@ gp_batch_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) /* * Allocate the temporary filter array for computing the combined results of - * batch filter and column validity. + * batch filter, aggregate filter and column validity. */ const size_t num_words = (batch_state->total_batch_rows + 63) / 64; if (num_words > policy->num_tmp_filter_words) diff --git a/tsl/src/nodes/vector_agg/plan.c b/tsl/src/nodes/vector_agg/plan.c index bd0a236de58..5da5b713bac 100644 --- a/tsl/src/nodes/vector_agg/plan.c +++ b/tsl/src/nodes/vector_agg/plan.c @@ -17,6 +17,7 @@ #include "exec.h" #include "nodes/decompress_chunk/planner.h" +#include "nodes/decompress_chunk/vector_quals.h" #include "nodes/vector_agg.h" #include "utils.h" @@ -74,29 +75,44 @@ resolve_outer_special_vars_mutator(Node *node, void *context) return expression_tree_mutator(node, resolve_outer_special_vars_mutator, context); } - Var *aggregated_var = castNode(Var, node); - Ensure(aggregated_var->varno == OUTER_VAR, - "encountered unexpected varno %d as an aggregate argument", - aggregated_var->varno); - + Var *var = castNode(Var, node); CustomScan *custom = castNode(CustomScan, context); - TargetEntry *decompress_chunk_tentry = - castNode(TargetEntry, list_nth(custom->scan.plan.targetlist, aggregated_var->varattno - 1)); - Var *decompressed_var = castNode(Var, decompress_chunk_tentry->expr); - 
if (decompressed_var->varno == INDEX_VAR) + if ((Index) var->varno == (Index) custom->scan.scanrelid) + { + /* + * This is already the uncompressed chunk var. We can see it referenced + * by expressions in the output targetlist of DecompressChunk node. + */ + return (Node *) copyObject(var); + } + + if (var->varno == OUTER_VAR) + { + /* + * Reference into the output targetlist of the DecompressChunk node. + */ + TargetEntry *decompress_chunk_tentry = + castNode(TargetEntry, list_nth(custom->scan.plan.targetlist, var->varattno - 1)); + + return resolve_outer_special_vars_mutator((Node *) decompress_chunk_tentry->expr, context); + } + + if (var->varno == INDEX_VAR) { /* * This is a reference into the custom scan targetlist, we have to resolve * it as well. */ - decompressed_var = - castNode(Var, - castNode(TargetEntry, - list_nth(custom->custom_scan_tlist, decompressed_var->varattno - 1)) - ->expr); - } - Assert(decompressed_var->varno > 0); - return (Node *) copyObject(decompressed_var); + var = castNode(Var, + castNode(TargetEntry, list_nth(custom->custom_scan_tlist, var->varattno - 1)) + ->expr); + Assert(var->varno > 0); + + return (Node *) copyObject(var); + } + + Ensure(false, "encountered unexpected varno %d as an aggregate argument", var->varno); + return node; } /* @@ -115,20 +131,20 @@ resolve_outer_special_vars(List *agg_tlist, CustomScan *custom) * node. */ static Plan * -vector_agg_plan_create(Agg *agg, CustomScan *decompress_chunk) +vector_agg_plan_create(Agg *agg, CustomScan *decompress_chunk, List *resolved_targetlist) { CustomScan *vector_agg = (CustomScan *) makeNode(CustomScan); vector_agg->custom_plans = list_make1(decompress_chunk); vector_agg->methods = &scan_methods; + vector_agg->custom_scan_tlist = resolved_targetlist; + /* * Note that this is being called from the post-planning hook, and therefore * after set_plan_refs(). 
The meaning of output targetlists is different from * the previous planning stages, and they contain special varnos referencing * the scan targetlists. */ - vector_agg->custom_scan_tlist = - resolve_outer_special_vars(agg->plan.targetlist, decompress_chunk); vector_agg->scan.plan.targetlist = build_trivial_custom_output_targetlist(vector_agg->custom_scan_tlist); @@ -166,6 +182,69 @@ vector_agg_plan_create(Agg *agg, CustomScan *decompress_chunk) return (Plan *) vector_agg; } +/* + * Map the custom scan attribute number to the uncompressed chunk attribute + * number. + */ +static int +custom_scan_to_uncompressed_chunk_attno(List *custom_scan_tlist, int custom_scan_attno) +{ + if (custom_scan_tlist == NIL) + { + return custom_scan_attno; + } + + Var *var = + castNode(Var, + castNode(TargetEntry, + list_nth(custom_scan_tlist, AttrNumberGetAttrOffset(custom_scan_attno))) + ->expr); + return var->varattno; +} + +/* + * Whether the given compressed column index corresponds to a vector variable. + */ +static bool +is_vector_compressed_column(CustomScan *custom, int compressed_column_index, bool *out_is_segmentby) +{ + List *bulk_decompression_column = list_nth(custom->custom_private, DCP_BulkDecompressionColumn); + const bool bulk_decompression_enabled_for_column = + list_nth_int(bulk_decompression_column, compressed_column_index); + + /* + * Bulk decompression can be disabled for all columns in the DecompressChunk + * node settings, we can't do vectorized aggregation for compressed columns + * in that case. For segmentby columns it's still possible. + */ + List *settings = linitial(custom->custom_private); + const bool bulk_decompression_enabled_globally = + list_nth_int(settings, DCS_EnableBulkDecompression); + + /* + * Check if this column is a segmentby. 
+ */ + List *is_segmentby_column = list_nth(custom->custom_private, DCP_IsSegmentbyColumn); + const bool is_segmentby = list_nth_int(is_segmentby_column, compressed_column_index); + if (out_is_segmentby) + { + *out_is_segmentby = is_segmentby; + } + + /* + * We support vectorized aggregation either for segmentby columns or for + * columns with bulk decompression enabled. + */ + if (!is_segmentby && + !(bulk_decompression_enabled_for_column && bulk_decompression_enabled_globally)) + { + /* Vectorized aggregation not possible for this particular column. */ + return false; + } + + return true; +} + /* * Whether the expression can be used for vectorized processing: must be a Var * that refers to either a bulk-decompressed or a segmentby column. @@ -179,89 +258,119 @@ is_vector_var(CustomScan *custom, Expr *expr, bool *out_is_segmentby) return false; } - Var *aggregated_var = castNode(Var, expr); + Var *decompressed_var = castNode(Var, expr); /* - * Check if this particular column is a segmentby or has bulk decompression - * enabled. This hook is called after set_plan_refs, and at this stage the - * output targetlist of the aggregation node uses OUTER_VAR references into - * the child scan targetlist, so first we have to translate this. + * This must be called after resolve_outer_special_vars(), so we should only + * see the uncompressed chunk variables here. */ - Assert(aggregated_var->varno == OUTER_VAR); - TargetEntry *decompressed_target_entry = - list_nth(custom->scan.plan.targetlist, AttrNumberGetAttrOffset(aggregated_var->varattno)); + Ensure((Index) decompressed_var->varno == (Index) custom->scan.scanrelid, + "expected scan varno %d got %d", + custom->scan.scanrelid, + decompressed_var->varno); - if (!IsA(decompressed_target_entry->expr, Var)) + if (decompressed_var->varattno <= 0) { - /* - * Can only aggregate the plain Vars. Not sure if this is redundant with - * the similar check above. - */ + /* Can't work with special attributes like tableoid. 
*/ + if (out_is_segmentby) + { + *out_is_segmentby = false; + } return false; } - Var *decompressed_var = castNode(Var, decompressed_target_entry->expr); /* * Now, we have to translate the decompressed varno into the compressed * column index, to check if the column supports bulk decompression. */ List *decompression_map = list_nth(custom->custom_private, DCP_DecompressionMap); - List *is_segmentby_column = list_nth(custom->custom_private, DCP_IsSegmentbyColumn); - List *bulk_decompression_column = list_nth(custom->custom_private, DCP_BulkDecompressionColumn); int compressed_column_index = 0; for (; compressed_column_index < list_length(decompression_map); compressed_column_index++) { - if (list_nth_int(decompression_map, compressed_column_index) == decompressed_var->varattno) + const int custom_scan_attno = list_nth_int(decompression_map, compressed_column_index); + if (custom_scan_attno <= 0) + { + continue; + } + + const int uncompressed_chunk_attno = + custom_scan_to_uncompressed_chunk_attno(custom->custom_scan_tlist, custom_scan_attno); + + if (uncompressed_chunk_attno == decompressed_var->varattno) { break; } } Ensure(compressed_column_index < list_length(decompression_map), "compressed column not found"); - Assert(list_length(decompression_map) == list_length(bulk_decompression_column)); - const bool bulk_decompression_enabled_for_column = - list_nth_int(bulk_decompression_column, compressed_column_index); + return is_vector_compressed_column(custom, compressed_column_index, out_is_segmentby); +} + +/* + * Build supplementary info to determine whether we can vectorize the + * aggregate FILTER clauses. + */ +static VectorQualInfo +build_aggfilter_vector_qual_info(CustomScan *custom) +{ + VectorQualInfo vqi = { .rti = custom->scan.scanrelid }; /* - * Bulk decompression can be disabled for all columns in the DecompressChunk - * node settings, we can't do vectorized aggregation for compressed columns - * in that case. For segmentby columns it's still possible. 
+ * Now, we have to translate the decompressed varno into the compressed + * column index, to check if the column supports bulk decompression. */ - List *settings = linitial(custom->custom_private); - const bool bulk_decompression_enabled_globally = - list_nth_int(settings, DCS_EnableBulkDecompression); + List *decompression_map = list_nth(custom->custom_private, DCP_DecompressionMap); /* - * Check if this column is a segmentby. + * There's no easy way to determine maximum attribute number for uncompressed + * chunk at this stage, so we'll have to go through all the compressed columns + * for this. */ - const bool is_segmentby = list_nth_int(is_segmentby_column, compressed_column_index); - if (out_is_segmentby) + int maxattno = 0; + for (int compressed_column_index = 0; compressed_column_index < list_length(decompression_map); + compressed_column_index++) { - *out_is_segmentby = is_segmentby; + const int custom_scan_attno = list_nth_int(decompression_map, compressed_column_index); + if (custom_scan_attno <= 0) + { + continue; + } + + const int uncompressed_chunk_attno = + custom_scan_to_uncompressed_chunk_attno(custom->custom_scan_tlist, custom_scan_attno); + + if (uncompressed_chunk_attno > maxattno) + { + maxattno = uncompressed_chunk_attno; + } } - /* - * We support vectorized aggregation either for segmentby columns or for - * columns with bulk decompression enabled. - */ - if (!is_segmentby && - !(bulk_decompression_enabled_for_column && bulk_decompression_enabled_globally)) + vqi.vector_attrs = (bool *) palloc0(sizeof(bool) * (maxattno + 1)); + + for (int compressed_column_index = 0; compressed_column_index < list_length(decompression_map); + compressed_column_index++) { - /* Vectorized aggregation not possible for this particular column. 
*/ - return false; + const int custom_scan_attno = list_nth_int(decompression_map, compressed_column_index); + if (custom_scan_attno <= 0) + { + continue; + } + + const int uncompressed_chunk_attno = + custom_scan_to_uncompressed_chunk_attno(custom->custom_scan_tlist, custom_scan_attno); + + vqi.vector_attrs[uncompressed_chunk_attno] = + is_vector_compressed_column(custom, compressed_column_index, NULL); } - return true; + return vqi; } +/* + * Whether we can vectorize this particular aggregate. + */ static bool -can_vectorize_aggref(Aggref *aggref, CustomScan *custom) +can_vectorize_aggref(Aggref *aggref, CustomScan *custom, VectorQualInfo *vqi) { - if (aggref->aggfilter != NULL) - { - /* Filter clause on aggregate is not supported. */ - return false; - } - if (aggref->aggdirectargs != NIL) { /* Can't process ordered-set aggregates with direct arguments. */ @@ -282,8 +391,13 @@ can_vectorize_aggref(Aggref *aggref, CustomScan *custom) if (aggref->aggfilter != NULL) { - /* Can't process aggregates with filter clause. */ - return false; + /* Can process aggregates with filter clause if it's vectorizable. */ + Node *aggfilter_vectorized = vector_qual_make((Node *) aggref->aggfilter, vqi); + if (aggfilter_vectorized == NULL) + { + return false; + } + aggref->aggfilter = (Expr *) aggfilter_vectorized; } if (get_vector_aggregate(aggref->aggfnoid) == NULL) @@ -314,11 +428,13 @@ can_vectorize_aggref(Aggref *aggref, CustomScan *custom) /* * Whether we can perform vectorized aggregation with a given grouping. - * Currently supports either no grouping or grouping by segmentby columns. */ static bool -can_vectorize_grouping(Agg *agg, CustomScan *custom) +can_vectorize_grouping(Agg *agg, CustomScan *custom, List *resolved_targetlist) { + /* + * We support vectorized aggregation without grouping. 
+ */ if (agg->numCols == 0) { return true; @@ -327,7 +443,7 @@ can_vectorize_grouping(Agg *agg, CustomScan *custom) for (int i = 0; i < agg->numCols; i++) { int offset = AttrNumberGetAttrOffset(agg->grpColIdx[i]); - TargetEntry *entry = list_nth(agg->plan.targetlist, offset); + TargetEntry *entry = list_nth_node(TargetEntry, resolved_targetlist, offset); bool is_segmentby = false; if (!is_vector_var(custom, entry->expr, &is_segmentby)) @@ -509,25 +625,54 @@ try_insert_vector_agg_node(Plan *plan) return plan; } - if (!can_vectorize_grouping(agg, custom)) + /* + * To make it easier to examine the variables participating in the aggregation, + * the subsequent checks are performed on the aggregated targetlist with + * all variables resolved to uncompressed chunk variables. + */ + List *resolved_targetlist = resolve_outer_special_vars(agg->plan.targetlist, custom); + + if (!can_vectorize_grouping(agg, custom, resolved_targetlist)) { /* No GROUP BY support for now. */ return plan; } - /* Now check the aggregate functions themselves. */ + /* + * Build supplementary info to determine whether we can vectorize the + * aggregate FILTER clauses. + */ + VectorQualInfo vqi = build_aggfilter_vector_qual_info(custom); + + /* Now check the output targetlist. */ ListCell *lc; - foreach (lc, agg->plan.targetlist) + foreach (lc, resolved_targetlist) { TargetEntry *target_entry = castNode(TargetEntry, lfirst(lc)); - if (!IsA(target_entry->expr, Aggref)) + if (IsA(target_entry->expr, Aggref)) { - continue; + Aggref *aggref = castNode(Aggref, target_entry->expr); + if (!can_vectorize_aggref(aggref, custom, &vqi)) + { + /* Aggregate function not vectorizable. */ + return plan; + } } - - Aggref *aggref = castNode(Aggref, target_entry->expr); - if (!can_vectorize_aggref(aggref, custom)) + else if (IsA(target_entry->expr, Var)) + { + if (!is_vector_var(custom, target_entry->expr, NULL)) + { + /* Variable not vectorizable. 
*/ + return plan; + } + } + else { + /* + * Sometimes the plan can require this node to perform a projection, + * e.g. we can see a nested loop param in its output targetlist. We + * can't handle this case currently. + */ return plan; } } @@ -536,5 +681,5 @@ try_insert_vector_agg_node(Plan *plan) * Finally, all requirements are satisfied and we can vectorize this partial * aggregation node. */ - return vector_agg_plan_create(agg, custom); + return vector_agg_plan_create(agg, custom, resolved_targetlist); } diff --git a/tsl/test/expected/vector_agg_filter.out b/tsl/test/expected/vector_agg_filter.out new file mode 100644 index 00000000000..20d97efd1d4 --- /dev/null +++ b/tsl/test/expected/vector_agg_filter.out @@ -0,0 +1,2944 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. +\c :TEST_DBNAME :ROLE_SUPERUSER +-- helper function: float -> pseudorandom float [-0.5..0.5] +CREATE OR REPLACE FUNCTION mix(x anyelement) RETURNS float8 AS $$ + SELECT hashfloat8(x::float8) / pow(2, 32) +$$ LANGUAGE SQL; +\set CHUNKS 2::int +\set CHUNK_ROWS 100000::int +\set GROUPING_CARDINALITY 10::int +create table aggfilter(t int, s int, + cint2 int2, dropped int4, cint4 int4); +select create_hypertable('aggfilter', 's', chunk_time_interval => :GROUPING_CARDINALITY / :CHUNKS); +NOTICE: adding not-null constraint to column "s" + create_hypertable +------------------------ + (1,public,aggfilter,t) +(1 row) + +create view source as +select s * 10000 + t as t, + s, + case when t % 1051 = 0 then null + else (mix(s + t * 1019) * 32767)::int2 end as cint2, + 1 as dropped, + (mix(s + t * 1021) * 32767)::int4 as cint4 +from + generate_series(1::int, :CHUNK_ROWS * :CHUNKS / :GROUPING_CARDINALITY) t, + generate_series(0::int, :GROUPING_CARDINALITY - 1::int) s(s) +; +insert into aggfilter select * from source where s = 1; +alter table aggfilter set 
(timescaledb.compress, timescaledb.compress_orderby = 't', + timescaledb.compress_segmentby = 's'); +select count(compress_chunk(x)) from show_chunks('aggfilter') x; + count +------- + 1 +(1 row) + +alter table aggfilter add column ss int default 11; +alter table aggfilter drop column dropped; +insert into aggfilter +select t, s, cint2, cint4, + case + -- null in entire batch + when s = 2 then null + -- null for some rows + when s = 3 and t % 1053 = 0 then null + -- for some rows same as default + when s = 4 and t % 1057 = 0 then 11 + -- not null for entire batch + else s + end as ss +from source where s != 1 +; +select count(compress_chunk(x)) from show_chunks('aggfilter') x; + count +------- + 2 +(1 row) + +vacuum freeze analyze aggfilter; +set timescaledb.debug_require_vector_agg = 'require'; +---- Uncomment to generate reference. Note that there are minor discrepancies +---- on float4 due to different numeric stability in our and PG implementations. +--set timescaledb.enable_vectorized_aggregation to off; set timescaledb.debug_require_vector_agg = 'allow'; +select + format('%sselect %s%s(%s)%s from aggfilter%s%s%s;', + explain, + grouping || ', ', + function, variable, + ' filter (where ' || agg_filter || ')', + ' where ' || condition, + ' group by ' || grouping, + format(' order by %s(%s), ', function, variable) || grouping || ' limit 10', + function, variable) +from + unnest(array[ + 'explain (costs off) ', + null]) explain, + unnest(array[ + 's', + 'ss', + 'cint2', + 'cint4', + '*']) variable, + unnest(array[ + 'min', + 'count']) function, + unnest(array[ + null, + 'cint2 > 0', + 'cint2 is null']) with ordinality as condition(condition, n), + unnest(array[ + null, + 's']) with ordinality as grouping(grouping, n), + unnest(array[ + null, + 'cint2 < 0', + 'ss > 1000', + 'cint4 > 0', + 's != 5']) with ordinality as agg_filter(agg_filter, n) +where + true + and (explain is null /* or condition is null and grouping = 's' */) + and (variable != '*' or function = 
'count') +order by explain, condition.n, variable, function, grouping.n, agg_filter.n +\gexec +select count(*) from aggfilter; + count +-------- + 200000 +(1 row) + +select count(*) filter (where cint2 < 0) from aggfilter; + count +-------- + 100139 +(1 row) + +select count(*) filter (where ss > 1000) from aggfilter; + count +------- + 0 +(1 row) + +select count(*) filter (where cint4 > 0) from aggfilter; + count +-------- + 100038 +(1 row) + +select count(*) filter (where s != 5) from aggfilter; + count +-------- + 180000 +(1 row) + +select s, count(*) from aggfilter group by s order by count(*), s limit 10; + s | count +---+------- + 0 | 20000 + 1 | 20000 + 2 | 20000 + 3 | 20000 + 4 | 20000 + 5 | 20000 + 6 | 20000 + 7 | 20000 + 8 | 20000 + 9 | 20000 +(10 rows) + +select s, count(*) filter (where cint2 < 0) from aggfilter group by s order by count(*), s limit 10; + s | count +---+------- + 0 | 9968 + 1 | 9885 + 2 | 10113 + 3 | 10088 + 4 | 10074 + 5 | 9871 + 6 | 10089 + 7 | 10008 + 8 | 10082 + 9 | 9961 +(10 rows) + +select s, count(*) filter (where ss > 1000) from aggfilter group by s order by count(*), s limit 10; + s | count +---+------- + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 +(10 rows) + +select s, count(*) filter (where cint4 > 0) from aggfilter group by s order by count(*), s limit 10; + s | count +---+------- + 0 | 10002 + 1 | 10046 + 2 | 9885 + 3 | 10063 + 4 | 9995 + 5 | 10106 + 6 | 9977 + 7 | 9983 + 8 | 10020 + 9 | 9961 +(10 rows) + +select s, count(*) filter (where s != 5) from aggfilter group by s order by count(*), s limit 10; + s | count +---+------- + 0 | 20000 + 1 | 20000 + 2 | 20000 + 3 | 20000 + 4 | 20000 + 5 | 0 + 6 | 20000 + 7 | 20000 + 8 | 20000 + 9 | 20000 +(10 rows) + +select count(cint2) from aggfilter; + count +-------- + 199810 +(1 row) + +select count(cint2) filter (where cint2 < 0) from aggfilter; + count +-------- + 100139 +(1 row) + +select count(cint2) filter (where ss > 1000) from aggfilter; + 
count +------- + 0 +(1 row) + +select count(cint2) filter (where cint4 > 0) from aggfilter; + count +------- + 99946 +(1 row) + +select count(cint2) filter (where s != 5) from aggfilter; + count +-------- + 179829 +(1 row) + +select s, count(cint2) from aggfilter group by s order by count(cint2), s limit 10; + s | count +---+------- + 0 | 19981 + 1 | 19981 + 2 | 19981 + 3 | 19981 + 4 | 19981 + 5 | 19981 + 6 | 19981 + 7 | 19981 + 8 | 19981 + 9 | 19981 +(10 rows) + +select s, count(cint2) filter (where cint2 < 0) from aggfilter group by s order by count(cint2), s limit 10; + s | count +---+------- + 0 | 9968 + 1 | 9885 + 2 | 10113 + 3 | 10088 + 4 | 10074 + 5 | 9871 + 6 | 10089 + 7 | 10008 + 8 | 10082 + 9 | 9961 +(10 rows) + +select s, count(cint2) filter (where ss > 1000) from aggfilter group by s order by count(cint2), s limit 10; + s | count +---+------- + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 +(10 rows) + +select s, count(cint2) filter (where cint4 > 0) from aggfilter group by s order by count(cint2), s limit 10; + s | count +---+------- + 0 | 9993 + 1 | 10034 + 2 | 9876 + 3 | 10052 + 4 | 9990 + 5 | 10095 + 6 | 9968 + 7 | 9976 + 8 | 10010 + 9 | 9952 +(10 rows) + +select s, count(cint2) filter (where s != 5) from aggfilter group by s order by count(cint2), s limit 10; + s | count +---+------- + 0 | 19981 + 1 | 19981 + 2 | 19981 + 3 | 19981 + 4 | 19981 + 5 | 0 + 6 | 19981 + 7 | 19981 + 8 | 19981 + 9 | 19981 +(10 rows) + +select min(cint2) from aggfilter; + min +-------- + -16383 +(1 row) + +select min(cint2) filter (where cint2 < 0) from aggfilter; + min +-------- + -16383 +(1 row) + +select min(cint2) filter (where ss > 1000) from aggfilter; + min +----- + +(1 row) + +select min(cint2) filter (where cint4 > 0) from aggfilter; + min +-------- + -16383 +(1 row) + +select min(cint2) filter (where s != 5) from aggfilter; + min +-------- + -16383 +(1 row) + +select s, min(cint2) from aggfilter group by s order by min(cint2), s 
limit 10; + s | min +---+-------- + 0 | -16383 + 4 | -16383 + 5 | -16383 + 6 | -16383 + 2 | -16382 + 7 | -16382 + 8 | -16382 + 3 | -16381 + 1 | -16378 + 9 | -16375 +(10 rows) + +select s, min(cint2) filter (where cint2 < 0) from aggfilter group by s order by min(cint2), s limit 10; + s | min +---+-------- + 0 | -16383 + 4 | -16383 + 5 | -16383 + 6 | -16383 + 2 | -16382 + 7 | -16382 + 8 | -16382 + 3 | -16381 + 1 | -16378 + 9 | -16375 +(10 rows) + +select s, min(cint2) filter (where ss > 1000) from aggfilter group by s order by min(cint2), s limit 10; + s | min +---+----- + 0 | + 4 | + 5 | + 6 | + 2 | + 7 | + 8 | + 3 | + 1 | + 9 | +(10 rows) + +select s, min(cint2) filter (where cint4 > 0) from aggfilter group by s order by min(cint2), s limit 10; + s | min +---+-------- + 0 | -16380 + 4 | -16381 + 5 | -16383 + 6 | -16379 + 2 | -16382 + 7 | -16382 + 8 | -16382 + 3 | -16380 + 1 | -16378 + 9 | -16375 +(10 rows) + +select s, min(cint2) filter (where s != 5) from aggfilter group by s order by min(cint2), s limit 10; + s | min +---+-------- + 0 | -16383 + 4 | -16383 + 5 | + 6 | -16383 + 2 | -16382 + 7 | -16382 + 8 | -16382 + 3 | -16381 + 1 | -16378 + 9 | -16375 +(10 rows) + +select count(cint4) from aggfilter; + count +-------- + 200000 +(1 row) + +select count(cint4) filter (where cint2 < 0) from aggfilter; + count +-------- + 100139 +(1 row) + +select count(cint4) filter (where ss > 1000) from aggfilter; + count +------- + 0 +(1 row) + +select count(cint4) filter (where cint4 > 0) from aggfilter; + count +-------- + 100038 +(1 row) + +select count(cint4) filter (where s != 5) from aggfilter; + count +-------- + 180000 +(1 row) + +select s, count(cint4) from aggfilter group by s order by count(cint4), s limit 10; + s | count +---+------- + 0 | 20000 + 1 | 20000 + 2 | 20000 + 3 | 20000 + 4 | 20000 + 5 | 20000 + 6 | 20000 + 7 | 20000 + 8 | 20000 + 9 | 20000 +(10 rows) + +select s, count(cint4) filter (where cint2 < 0) from aggfilter group by s order by count(cint4), s 
limit 10; + s | count +---+------- + 0 | 9968 + 1 | 9885 + 2 | 10113 + 3 | 10088 + 4 | 10074 + 5 | 9871 + 6 | 10089 + 7 | 10008 + 8 | 10082 + 9 | 9961 +(10 rows) + +select s, count(cint4) filter (where ss > 1000) from aggfilter group by s order by count(cint4), s limit 10; + s | count +---+------- + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 +(10 rows) + +select s, count(cint4) filter (where cint4 > 0) from aggfilter group by s order by count(cint4), s limit 10; + s | count +---+------- + 0 | 10002 + 1 | 10046 + 2 | 9885 + 3 | 10063 + 4 | 9995 + 5 | 10106 + 6 | 9977 + 7 | 9983 + 8 | 10020 + 9 | 9961 +(10 rows) + +select s, count(cint4) filter (where s != 5) from aggfilter group by s order by count(cint4), s limit 10; + s | count +---+------- + 0 | 20000 + 1 | 20000 + 2 | 20000 + 3 | 20000 + 4 | 20000 + 5 | 0 + 6 | 20000 + 7 | 20000 + 8 | 20000 + 9 | 20000 +(10 rows) + +select min(cint4) from aggfilter; + min +-------- + -16383 +(1 row) + +select min(cint4) filter (where cint2 < 0) from aggfilter; + min +-------- + -16383 +(1 row) + +select min(cint4) filter (where ss > 1000) from aggfilter; + min +----- + +(1 row) + +select min(cint4) filter (where cint4 > 0) from aggfilter; + min +----- + 1 +(1 row) + +select min(cint4) filter (where s != 5) from aggfilter; + min +-------- + -16383 +(1 row) + +select s, min(cint4) from aggfilter group by s order by min(cint4), s limit 10; + s | min +---+-------- + 0 | -16383 + 2 | -16383 + 7 | -16383 + 1 | -16382 + 3 | -16382 + 4 | -16382 + 6 | -16382 + 8 | -16382 + 9 | -16382 + 5 | -16380 +(10 rows) + +select s, min(cint4) filter (where cint2 < 0) from aggfilter group by s order by min(cint4), s limit 10; + s | min +---+-------- + 0 | -16382 + 2 | -16383 + 7 | -16383 + 1 | -16382 + 3 | -16382 + 4 | -16382 + 6 | -16381 + 8 | -16382 + 9 | -16381 + 5 | -16380 +(10 rows) + +select s, min(cint4) filter (where ss > 1000) from aggfilter group by s order by min(cint4), s limit 10; + s | min +---+----- + 
0 | + 2 | + 7 | + 1 | + 3 | + 4 | + 6 | + 8 | + 9 | + 5 | +(10 rows) + +select s, min(cint4) filter (where cint4 > 0) from aggfilter group by s order by min(cint4), s limit 10; + s | min +---+----- + 0 | 1 + 2 | 2 + 7 | 1 + 1 | 3 + 3 | 2 + 4 | 2 + 6 | 2 + 8 | 2 + 9 | 2 + 5 | 2 +(10 rows) + +select s, min(cint4) filter (where s != 5) from aggfilter group by s order by min(cint4), s limit 10; + s | min +---+-------- + 0 | -16383 + 2 | -16383 + 7 | -16383 + 1 | -16382 + 3 | -16382 + 4 | -16382 + 6 | -16382 + 8 | -16382 + 9 | -16382 + 5 | +(10 rows) + +select count(s) from aggfilter; + count +-------- + 200000 +(1 row) + +select count(s) filter (where cint2 < 0) from aggfilter; + count +-------- + 100139 +(1 row) + +select count(s) filter (where ss > 1000) from aggfilter; + count +------- + 0 +(1 row) + +select count(s) filter (where cint4 > 0) from aggfilter; + count +-------- + 100038 +(1 row) + +select count(s) filter (where s != 5) from aggfilter; + count +-------- + 180000 +(1 row) + +select s, count(s) from aggfilter group by s order by count(s), s limit 10; + s | count +---+------- + 0 | 20000 + 1 | 20000 + 2 | 20000 + 3 | 20000 + 4 | 20000 + 5 | 20000 + 6 | 20000 + 7 | 20000 + 8 | 20000 + 9 | 20000 +(10 rows) + +select s, count(s) filter (where cint2 < 0) from aggfilter group by s order by count(s), s limit 10; + s | count +---+------- + 0 | 9968 + 1 | 9885 + 2 | 10113 + 3 | 10088 + 4 | 10074 + 5 | 9871 + 6 | 10089 + 7 | 10008 + 8 | 10082 + 9 | 9961 +(10 rows) + +select s, count(s) filter (where ss > 1000) from aggfilter group by s order by count(s), s limit 10; + s | count +---+------- + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 +(10 rows) + +select s, count(s) filter (where cint4 > 0) from aggfilter group by s order by count(s), s limit 10; + s | count +---+------- + 0 | 10002 + 1 | 10046 + 2 | 9885 + 3 | 10063 + 4 | 9995 + 5 | 10106 + 6 | 9977 + 7 | 9983 + 8 | 10020 + 9 | 9961 +(10 rows) + +select s, count(s) filter (where 
s != 5) from aggfilter group by s order by count(s), s limit 10; + s | count +---+------- + 0 | 20000 + 1 | 20000 + 2 | 20000 + 3 | 20000 + 4 | 20000 + 5 | 0 + 6 | 20000 + 7 | 20000 + 8 | 20000 + 9 | 20000 +(10 rows) + +select min(s) from aggfilter; + min +----- + 0 +(1 row) + +select min(s) filter (where cint2 < 0) from aggfilter; + min +----- + 0 +(1 row) + +select min(s) filter (where ss > 1000) from aggfilter; + min +----- + +(1 row) + +select min(s) filter (where cint4 > 0) from aggfilter; + min +----- + 0 +(1 row) + +select min(s) filter (where s != 5) from aggfilter; + min +----- + 0 +(1 row) + +select s, min(s) from aggfilter group by s order by min(s), s limit 10; + s | min +---+----- + 0 | 0 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + +select s, min(s) filter (where cint2 < 0) from aggfilter group by s order by min(s), s limit 10; + s | min +---+----- + 0 | 0 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + +select s, min(s) filter (where ss > 1000) from aggfilter group by s order by min(s), s limit 10; + s | min +---+----- + 0 | + 1 | + 2 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | + 9 | +(10 rows) + +select s, min(s) filter (where cint4 > 0) from aggfilter group by s order by min(s), s limit 10; + s | min +---+----- + 0 | 0 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + +select s, min(s) filter (where s != 5) from aggfilter group by s order by min(s), s limit 10; + s | min +---+----- + 0 | 0 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + +select count(ss) from aggfilter; + count +-------- + 199981 +(1 row) + +select count(ss) filter (where cint2 < 0) from aggfilter; + count +-------- + 100127 +(1 row) + +select count(ss) filter (where ss > 1000) from aggfilter; + count +------- + 0 +(1 row) + +select count(ss) filter (where cint4 > 0) from aggfilter; + count +-------- + 100027 +(1 row) + +select count(ss) 
filter (where s != 5) from aggfilter; + count +-------- + 179981 +(1 row) + +select s, count(ss) from aggfilter group by s order by count(ss), s limit 10; + s | count +---+------- + 3 | 19981 + 0 | 20000 + 1 | 20000 + 2 | 20000 + 4 | 20000 + 5 | 20000 + 6 | 20000 + 7 | 20000 + 8 | 20000 + 9 | 20000 +(10 rows) + +select s, count(ss) filter (where cint2 < 0) from aggfilter group by s order by count(ss), s limit 10; + s | count +---+------- + 3 | 10076 + 0 | 9968 + 1 | 9885 + 2 | 10113 + 4 | 10074 + 5 | 9871 + 6 | 10089 + 7 | 10008 + 8 | 10082 + 9 | 9961 +(10 rows) + +select s, count(ss) filter (where ss > 1000) from aggfilter group by s order by count(ss), s limit 10; + s | count +---+------- + 3 | 0 + 0 | 0 + 1 | 0 + 2 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 +(10 rows) + +select s, count(ss) filter (where cint4 > 0) from aggfilter group by s order by count(ss), s limit 10; + s | count +---+------- + 3 | 10052 + 0 | 10002 + 1 | 10046 + 2 | 9885 + 4 | 9995 + 5 | 10106 + 6 | 9977 + 7 | 9983 + 8 | 10020 + 9 | 9961 +(10 rows) + +select s, count(ss) filter (where s != 5) from aggfilter group by s order by count(ss), s limit 10; + s | count +---+------- + 3 | 19981 + 0 | 20000 + 1 | 20000 + 2 | 20000 + 4 | 20000 + 5 | 0 + 6 | 20000 + 7 | 20000 + 8 | 20000 + 9 | 20000 +(10 rows) + +select min(ss) from aggfilter; + min +----- + 0 +(1 row) + +select min(ss) filter (where cint2 < 0) from aggfilter; + min +----- + 0 +(1 row) + +select min(ss) filter (where ss > 1000) from aggfilter; + min +----- + +(1 row) + +select min(ss) filter (where cint4 > 0) from aggfilter; + min +----- + 0 +(1 row) + +select min(ss) filter (where s != 5) from aggfilter; + min +----- + 0 +(1 row) + +select s, min(ss) from aggfilter group by s order by min(ss), s limit 10; + s | min +---+----- + 0 | 0 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 1 | 11 + 2 | 11 +(10 rows) + +select s, min(ss) filter (where cint2 < 0) from aggfilter group by s order by min(ss), s limit 10; + s | 
min +---+----- + 0 | 0 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 1 | 11 + 2 | 11 +(10 rows) + +select s, min(ss) filter (where ss > 1000) from aggfilter group by s order by min(ss), s limit 10; + s | min +---+----- + 0 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | + 9 | + 1 | + 2 | +(10 rows) + +select s, min(ss) filter (where cint4 > 0) from aggfilter group by s order by min(ss), s limit 10; + s | min +---+----- + 0 | 0 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 1 | 11 + 2 | 11 +(10 rows) + +select s, min(ss) filter (where s != 5) from aggfilter group by s order by min(ss), s limit 10; + s | min +---+----- + 0 | 0 + 3 | 3 + 4 | 4 + 5 | + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 1 | 11 + 2 | 11 +(10 rows) + +select count(*) from aggfilter where cint2 > 0; + count +------- + 99664 +(1 row) + +select count(*) filter (where cint2 < 0) from aggfilter where cint2 > 0; + count +------- + 0 +(1 row) + +select count(*) filter (where ss > 1000) from aggfilter where cint2 > 0; + count +------- + 0 +(1 row) + +select count(*) filter (where cint4 > 0) from aggfilter where cint2 > 0; + count +------- + 49817 +(1 row) + +select count(*) filter (where s != 5) from aggfilter where cint2 > 0; + count +------- + 89554 +(1 row) + +select s, count(*) from aggfilter where cint2 > 0 group by s order by count(*), s limit 10; + s | count +---+------- + 2 | 9868 + 6 | 9890 + 3 | 9893 + 8 | 9898 + 4 | 9906 + 7 | 9973 + 0 | 10012 + 9 | 10018 + 1 | 10096 + 5 | 10110 +(10 rows) + +select s, count(*) filter (where cint2 < 0) from aggfilter where cint2 > 0 group by s order by count(*), s limit 10; + s | count +---+------- + 2 | 0 + 6 | 0 + 3 | 0 + 8 | 0 + 4 | 0 + 7 | 0 + 0 | 0 + 9 | 0 + 1 | 0 + 5 | 0 +(10 rows) + +select s, count(*) filter (where ss > 1000) from aggfilter where cint2 > 0 group by s order by count(*), s limit 10; + s | count +---+------- + 2 | 0 + 6 | 0 + 3 | 0 + 8 | 0 + 4 | 0 + 7 | 0 + 0 | 0 + 9 | 0 + 1 | 0 + 5 | 0 +(10 rows) + +select s, count(*) filter (where 
cint4 > 0) from aggfilter where cint2 > 0 group by s order by count(*), s limit 10; + s | count +---+------- + 2 | 4923 + 6 | 4911 + 3 | 4972 + 8 | 4929 + 4 | 4944 + 7 | 4990 + 0 | 4963 + 9 | 4951 + 1 | 5067 + 5 | 5167 +(10 rows) + +select s, count(*) filter (where s != 5) from aggfilter where cint2 > 0 group by s order by count(*), s limit 10; + s | count +---+------- + 2 | 9868 + 6 | 9890 + 3 | 9893 + 8 | 9898 + 4 | 9906 + 7 | 9973 + 0 | 10012 + 9 | 10018 + 1 | 10096 + 5 | 0 +(10 rows) + +select count(cint2) from aggfilter where cint2 > 0; + count +------- + 99664 +(1 row) + +select count(cint2) filter (where cint2 < 0) from aggfilter where cint2 > 0; + count +------- + 0 +(1 row) + +select count(cint2) filter (where ss > 1000) from aggfilter where cint2 > 0; + count +------- + 0 +(1 row) + +select count(cint2) filter (where cint4 > 0) from aggfilter where cint2 > 0; + count +------- + 49817 +(1 row) + +select count(cint2) filter (where s != 5) from aggfilter where cint2 > 0; + count +------- + 89554 +(1 row) + +select s, count(cint2) from aggfilter where cint2 > 0 group by s order by count(cint2), s limit 10; + s | count +---+------- + 2 | 9868 + 6 | 9890 + 3 | 9893 + 8 | 9898 + 4 | 9906 + 7 | 9973 + 0 | 10012 + 9 | 10018 + 1 | 10096 + 5 | 10110 +(10 rows) + +select s, count(cint2) filter (where cint2 < 0) from aggfilter where cint2 > 0 group by s order by count(cint2), s limit 10; + s | count +---+------- + 2 | 0 + 6 | 0 + 3 | 0 + 8 | 0 + 4 | 0 + 7 | 0 + 0 | 0 + 9 | 0 + 1 | 0 + 5 | 0 +(10 rows) + +select s, count(cint2) filter (where ss > 1000) from aggfilter where cint2 > 0 group by s order by count(cint2), s limit 10; + s | count +---+------- + 2 | 0 + 6 | 0 + 3 | 0 + 8 | 0 + 4 | 0 + 7 | 0 + 0 | 0 + 9 | 0 + 1 | 0 + 5 | 0 +(10 rows) + +select s, count(cint2) filter (where cint4 > 0) from aggfilter where cint2 > 0 group by s order by count(cint2), s limit 10; + s | count +---+------- + 2 | 4923 + 6 | 4911 + 3 | 4972 + 8 | 4929 + 4 | 4944 + 7 | 4990 + 0 | 4963 + 
9 | 4951 + 1 | 5067 + 5 | 5167 +(10 rows) + +select s, count(cint2) filter (where s != 5) from aggfilter where cint2 > 0 group by s order by count(cint2), s limit 10; + s | count +---+------- + 2 | 9868 + 6 | 9890 + 3 | 9893 + 8 | 9898 + 4 | 9906 + 7 | 9973 + 0 | 10012 + 9 | 10018 + 1 | 10096 + 5 | 0 +(10 rows) + +select min(cint2) from aggfilter where cint2 > 0; + min +----- + 1 +(1 row) + +select min(cint2) filter (where cint2 < 0) from aggfilter where cint2 > 0; + min +----- + +(1 row) + +select min(cint2) filter (where ss > 1000) from aggfilter where cint2 > 0; + min +----- + +(1 row) + +select min(cint2) filter (where cint4 > 0) from aggfilter where cint2 > 0; + min +----- + 1 +(1 row) + +select min(cint2) filter (where s != 5) from aggfilter where cint2 > 0; + min +----- + 1 +(1 row) + +select s, min(cint2) from aggfilter where cint2 > 0 group by s order by min(cint2), s limit 10; + s | min +---+----- + 1 | 1 + 2 | 1 + 3 | 1 + 5 | 1 + 7 | 1 + 8 | 1 + 9 | 2 + 6 | 3 + 0 | 4 + 4 | 4 +(10 rows) + +select s, min(cint2) filter (where cint2 < 0) from aggfilter where cint2 > 0 group by s order by min(cint2), s limit 10; + s | min +---+----- + 1 | + 2 | + 3 | + 5 | + 7 | + 8 | + 9 | + 6 | + 0 | + 4 | +(10 rows) + +select s, min(cint2) filter (where ss > 1000) from aggfilter where cint2 > 0 group by s order by min(cint2), s limit 10; + s | min +---+----- + 1 | + 2 | + 3 | + 5 | + 7 | + 8 | + 9 | + 6 | + 0 | + 4 | +(10 rows) + +select s, min(cint2) filter (where cint4 > 0) from aggfilter where cint2 > 0 group by s order by min(cint2), s limit 10; + s | min +---+----- + 1 | 4 + 2 | 4 + 3 | 1 + 5 | 1 + 7 | 1 + 8 | 1 + 9 | 6 + 6 | 3 + 0 | 6 + 4 | 4 +(10 rows) + +select s, min(cint2) filter (where s != 5) from aggfilter where cint2 > 0 group by s order by min(cint2), s limit 10; + s | min +---+----- + 1 | 1 + 2 | 1 + 3 | 1 + 5 | + 7 | 1 + 8 | 1 + 9 | 2 + 6 | 3 + 0 | 4 + 4 | 4 +(10 rows) + +select count(cint4) from aggfilter where cint2 > 0; + count +------- + 99664 +(1 row) 
+ +select count(cint4) filter (where cint2 < 0) from aggfilter where cint2 > 0; + count +------- + 0 +(1 row) + +select count(cint4) filter (where ss > 1000) from aggfilter where cint2 > 0; + count +------- + 0 +(1 row) + +select count(cint4) filter (where cint4 > 0) from aggfilter where cint2 > 0; + count +------- + 49817 +(1 row) + +select count(cint4) filter (where s != 5) from aggfilter where cint2 > 0; + count +------- + 89554 +(1 row) + +select s, count(cint4) from aggfilter where cint2 > 0 group by s order by count(cint4), s limit 10; + s | count +---+------- + 2 | 9868 + 6 | 9890 + 3 | 9893 + 8 | 9898 + 4 | 9906 + 7 | 9973 + 0 | 10012 + 9 | 10018 + 1 | 10096 + 5 | 10110 +(10 rows) + +select s, count(cint4) filter (where cint2 < 0) from aggfilter where cint2 > 0 group by s order by count(cint4), s limit 10; + s | count +---+------- + 2 | 0 + 6 | 0 + 3 | 0 + 8 | 0 + 4 | 0 + 7 | 0 + 0 | 0 + 9 | 0 + 1 | 0 + 5 | 0 +(10 rows) + +select s, count(cint4) filter (where ss > 1000) from aggfilter where cint2 > 0 group by s order by count(cint4), s limit 10; + s | count +---+------- + 2 | 0 + 6 | 0 + 3 | 0 + 8 | 0 + 4 | 0 + 7 | 0 + 0 | 0 + 9 | 0 + 1 | 0 + 5 | 0 +(10 rows) + +select s, count(cint4) filter (where cint4 > 0) from aggfilter where cint2 > 0 group by s order by count(cint4), s limit 10; + s | count +---+------- + 2 | 4923 + 6 | 4911 + 3 | 4972 + 8 | 4929 + 4 | 4944 + 7 | 4990 + 0 | 4963 + 9 | 4951 + 1 | 5067 + 5 | 5167 +(10 rows) + +select s, count(cint4) filter (where s != 5) from aggfilter where cint2 > 0 group by s order by count(cint4), s limit 10; + s | count +---+------- + 2 | 9868 + 6 | 9890 + 3 | 9893 + 8 | 9898 + 4 | 9906 + 7 | 9973 + 0 | 10012 + 9 | 10018 + 1 | 10096 + 5 | 0 +(10 rows) + +select min(cint4) from aggfilter where cint2 > 0; + min +-------- + -16383 +(1 row) + +select min(cint4) filter (where cint2 < 0) from aggfilter where cint2 > 0; + min +----- + +(1 row) + +select min(cint4) filter (where ss > 1000) from aggfilter where cint2 > 0; + 
min +----- + +(1 row) + +select min(cint4) filter (where cint4 > 0) from aggfilter where cint2 > 0; + min +----- + 1 +(1 row) + +select min(cint4) filter (where s != 5) from aggfilter where cint2 > 0; + min +-------- + -16383 +(1 row) + +select s, min(cint4) from aggfilter where cint2 > 0 group by s order by min(cint4), s limit 10; + s | min +---+-------- + 0 | -16383 + 1 | -16382 + 3 | -16382 + 4 | -16382 + 6 | -16382 + 7 | -16382 + 9 | -16382 + 2 | -16377 + 8 | -16377 + 5 | -16375 +(10 rows) + +select s, min(cint4) filter (where cint2 < 0) from aggfilter where cint2 > 0 group by s order by min(cint4), s limit 10; + s | min +---+----- + 0 | + 1 | + 3 | + 4 | + 6 | + 7 | + 9 | + 2 | + 8 | + 5 | +(10 rows) + +select s, min(cint4) filter (where ss > 1000) from aggfilter where cint2 > 0 group by s order by min(cint4), s limit 10; + s | min +---+----- + 0 | + 1 | + 3 | + 4 | + 6 | + 7 | + 9 | + 2 | + 8 | + 5 | +(10 rows) + +select s, min(cint4) filter (where cint4 > 0) from aggfilter where cint2 > 0 group by s order by min(cint4), s limit 10; + s | min +---+----- + 0 | 1 + 1 | 7 + 3 | 2 + 4 | 5 + 6 | 2 + 7 | 1 + 9 | 2 + 2 | 2 + 8 | 5 + 5 | 2 +(10 rows) + +select s, min(cint4) filter (where s != 5) from aggfilter where cint2 > 0 group by s order by min(cint4), s limit 10; + s | min +---+-------- + 0 | -16383 + 1 | -16382 + 3 | -16382 + 4 | -16382 + 6 | -16382 + 7 | -16382 + 9 | -16382 + 2 | -16377 + 8 | -16377 + 5 | +(10 rows) + +select count(s) from aggfilter where cint2 > 0; + count +------- + 99664 +(1 row) + +select count(s) filter (where cint2 < 0) from aggfilter where cint2 > 0; + count +------- + 0 +(1 row) + +select count(s) filter (where ss > 1000) from aggfilter where cint2 > 0; + count +------- + 0 +(1 row) + +select count(s) filter (where cint4 > 0) from aggfilter where cint2 > 0; + count +------- + 49817 +(1 row) + +select count(s) filter (where s != 5) from aggfilter where cint2 > 0; + count +------- + 89554 +(1 row) + +select s, count(s) from aggfilter 
where cint2 > 0 group by s order by count(s), s limit 10; + s | count +---+------- + 2 | 9868 + 6 | 9890 + 3 | 9893 + 8 | 9898 + 4 | 9906 + 7 | 9973 + 0 | 10012 + 9 | 10018 + 1 | 10096 + 5 | 10110 +(10 rows) + +select s, count(s) filter (where cint2 < 0) from aggfilter where cint2 > 0 group by s order by count(s), s limit 10; + s | count +---+------- + 2 | 0 + 6 | 0 + 3 | 0 + 8 | 0 + 4 | 0 + 7 | 0 + 0 | 0 + 9 | 0 + 1 | 0 + 5 | 0 +(10 rows) + +select s, count(s) filter (where ss > 1000) from aggfilter where cint2 > 0 group by s order by count(s), s limit 10; + s | count +---+------- + 2 | 0 + 6 | 0 + 3 | 0 + 8 | 0 + 4 | 0 + 7 | 0 + 0 | 0 + 9 | 0 + 1 | 0 + 5 | 0 +(10 rows) + +select s, count(s) filter (where cint4 > 0) from aggfilter where cint2 > 0 group by s order by count(s), s limit 10; + s | count +---+------- + 2 | 4923 + 6 | 4911 + 3 | 4972 + 8 | 4929 + 4 | 4944 + 7 | 4990 + 0 | 4963 + 9 | 4951 + 1 | 5067 + 5 | 5167 +(10 rows) + +select s, count(s) filter (where s != 5) from aggfilter where cint2 > 0 group by s order by count(s), s limit 10; + s | count +---+------- + 2 | 9868 + 6 | 9890 + 3 | 9893 + 8 | 9898 + 4 | 9906 + 7 | 9973 + 0 | 10012 + 9 | 10018 + 1 | 10096 + 5 | 0 +(10 rows) + +select min(s) from aggfilter where cint2 > 0; + min +----- + 0 +(1 row) + +select min(s) filter (where cint2 < 0) from aggfilter where cint2 > 0; + min +----- + +(1 row) + +select min(s) filter (where ss > 1000) from aggfilter where cint2 > 0; + min +----- + +(1 row) + +select min(s) filter (where cint4 > 0) from aggfilter where cint2 > 0; + min +----- + 0 +(1 row) + +select min(s) filter (where s != 5) from aggfilter where cint2 > 0; + min +----- + 0 +(1 row) + +select s, min(s) from aggfilter where cint2 > 0 group by s order by min(s), s limit 10; + s | min +---+----- + 0 | 0 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + +select s, min(s) filter (where cint2 < 0) from aggfilter where cint2 > 0 group by s order by min(s), s limit 10; + s 
| min +---+----- + 0 | + 1 | + 2 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | + 9 | +(10 rows) + +select s, min(s) filter (where ss > 1000) from aggfilter where cint2 > 0 group by s order by min(s), s limit 10; + s | min +---+----- + 0 | + 1 | + 2 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | + 9 | +(10 rows) + +select s, min(s) filter (where cint4 > 0) from aggfilter where cint2 > 0 group by s order by min(s), s limit 10; + s | min +---+----- + 0 | 0 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + +select s, min(s) filter (where s != 5) from aggfilter where cint2 > 0 group by s order by min(s), s limit 10; + s | min +---+----- + 0 | 0 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + +select count(ss) from aggfilter where cint2 > 0; + count +------- + 99657 +(1 row) + +select count(ss) filter (where cint2 < 0) from aggfilter where cint2 > 0; + count +------- + 0 +(1 row) + +select count(ss) filter (where ss > 1000) from aggfilter where cint2 > 0; + count +------- + 0 +(1 row) + +select count(ss) filter (where cint4 > 0) from aggfilter where cint2 > 0; + count +------- + 49813 +(1 row) + +select count(ss) filter (where s != 5) from aggfilter where cint2 > 0; + count +------- + 89547 +(1 row) + +select s, count(ss) from aggfilter where cint2 > 0 group by s order by count(ss), s limit 10; + s | count +---+------- + 2 | 9868 + 3 | 9886 + 6 | 9890 + 8 | 9898 + 4 | 9906 + 7 | 9973 + 0 | 10012 + 9 | 10018 + 1 | 10096 + 5 | 10110 +(10 rows) + +select s, count(ss) filter (where cint2 < 0) from aggfilter where cint2 > 0 group by s order by count(ss), s limit 10; + s | count +---+------- + 2 | 0 + 3 | 0 + 6 | 0 + 8 | 0 + 4 | 0 + 7 | 0 + 0 | 0 + 9 | 0 + 1 | 0 + 5 | 0 +(10 rows) + +select s, count(ss) filter (where ss > 1000) from aggfilter where cint2 > 0 group by s order by count(ss), s limit 10; + s | count +---+------- + 2 | 0 + 3 | 0 + 6 | 0 + 8 | 0 + 4 | 0 + 7 | 0 + 0 | 0 + 9 | 0 + 1 | 0 + 5 | 0 +(10 rows) + +select 
s, count(ss) filter (where cint4 > 0) from aggfilter where cint2 > 0 group by s order by count(ss), s limit 10; + s | count +---+------- + 2 | 4923 + 3 | 4968 + 6 | 4911 + 8 | 4929 + 4 | 4944 + 7 | 4990 + 0 | 4963 + 9 | 4951 + 1 | 5067 + 5 | 5167 +(10 rows) + +select s, count(ss) filter (where s != 5) from aggfilter where cint2 > 0 group by s order by count(ss), s limit 10; + s | count +---+------- + 2 | 9868 + 3 | 9886 + 6 | 9890 + 8 | 9898 + 4 | 9906 + 7 | 9973 + 0 | 10012 + 9 | 10018 + 1 | 10096 + 5 | 0 +(10 rows) + +select min(ss) from aggfilter where cint2 > 0; + min +----- + 0 +(1 row) + +select min(ss) filter (where cint2 < 0) from aggfilter where cint2 > 0; + min +----- + +(1 row) + +select min(ss) filter (where ss > 1000) from aggfilter where cint2 > 0; + min +----- + +(1 row) + +select min(ss) filter (where cint4 > 0) from aggfilter where cint2 > 0; + min +----- + 0 +(1 row) + +select min(ss) filter (where s != 5) from aggfilter where cint2 > 0; + min +----- + 0 +(1 row) + +select s, min(ss) from aggfilter where cint2 > 0 group by s order by min(ss), s limit 10; + s | min +---+----- + 0 | 0 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 1 | 11 + 2 | 11 +(10 rows) + +select s, min(ss) filter (where cint2 < 0) from aggfilter where cint2 > 0 group by s order by min(ss), s limit 10; + s | min +---+----- + 0 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | + 9 | + 1 | + 2 | +(10 rows) + +select s, min(ss) filter (where ss > 1000) from aggfilter where cint2 > 0 group by s order by min(ss), s limit 10; + s | min +---+----- + 0 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | + 9 | + 1 | + 2 | +(10 rows) + +select s, min(ss) filter (where cint4 > 0) from aggfilter where cint2 > 0 group by s order by min(ss), s limit 10; + s | min +---+----- + 0 | 0 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 1 | 11 + 2 | 11 +(10 rows) + +select s, min(ss) filter (where s != 5) from aggfilter where cint2 > 0 group by s order by min(ss), s limit 10; + s | min +---+----- + 0 | 0 
+ 3 | 3 + 4 | 4 + 5 | + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 1 | 11 + 2 | 11 +(10 rows) + +select count(*) from aggfilter where cint2 is null; + count +------- + 190 +(1 row) + +select count(*) filter (where cint2 < 0) from aggfilter where cint2 is null; + count +------- + 0 +(1 row) + +select count(*) filter (where ss > 1000) from aggfilter where cint2 is null; + count +------- + 0 +(1 row) + +select count(*) filter (where cint4 > 0) from aggfilter where cint2 is null; + count +------- + 92 +(1 row) + +select count(*) filter (where s != 5) from aggfilter where cint2 is null; + count +------- + 171 +(1 row) + +select s, count(*) from aggfilter where cint2 is null group by s order by count(*), s limit 10; + s | count +---+------- + 0 | 19 + 1 | 19 + 2 | 19 + 3 | 19 + 4 | 19 + 5 | 19 + 6 | 19 + 7 | 19 + 8 | 19 + 9 | 19 +(10 rows) + +select s, count(*) filter (where cint2 < 0) from aggfilter where cint2 is null group by s order by count(*), s limit 10; + s | count +---+------- + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 +(10 rows) + +select s, count(*) filter (where ss > 1000) from aggfilter where cint2 is null group by s order by count(*), s limit 10; + s | count +---+------- + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 +(10 rows) + +select s, count(*) filter (where cint4 > 0) from aggfilter where cint2 is null group by s order by count(*), s limit 10; + s | count +---+------- + 0 | 9 + 1 | 12 + 2 | 9 + 3 | 11 + 4 | 5 + 5 | 11 + 6 | 9 + 7 | 7 + 8 | 10 + 9 | 9 +(10 rows) + +select s, count(*) filter (where s != 5) from aggfilter where cint2 is null group by s order by count(*), s limit 10; + s | count +---+------- + 0 | 19 + 1 | 19 + 2 | 19 + 3 | 19 + 4 | 19 + 5 | 0 + 6 | 19 + 7 | 19 + 8 | 19 + 9 | 19 +(10 rows) + +select count(cint2) from aggfilter where cint2 is null; + count +------- + 0 +(1 row) + +select count(cint2) filter (where cint2 < 0) from aggfilter where cint2 is null; + count +------- + 0 
+(1 row) + +select count(cint2) filter (where ss > 1000) from aggfilter where cint2 is null; + count +------- + 0 +(1 row) + +select count(cint2) filter (where cint4 > 0) from aggfilter where cint2 is null; + count +------- + 0 +(1 row) + +select count(cint2) filter (where s != 5) from aggfilter where cint2 is null; + count +------- + 0 +(1 row) + +select s, count(cint2) from aggfilter where cint2 is null group by s order by count(cint2), s limit 10; + s | count +---+------- + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 +(10 rows) + +select s, count(cint2) filter (where cint2 < 0) from aggfilter where cint2 is null group by s order by count(cint2), s limit 10; + s | count +---+------- + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 +(10 rows) + +select s, count(cint2) filter (where ss > 1000) from aggfilter where cint2 is null group by s order by count(cint2), s limit 10; + s | count +---+------- + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 +(10 rows) + +select s, count(cint2) filter (where cint4 > 0) from aggfilter where cint2 is null group by s order by count(cint2), s limit 10; + s | count +---+------- + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 +(10 rows) + +select s, count(cint2) filter (where s != 5) from aggfilter where cint2 is null group by s order by count(cint2), s limit 10; + s | count +---+------- + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 +(10 rows) + +select min(cint2) from aggfilter where cint2 is null; + min +----- + +(1 row) + +select min(cint2) filter (where cint2 < 0) from aggfilter where cint2 is null; + min +----- + +(1 row) + +select min(cint2) filter (where ss > 1000) from aggfilter where cint2 is null; + min +----- + +(1 row) + +select min(cint2) filter (where cint4 > 0) from aggfilter where cint2 is null; + min +----- + +(1 row) + +select min(cint2) filter (where s != 
5) from aggfilter where cint2 is null; + min +----- + +(1 row) + +select s, min(cint2) from aggfilter where cint2 is null group by s order by min(cint2), s limit 10; + s | min +---+----- + 0 | + 1 | + 2 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | + 9 | +(10 rows) + +select s, min(cint2) filter (where cint2 < 0) from aggfilter where cint2 is null group by s order by min(cint2), s limit 10; + s | min +---+----- + 0 | + 1 | + 2 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | + 9 | +(10 rows) + +select s, min(cint2) filter (where ss > 1000) from aggfilter where cint2 is null group by s order by min(cint2), s limit 10; + s | min +---+----- + 0 | + 1 | + 2 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | + 9 | +(10 rows) + +select s, min(cint2) filter (where cint4 > 0) from aggfilter where cint2 is null group by s order by min(cint2), s limit 10; + s | min +---+----- + 0 | + 1 | + 2 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | + 9 | +(10 rows) + +select s, min(cint2) filter (where s != 5) from aggfilter where cint2 is null group by s order by min(cint2), s limit 10; + s | min +---+----- + 0 | + 1 | + 2 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | + 9 | +(10 rows) + +select count(cint4) from aggfilter where cint2 is null; + count +------- + 190 +(1 row) + +select count(cint4) filter (where cint2 < 0) from aggfilter where cint2 is null; + count +------- + 0 +(1 row) + +select count(cint4) filter (where ss > 1000) from aggfilter where cint2 is null; + count +------- + 0 +(1 row) + +select count(cint4) filter (where cint4 > 0) from aggfilter where cint2 is null; + count +------- + 92 +(1 row) + +select count(cint4) filter (where s != 5) from aggfilter where cint2 is null; + count +------- + 171 +(1 row) + +select s, count(cint4) from aggfilter where cint2 is null group by s order by count(cint4), s limit 10; + s | count +---+------- + 0 | 19 + 1 | 19 + 2 | 19 + 3 | 19 + 4 | 19 + 5 | 19 + 6 | 19 + 7 | 19 + 8 | 19 + 9 | 19 +(10 rows) + +select s, count(cint4) filter (where cint2 < 0) from aggfilter where cint2 is 
null group by s order by count(cint4), s limit 10; + s | count +---+------- + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 +(10 rows) + +select s, count(cint4) filter (where ss > 1000) from aggfilter where cint2 is null group by s order by count(cint4), s limit 10; + s | count +---+------- + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 +(10 rows) + +select s, count(cint4) filter (where cint4 > 0) from aggfilter where cint2 is null group by s order by count(cint4), s limit 10; + s | count +---+------- + 0 | 9 + 1 | 12 + 2 | 9 + 3 | 11 + 4 | 5 + 5 | 11 + 6 | 9 + 7 | 7 + 8 | 10 + 9 | 9 +(10 rows) + +select s, count(cint4) filter (where s != 5) from aggfilter where cint2 is null group by s order by count(cint4), s limit 10; + s | count +---+------- + 0 | 19 + 1 | 19 + 2 | 19 + 3 | 19 + 4 | 19 + 5 | 0 + 6 | 19 + 7 | 19 + 8 | 19 + 9 | 19 +(10 rows) + +select min(cint4) from aggfilter where cint2 is null; + min +-------- + -16291 +(1 row) + +select min(cint4) filter (where cint2 < 0) from aggfilter where cint2 is null; + min +----- + +(1 row) + +select min(cint4) filter (where ss > 1000) from aggfilter where cint2 is null; + min +----- + +(1 row) + +select min(cint4) filter (where cint4 > 0) from aggfilter where cint2 is null; + min +----- + 473 +(1 row) + +select min(cint4) filter (where s != 5) from aggfilter where cint2 is null; + min +-------- + -16291 +(1 row) + +select s, min(cint4) from aggfilter where cint2 is null group by s order by min(cint4), s limit 10; + s | min +---+-------- + 0 | -16291 + 7 | -16091 + 4 | -15724 + 5 | -15279 + 2 | -15063 + 6 | -14998 + 9 | -14699 + 8 | -14214 + 1 | -12217 + 3 | -9908 +(10 rows) + +select s, min(cint4) filter (where cint2 < 0) from aggfilter where cint2 is null group by s order by min(cint4), s limit 10; + s | min +---+----- + 0 | + 7 | + 4 | + 5 | + 2 | + 6 | + 9 | + 8 | + 1 | + 3 | +(10 rows) + +select s, min(cint4) filter (where ss > 1000) from aggfilter 
where cint2 is null group by s order by min(cint4), s limit 10; + s | min +---+----- + 0 | + 7 | + 4 | + 5 | + 2 | + 6 | + 9 | + 8 | + 1 | + 3 | +(10 rows) + +select s, min(cint4) filter (where cint4 > 0) from aggfilter where cint2 is null group by s order by min(cint4), s limit 10; + s | min +---+------ + 0 | 701 + 7 | 1695 + 4 | 1821 + 5 | 587 + 2 | 2876 + 6 | 1003 + 9 | 2489 + 8 | 1334 + 1 | 2034 + 3 | 473 +(10 rows) + +select s, min(cint4) filter (where s != 5) from aggfilter where cint2 is null group by s order by min(cint4), s limit 10; + s | min +---+-------- + 0 | -16291 + 7 | -16091 + 4 | -15724 + 5 | + 2 | -15063 + 6 | -14998 + 9 | -14699 + 8 | -14214 + 1 | -12217 + 3 | -9908 +(10 rows) + +select count(s) from aggfilter where cint2 is null; + count +------- + 190 +(1 row) + +select count(s) filter (where cint2 < 0) from aggfilter where cint2 is null; + count +------- + 0 +(1 row) + +select count(s) filter (where ss > 1000) from aggfilter where cint2 is null; + count +------- + 0 +(1 row) + +select count(s) filter (where cint4 > 0) from aggfilter where cint2 is null; + count +------- + 92 +(1 row) + +select count(s) filter (where s != 5) from aggfilter where cint2 is null; + count +------- + 171 +(1 row) + +select s, count(s) from aggfilter where cint2 is null group by s order by count(s), s limit 10; + s | count +---+------- + 0 | 19 + 1 | 19 + 2 | 19 + 3 | 19 + 4 | 19 + 5 | 19 + 6 | 19 + 7 | 19 + 8 | 19 + 9 | 19 +(10 rows) + +select s, count(s) filter (where cint2 < 0) from aggfilter where cint2 is null group by s order by count(s), s limit 10; + s | count +---+------- + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 +(10 rows) + +select s, count(s) filter (where ss > 1000) from aggfilter where cint2 is null group by s order by count(s), s limit 10; + s | count +---+------- + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 +(10 rows) + +select s, count(s) filter (where cint4 > 0) from aggfilter 
where cint2 is null group by s order by count(s), s limit 10; + s | count +---+------- + 0 | 9 + 1 | 12 + 2 | 9 + 3 | 11 + 4 | 5 + 5 | 11 + 6 | 9 + 7 | 7 + 8 | 10 + 9 | 9 +(10 rows) + +select s, count(s) filter (where s != 5) from aggfilter where cint2 is null group by s order by count(s), s limit 10; + s | count +---+------- + 0 | 19 + 1 | 19 + 2 | 19 + 3 | 19 + 4 | 19 + 5 | 0 + 6 | 19 + 7 | 19 + 8 | 19 + 9 | 19 +(10 rows) + +select min(s) from aggfilter where cint2 is null; + min +----- + 0 +(1 row) + +select min(s) filter (where cint2 < 0) from aggfilter where cint2 is null; + min +----- + +(1 row) + +select min(s) filter (where ss > 1000) from aggfilter where cint2 is null; + min +----- + +(1 row) + +select min(s) filter (where cint4 > 0) from aggfilter where cint2 is null; + min +----- + 0 +(1 row) + +select min(s) filter (where s != 5) from aggfilter where cint2 is null; + min +----- + 0 +(1 row) + +select s, min(s) from aggfilter where cint2 is null group by s order by min(s), s limit 10; + s | min +---+----- + 0 | 0 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + +select s, min(s) filter (where cint2 < 0) from aggfilter where cint2 is null group by s order by min(s), s limit 10; + s | min +---+----- + 0 | + 1 | + 2 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | + 9 | +(10 rows) + +select s, min(s) filter (where ss > 1000) from aggfilter where cint2 is null group by s order by min(s), s limit 10; + s | min +---+----- + 0 | + 1 | + 2 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | + 9 | +(10 rows) + +select s, min(s) filter (where cint4 > 0) from aggfilter where cint2 is null group by s order by min(s), s limit 10; + s | min +---+----- + 0 | 0 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + +select s, min(s) filter (where s != 5) from aggfilter where cint2 is null group by s order by min(s), s limit 10; + s | min +---+----- + 0 | 0 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 
rows) + +select count(ss) from aggfilter where cint2 is null; + count +------- + 190 +(1 row) + +select count(ss) filter (where cint2 < 0) from aggfilter where cint2 is null; + count +------- + 0 +(1 row) + +select count(ss) filter (where ss > 1000) from aggfilter where cint2 is null; + count +------- + 0 +(1 row) + +select count(ss) filter (where cint4 > 0) from aggfilter where cint2 is null; + count +------- + 92 +(1 row) + +select count(ss) filter (where s != 5) from aggfilter where cint2 is null; + count +------- + 171 +(1 row) + +select s, count(ss) from aggfilter where cint2 is null group by s order by count(ss), s limit 10; + s | count +---+------- + 0 | 19 + 1 | 19 + 2 | 19 + 3 | 19 + 4 | 19 + 5 | 19 + 6 | 19 + 7 | 19 + 8 | 19 + 9 | 19 +(10 rows) + +select s, count(ss) filter (where cint2 < 0) from aggfilter where cint2 is null group by s order by count(ss), s limit 10; + s | count +---+------- + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 +(10 rows) + +select s, count(ss) filter (where ss > 1000) from aggfilter where cint2 is null group by s order by count(ss), s limit 10; + s | count +---+------- + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 +(10 rows) + +select s, count(ss) filter (where cint4 > 0) from aggfilter where cint2 is null group by s order by count(ss), s limit 10; + s | count +---+------- + 0 | 9 + 1 | 12 + 2 | 9 + 3 | 11 + 4 | 5 + 5 | 11 + 6 | 9 + 7 | 7 + 8 | 10 + 9 | 9 +(10 rows) + +select s, count(ss) filter (where s != 5) from aggfilter where cint2 is null group by s order by count(ss), s limit 10; + s | count +---+------- + 0 | 19 + 1 | 19 + 2 | 19 + 3 | 19 + 4 | 19 + 5 | 0 + 6 | 19 + 7 | 19 + 8 | 19 + 9 | 19 +(10 rows) + +select min(ss) from aggfilter where cint2 is null; + min +----- + 0 +(1 row) + +select min(ss) filter (where cint2 < 0) from aggfilter where cint2 is null; + min +----- + +(1 row) + +select min(ss) filter (where ss > 1000) from aggfilter where cint2 is 
null; + min +----- + +(1 row) + +select min(ss) filter (where cint4 > 0) from aggfilter where cint2 is null; + min +----- + 0 +(1 row) + +select min(ss) filter (where s != 5) from aggfilter where cint2 is null; + min +----- + 0 +(1 row) + +select s, min(ss) from aggfilter where cint2 is null group by s order by min(ss), s limit 10; + s | min +---+----- + 0 | 0 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 1 | 11 + 2 | 11 +(10 rows) + +select s, min(ss) filter (where cint2 < 0) from aggfilter where cint2 is null group by s order by min(ss), s limit 10; + s | min +---+----- + 0 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | + 9 | + 1 | + 2 | +(10 rows) + +select s, min(ss) filter (where ss > 1000) from aggfilter where cint2 is null group by s order by min(ss), s limit 10; + s | min +---+----- + 0 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | + 9 | + 1 | + 2 | +(10 rows) + +select s, min(ss) filter (where cint4 > 0) from aggfilter where cint2 is null group by s order by min(ss), s limit 10; + s | min +---+----- + 0 | 0 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 1 | 11 + 2 | 11 +(10 rows) + +select s, min(ss) filter (where s != 5) from aggfilter where cint2 is null group by s order by min(ss), s limit 10; + s | min +---+----- + 0 | 0 + 3 | 3 + 4 | 4 + 5 | + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 1 | 11 + 2 | 11 +(10 rows) + diff --git a/tsl/test/expected/vector_agg_param.out b/tsl/test/expected/vector_agg_param.out index b481d9c8a97..c04e7b682b2 100644 --- a/tsl/test/expected/vector_agg_param.out +++ b/tsl/test/expected/vector_agg_param.out @@ -21,23 +21,40 @@ select count(compress_chunk(x)) from show_chunks('pvagg') x; (1 row) analyze pvagg; -explain (costs off) +-- Uncomment to generate reference +--set timescaledb.enable_vectorized_aggregation to off; +explain (verbose, costs off) select * from unnest(array[0, 1, 2]::int[]) x, lateral (select sum(a) from pvagg where s = x) xx; - QUERY PLAN 
---------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Nested Loop - -> Function Scan on unnest x + Output: x.x, (sum(pvagg.a)) + -> Function Scan on pg_catalog.unnest x + Output: x.x + Function Call: unnest('{0,1,2}'::integer[]) -> Finalize Aggregate - -> Custom Scan (ChunkAppend) on pvagg + Output: sum(pvagg.a) + -> Custom Scan (ChunkAppend) on public.pvagg + Output: (PARTIAL sum(pvagg.a)) + Startup Exclusion: false + Runtime Exclusion: true -> Custom Scan (VectorAgg) - -> Custom Scan (DecompressChunk) on _hyper_1_1_chunk - -> Seq Scan on compress_hyper_2_3_chunk - Filter: (s = x.x) + Output: (PARTIAL sum(_hyper_1_1_chunk.a)) + Grouping Policy: all compressed batches + -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk.a + -> Seq Scan on _timescaledb_internal.compress_hyper_2_3_chunk + Output: compress_hyper_2_3_chunk._ts_meta_count, compress_hyper_2_3_chunk.s, compress_hyper_2_3_chunk._ts_meta_min_1, compress_hyper_2_3_chunk._ts_meta_max_1, compress_hyper_2_3_chunk.a + Filter: (compress_hyper_2_3_chunk.s = x.x) -> Custom Scan (VectorAgg) - -> Custom Scan (DecompressChunk) on _hyper_1_2_chunk - -> Seq Scan on compress_hyper_2_4_chunk - Filter: (s = x.x) -(12 rows) + Output: (PARTIAL sum(_hyper_1_2_chunk.a)) + Grouping Policy: all compressed batches + -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk.a + -> Seq Scan on _timescaledb_internal.compress_hyper_2_4_chunk + Output: compress_hyper_2_4_chunk._ts_meta_count, compress_hyper_2_4_chunk.s, compress_hyper_2_4_chunk._ts_meta_min_1, compress_hyper_2_4_chunk._ts_meta_max_1, compress_hyper_2_4_chunk.a + Filter: (compress_hyper_2_4_chunk.s = x.x) +(27 rows) select 
* from unnest(array[0, 1, 2]::int[]) x, lateral (select sum(a) from pvagg where s = x) xx; x | sum @@ -47,4 +64,48 @@ select * from unnest(array[0, 1, 2]::int[]) x, lateral (select sum(a) from pvagg 2 | 1498500 (3 rows) +explain (verbose, costs off) +select * from unnest(array[0, 1, 2]::int[]) x, lateral (select sum(a + x) from pvagg) xx; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Nested Loop + Output: x.x, (sum((_hyper_1_1_chunk.a + x.x))) + -> Function Scan on pg_catalog.unnest x + Output: x.x + Function Call: unnest('{0,1,2}'::integer[]) + -> Finalize Aggregate + Output: sum((_hyper_1_1_chunk.a + x.x)) + -> Append + -> Partial Aggregate + Output: PARTIAL sum((_hyper_1_1_chunk.a + x.x)) + -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk.a + -> Seq Scan on _timescaledb_internal.compress_hyper_2_3_chunk + Output: compress_hyper_2_3_chunk._ts_meta_count, compress_hyper_2_3_chunk.s, compress_hyper_2_3_chunk._ts_meta_min_1, compress_hyper_2_3_chunk._ts_meta_max_1, compress_hyper_2_3_chunk.a + -> Partial Aggregate + Output: PARTIAL sum((_hyper_1_2_chunk.a + x.x)) + -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk.a + -> Seq Scan on _timescaledb_internal.compress_hyper_2_4_chunk + Output: compress_hyper_2_4_chunk._ts_meta_count, compress_hyper_2_4_chunk.s, compress_hyper_2_4_chunk._ts_meta_min_1, compress_hyper_2_4_chunk._ts_meta_max_1, compress_hyper_2_4_chunk.a +(20 rows) + +select * from unnest(array[0, 1, 2]::int[]) x, lateral (select sum(a + x) from pvagg) xx; + x | sum +---+--------- + 0 | 1998000 + 1 | 1999998 + 2 | 2001996 +(3 rows) + +-- The plan for this query differs after PG16, x is not used as grouping key but +-- just added into the output targetlist of partial 
aggregation nodes. +select * from unnest(array[0, 1, 2]::int[]) x, lateral (select sum(a) from pvagg group by x) xx; + x | sum +---+--------- + 0 | 1998000 + 1 | 1998000 + 2 | 1998000 +(3 rows) + drop table pvagg; diff --git a/tsl/test/expected/vectorized_aggregation.out b/tsl/test/expected/vectorized_aggregation.out index 54a712d8c12..4baae6e72af 100644 --- a/tsl/test/expected/vectorized_aggregation.out +++ b/tsl/test/expected/vectorized_aggregation.out @@ -2863,67 +2863,77 @@ SELECT sum(segment_by_value) FILTER (WHERE segment_by_value > 99999) FROM testta Output: (PARTIAL sum(_hyper_1_81_chunk.segment_by_value) FILTER (WHERE (_hyper_1_81_chunk.segment_by_value > 99999))) Workers Planned: 2 -> Parallel Append - -> Partial Aggregate - Output: PARTIAL sum(_hyper_1_81_chunk.segment_by_value) FILTER (WHERE (_hyper_1_81_chunk.segment_by_value > 99999)) + -> Custom Scan (VectorAgg) + Output: (PARTIAL sum(_hyper_1_81_chunk.segment_by_value) FILTER (WHERE (_hyper_1_81_chunk.segment_by_value > 99999))) + Grouping Policy: all compressed batches -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_81_chunk Output: _hyper_1_81_chunk.segment_by_value -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_91_chunk Output: compress_hyper_2_91_chunk._ts_meta_count, compress_hyper_2_91_chunk.segment_by_value, compress_hyper_2_91_chunk._ts_meta_min_1, compress_hyper_2_91_chunk._ts_meta_max_1, compress_hyper_2_91_chunk."time", compress_hyper_2_91_chunk.int_value, compress_hyper_2_91_chunk.float_value - -> Partial Aggregate - Output: PARTIAL sum(_hyper_1_82_chunk.segment_by_value) FILTER (WHERE (_hyper_1_82_chunk.segment_by_value > 99999)) + -> Custom Scan (VectorAgg) + Output: (PARTIAL sum(_hyper_1_82_chunk.segment_by_value) FILTER (WHERE (_hyper_1_82_chunk.segment_by_value > 99999))) + Grouping Policy: all compressed batches -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_82_chunk Output: _hyper_1_82_chunk.segment_by_value -> Parallel Seq 
Scan on _timescaledb_internal.compress_hyper_2_92_chunk Output: compress_hyper_2_92_chunk._ts_meta_count, compress_hyper_2_92_chunk.segment_by_value, compress_hyper_2_92_chunk._ts_meta_min_1, compress_hyper_2_92_chunk._ts_meta_max_1, compress_hyper_2_92_chunk."time", compress_hyper_2_92_chunk.int_value, compress_hyper_2_92_chunk.float_value - -> Partial Aggregate - Output: PARTIAL sum(_hyper_1_83_chunk.segment_by_value) FILTER (WHERE (_hyper_1_83_chunk.segment_by_value > 99999)) + -> Custom Scan (VectorAgg) + Output: (PARTIAL sum(_hyper_1_83_chunk.segment_by_value) FILTER (WHERE (_hyper_1_83_chunk.segment_by_value > 99999))) + Grouping Policy: all compressed batches -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_83_chunk Output: _hyper_1_83_chunk.segment_by_value -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_93_chunk Output: compress_hyper_2_93_chunk._ts_meta_count, compress_hyper_2_93_chunk.segment_by_value, compress_hyper_2_93_chunk._ts_meta_min_1, compress_hyper_2_93_chunk._ts_meta_max_1, compress_hyper_2_93_chunk."time", compress_hyper_2_93_chunk.int_value, compress_hyper_2_93_chunk.float_value - -> Partial Aggregate - Output: PARTIAL sum(_hyper_1_84_chunk.segment_by_value) FILTER (WHERE (_hyper_1_84_chunk.segment_by_value > 99999)) + -> Custom Scan (VectorAgg) + Output: (PARTIAL sum(_hyper_1_84_chunk.segment_by_value) FILTER (WHERE (_hyper_1_84_chunk.segment_by_value > 99999))) + Grouping Policy: all compressed batches -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_84_chunk Output: _hyper_1_84_chunk.segment_by_value -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_94_chunk Output: compress_hyper_2_94_chunk._ts_meta_count, compress_hyper_2_94_chunk.segment_by_value, compress_hyper_2_94_chunk._ts_meta_min_1, compress_hyper_2_94_chunk._ts_meta_max_1, compress_hyper_2_94_chunk."time", compress_hyper_2_94_chunk.int_value, compress_hyper_2_94_chunk.float_value - -> Partial Aggregate - Output: 
PARTIAL sum(_hyper_1_85_chunk.segment_by_value) FILTER (WHERE (_hyper_1_85_chunk.segment_by_value > 99999)) + -> Custom Scan (VectorAgg) + Output: (PARTIAL sum(_hyper_1_85_chunk.segment_by_value) FILTER (WHERE (_hyper_1_85_chunk.segment_by_value > 99999))) + Grouping Policy: all compressed batches -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_85_chunk Output: _hyper_1_85_chunk.segment_by_value -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_95_chunk Output: compress_hyper_2_95_chunk._ts_meta_count, compress_hyper_2_95_chunk.segment_by_value, compress_hyper_2_95_chunk._ts_meta_min_1, compress_hyper_2_95_chunk._ts_meta_max_1, compress_hyper_2_95_chunk."time", compress_hyper_2_95_chunk.int_value, compress_hyper_2_95_chunk.float_value - -> Partial Aggregate - Output: PARTIAL sum(_hyper_1_86_chunk.segment_by_value) FILTER (WHERE (_hyper_1_86_chunk.segment_by_value > 99999)) + -> Custom Scan (VectorAgg) + Output: (PARTIAL sum(_hyper_1_86_chunk.segment_by_value) FILTER (WHERE (_hyper_1_86_chunk.segment_by_value > 99999))) + Grouping Policy: all compressed batches -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_86_chunk Output: _hyper_1_86_chunk.segment_by_value -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_96_chunk Output: compress_hyper_2_96_chunk._ts_meta_count, compress_hyper_2_96_chunk.segment_by_value, compress_hyper_2_96_chunk._ts_meta_min_1, compress_hyper_2_96_chunk._ts_meta_max_1, compress_hyper_2_96_chunk."time", compress_hyper_2_96_chunk.int_value, compress_hyper_2_96_chunk.float_value - -> Partial Aggregate - Output: PARTIAL sum(_hyper_1_87_chunk.segment_by_value) FILTER (WHERE (_hyper_1_87_chunk.segment_by_value > 99999)) + -> Custom Scan (VectorAgg) + Output: (PARTIAL sum(_hyper_1_87_chunk.segment_by_value) FILTER (WHERE (_hyper_1_87_chunk.segment_by_value > 99999))) + Grouping Policy: all compressed batches -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_87_chunk 
Output: _hyper_1_87_chunk.segment_by_value -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_97_chunk Output: compress_hyper_2_97_chunk._ts_meta_count, compress_hyper_2_97_chunk.segment_by_value, compress_hyper_2_97_chunk._ts_meta_min_1, compress_hyper_2_97_chunk._ts_meta_max_1, compress_hyper_2_97_chunk."time", compress_hyper_2_97_chunk.int_value, compress_hyper_2_97_chunk.float_value - -> Partial Aggregate - Output: PARTIAL sum(_hyper_1_88_chunk.segment_by_value) FILTER (WHERE (_hyper_1_88_chunk.segment_by_value > 99999)) + -> Custom Scan (VectorAgg) + Output: (PARTIAL sum(_hyper_1_88_chunk.segment_by_value) FILTER (WHERE (_hyper_1_88_chunk.segment_by_value > 99999))) + Grouping Policy: all compressed batches -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_88_chunk Output: _hyper_1_88_chunk.segment_by_value -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_98_chunk Output: compress_hyper_2_98_chunk._ts_meta_count, compress_hyper_2_98_chunk.segment_by_value, compress_hyper_2_98_chunk._ts_meta_min_1, compress_hyper_2_98_chunk._ts_meta_max_1, compress_hyper_2_98_chunk."time", compress_hyper_2_98_chunk.int_value, compress_hyper_2_98_chunk.float_value - -> Partial Aggregate - Output: PARTIAL sum(_hyper_1_89_chunk.segment_by_value) FILTER (WHERE (_hyper_1_89_chunk.segment_by_value > 99999)) + -> Custom Scan (VectorAgg) + Output: (PARTIAL sum(_hyper_1_89_chunk.segment_by_value) FILTER (WHERE (_hyper_1_89_chunk.segment_by_value > 99999))) + Grouping Policy: all compressed batches -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_89_chunk Output: _hyper_1_89_chunk.segment_by_value -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_99_chunk Output: compress_hyper_2_99_chunk._ts_meta_count, compress_hyper_2_99_chunk.segment_by_value, compress_hyper_2_99_chunk._ts_meta_min_1, compress_hyper_2_99_chunk._ts_meta_max_1, compress_hyper_2_99_chunk."time", compress_hyper_2_99_chunk.int_value, 
compress_hyper_2_99_chunk.float_value - -> Partial Aggregate - Output: PARTIAL sum(_hyper_1_90_chunk.segment_by_value) FILTER (WHERE (_hyper_1_90_chunk.segment_by_value > 99999)) + -> Custom Scan (VectorAgg) + Output: (PARTIAL sum(_hyper_1_90_chunk.segment_by_value) FILTER (WHERE (_hyper_1_90_chunk.segment_by_value > 99999))) + Grouping Policy: all compressed batches -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_90_chunk Output: _hyper_1_90_chunk.segment_by_value -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_100_chunk Output: compress_hyper_2_100_chunk._ts_meta_count, compress_hyper_2_100_chunk.segment_by_value, compress_hyper_2_100_chunk._ts_meta_min_1, compress_hyper_2_100_chunk._ts_meta_max_1, compress_hyper_2_100_chunk."time", compress_hyper_2_100_chunk.int_value, compress_hyper_2_100_chunk.float_value -(66 rows) +(76 rows) SET timescaledb.enable_vectorized_aggregation = OFF; SELECT sum(segment_by_value) FILTER (WHERE segment_by_value > 99999) FROM testtable; @@ -3496,3 +3506,10 @@ SELECT sum(segment_by_value1) FROM testtable2 WHERE segment_by_value1 > 1000 AND (84 rows) RESET max_parallel_workers_per_gather; +-- Can't group by a system column +SELECT sum(float_value) FROM testtable2 GROUP BY tableoid ORDER BY 1 LIMIT 1; + sum +------- + 82620 +(1 row) + diff --git a/tsl/test/sql/CMakeLists.txt b/tsl/test/sql/CMakeLists.txt index 9cc7e62b84a..fae7a0609d2 100644 --- a/tsl/test/sql/CMakeLists.txt +++ b/tsl/test/sql/CMakeLists.txt @@ -114,6 +114,7 @@ if(CMAKE_BUILD_TYPE MATCHES Debug) recompress_chunk_segmentwise.sql feature_flags.sql vector_agg_default.sql + vector_agg_filter.sql vector_agg_segmentby.sql) list( diff --git a/tsl/test/sql/vector_agg_filter.sql b/tsl/test/sql/vector_agg_filter.sql new file mode 100644 index 00000000000..316102ee940 --- /dev/null +++ b/tsl/test/sql/vector_agg_filter.sql @@ -0,0 +1,106 @@ +-- This file and its contents are licensed under the Timescale License. 
+-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. + +\c :TEST_DBNAME :ROLE_SUPERUSER +-- helper function: float -> pseudorandom float [-0.5..0.5] +CREATE OR REPLACE FUNCTION mix(x anyelement) RETURNS float8 AS $$ + SELECT hashfloat8(x::float8) / pow(2, 32) +$$ LANGUAGE SQL; + +\set CHUNKS 2::int +\set CHUNK_ROWS 100000::int +\set GROUPING_CARDINALITY 10::int + +create table aggfilter(t int, s int, + cint2 int2, dropped int4, cint4 int4); +select create_hypertable('aggfilter', 's', chunk_time_interval => :GROUPING_CARDINALITY / :CHUNKS); + +create view source as +select s * 10000 + t as t, + s, + case when t % 1051 = 0 then null + else (mix(s + t * 1019) * 32767)::int2 end as cint2, + 1 as dropped, + (mix(s + t * 1021) * 32767)::int4 as cint4 +from + generate_series(1::int, :CHUNK_ROWS * :CHUNKS / :GROUPING_CARDINALITY) t, + generate_series(0::int, :GROUPING_CARDINALITY - 1::int) s(s) +; + +insert into aggfilter select * from source where s = 1; + +alter table aggfilter set (timescaledb.compress, timescaledb.compress_orderby = 't', + timescaledb.compress_segmentby = 's'); + +select count(compress_chunk(x)) from show_chunks('aggfilter') x; + +alter table aggfilter add column ss int default 11; +alter table aggfilter drop column dropped; + +insert into aggfilter +select t, s, cint2, cint4, + case + -- null in entire batch + when s = 2 then null + -- null for some rows + when s = 3 and t % 1053 = 0 then null + -- for some rows same as default + when s = 4 and t % 1057 = 0 then 11 + -- not null for entire batch + else s + end as ss +from source where s != 1 +; +select count(compress_chunk(x)) from show_chunks('aggfilter') x; +vacuum freeze analyze aggfilter; + + + +set timescaledb.debug_require_vector_agg = 'require'; +---- Uncomment to generate reference. Note that there are minor discrepancies +---- on float4 due to different numeric stability in our and PG implementations. 
+--set timescaledb.enable_vectorized_aggregation to off; set timescaledb.debug_require_vector_agg = 'allow'; + +select + format('%sselect %s%s(%s)%s from aggfilter%s%s%s;', + explain, + grouping || ', ', + function, variable, + ' filter (where ' || agg_filter || ')', + ' where ' || condition, + ' group by ' || grouping, + format(' order by %s(%s), ', function, variable) || grouping || ' limit 10', + function, variable) +from + unnest(array[ + 'explain (costs off) ', + null]) explain, + unnest(array[ + 's', + 'ss', + 'cint2', + 'cint4', + '*']) variable, + unnest(array[ + 'min', + 'count']) function, + unnest(array[ + null, + 'cint2 > 0', + 'cint2 is null']) with ordinality as condition(condition, n), + unnest(array[ + null, + 's']) with ordinality as grouping(grouping, n), + unnest(array[ + null, + 'cint2 < 0', + 'ss > 1000', + 'cint4 > 0', + 's != 5']) with ordinality as agg_filter(agg_filter, n) +where + true + and (explain is null /* or condition is null and grouping = 's' */) + and (variable != '*' or function = 'count') +order by explain, condition.n, variable, function, grouping.n, agg_filter.n +\gexec diff --git a/tsl/test/sql/vector_agg_param.sql b/tsl/test/sql/vector_agg_param.sql index 491a877556d..244718ef27f 100644 --- a/tsl/test/sql/vector_agg_param.sql +++ b/tsl/test/sql/vector_agg_param.sql @@ -18,11 +18,23 @@ select count(compress_chunk(x)) from show_chunks('pvagg') x; analyze pvagg; +-- Uncomment to generate reference +--set timescaledb.enable_vectorized_aggregation to off; -explain (costs off) + +explain (verbose, costs off) select * from unnest(array[0, 1, 2]::int[]) x, lateral (select sum(a) from pvagg where s = x) xx; select * from unnest(array[0, 1, 2]::int[]) x, lateral (select sum(a) from pvagg where s = x) xx; +explain (verbose, costs off) +select * from unnest(array[0, 1, 2]::int[]) x, lateral (select sum(a + x) from pvagg) xx; + +select * from unnest(array[0, 1, 2]::int[]) x, lateral (select sum(a + x) from pvagg) xx; + +-- The plan for 
this query differs after PG16, x is not used as grouping key but +-- just added into the output targetlist of partial aggregation nodes. +select * from unnest(array[0, 1, 2]::int[]) x, lateral (select sum(a) from pvagg group by x) xx; + drop table pvagg; diff --git a/tsl/test/sql/vectorized_aggregation.sql b/tsl/test/sql/vectorized_aggregation.sql index c8844932a93..bafecd6b544 100644 --- a/tsl/test/sql/vectorized_aggregation.sql +++ b/tsl/test/sql/vectorized_aggregation.sql @@ -403,3 +403,7 @@ SET max_parallel_workers_per_gather = 0; SELECT sum(segment_by_value1) FROM testtable2 WHERE segment_by_value1 > 1000 AND int_value > 1000; RESET max_parallel_workers_per_gather; + + +-- Can't group by a system column +SELECT sum(float_value) FROM testtable2 GROUP BY tableoid ORDER BY 1 LIMIT 1; From 9b0ee388f5c28049a113ff49cc0f3b0d9109eb49 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 28 Nov 2024 21:22:17 +0100 Subject: [PATCH 37/58] cleanups after merge --- tsl/src/nodes/vector_agg/plan.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tsl/src/nodes/vector_agg/plan.c b/tsl/src/nodes/vector_agg/plan.c index 5da5b713bac..ec95931c733 100644 --- a/tsl/src/nodes/vector_agg/plan.c +++ b/tsl/src/nodes/vector_agg/plan.c @@ -428,6 +428,7 @@ can_vectorize_aggref(Aggref *aggref, CustomScan *custom, VectorQualInfo *vqi) /* * Whether we can perform vectorized aggregation with a given grouping. + * Currently supports either no grouping or grouping by segmentby columns. 
*/ static bool can_vectorize_grouping(Agg *agg, CustomScan *custom, List *resolved_targetlist) @@ -496,6 +497,11 @@ has_vector_agg_node(Plan *plan, bool *has_normal_agg) append_plans = custom->custom_plans; } } + else if (IsA(plan, SubqueryScan)) + { + SubqueryScan *subquery = castNode(SubqueryScan, plan); + append_plans = list_make1(subquery->subplan); + } if (append_plans) { @@ -553,6 +559,11 @@ try_insert_vector_agg_node(Plan *plan) append_plans = custom->custom_plans; } } + else if (IsA(plan, SubqueryScan)) + { + SubqueryScan *subquery = castNode(SubqueryScan, plan); + append_plans = list_make1(subquery->subplan); + } if (append_plans) { From effa7eb81dc9bd419563e5407f43e5eef23c0f0f Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 2 Dec 2024 12:50:32 +0100 Subject: [PATCH 38/58] cleanup --- tsl/src/compression/arrow_c_data_interface.h | 25 ++++++++++++++------ tsl/test/expected/vector_agg_filter.out | 14 +++++++++-- tsl/test/sql/vector_agg_filter.sql | 12 ++++++++-- 3 files changed, 40 insertions(+), 11 deletions(-) diff --git a/tsl/src/compression/arrow_c_data_interface.h b/tsl/src/compression/arrow_c_data_interface.h index 077217b14af..a1f989c99e2 100644 --- a/tsl/src/compression/arrow_c_data_interface.h +++ b/tsl/src/compression/arrow_c_data_interface.h @@ -192,7 +192,7 @@ arrow_combine_validity(size_t num_words, uint64 *restrict storage, const uint64 { /* * Any and all of the filters can be null. For simplicity, move the non-null - * filters to the front. + * filters to the leading positions. */ const uint64 *tmp; #define SWAP(X, Y) \ @@ -200,17 +200,28 @@ arrow_combine_validity(size_t num_words, uint64 *restrict storage, const uint64 (X) = (Y); \ (Y) = tmp; - if (filter2 == NULL) - { - SWAP(filter2, filter3); - } - if (filter1 == NULL) { - SWAP(filter1, filter2); + /* + * We have at least one NULL that goes to the last position. 
+ */ + SWAP(filter1, filter3); + if (filter1 == NULL) + { + /* + * We have another NULL that goes to the second position. + */ + SWAP(filter1, filter2); + } + } + else + { if (filter2 == NULL) { + /* + * We have at least one NULL that goes to the last position. + */ SWAP(filter2, filter3); } } diff --git a/tsl/test/expected/vector_agg_filter.out b/tsl/test/expected/vector_agg_filter.out index 20d97efd1d4..b02db9b96e3 100644 --- a/tsl/test/expected/vector_agg_filter.out +++ b/tsl/test/expected/vector_agg_filter.out @@ -6,6 +6,7 @@ CREATE OR REPLACE FUNCTION mix(x anyelement) RETURNS float8 AS $$ SELECT hashfloat8(x::float8) / pow(2, 32) $$ LANGUAGE SQL; +create operator === (function = 'int4eq', rightarg = int4, leftarg = int4); \set CHUNKS 2::int \set CHUNK_ROWS 100000::int \set GROUPING_CARDINALITY 10::int @@ -62,8 +63,7 @@ select count(compress_chunk(x)) from show_chunks('aggfilter') x; vacuum freeze analyze aggfilter; set timescaledb.debug_require_vector_agg = 'require'; ----- Uncomment to generate reference. Note that there are minor discrepancies ----- on float4 due to different numeric stability in our and PG implementations. +---- Uncomment to generate reference. 
--set timescaledb.enable_vectorized_aggregation to off; set timescaledb.debug_require_vector_agg = 'allow'; select format('%sselect %s%s(%s)%s from aggfilter%s%s%s;', @@ -2942,3 +2942,13 @@ select s, min(ss) filter (where s != 5) from aggfilter where cint2 is null group 2 | 11 (10 rows) +reset timescaledb.debug_require_vector_agg; +-- FILTER that is not vectorizable +set timescaledb.debug_require_vector_agg = 'forbid'; +select count(*) filter (where cint2 === 0) from aggfilter; + count +------- + 7 +(1 row) + +reset timescaledb.debug_require_vector_agg; diff --git a/tsl/test/sql/vector_agg_filter.sql b/tsl/test/sql/vector_agg_filter.sql index 316102ee940..558b4a54e89 100644 --- a/tsl/test/sql/vector_agg_filter.sql +++ b/tsl/test/sql/vector_agg_filter.sql @@ -8,6 +8,8 @@ CREATE OR REPLACE FUNCTION mix(x anyelement) RETURNS float8 AS $$ SELECT hashfloat8(x::float8) / pow(2, 32) $$ LANGUAGE SQL; +create operator === (function = 'int4eq', rightarg = int4, leftarg = int4); + \set CHUNKS 2::int \set CHUNK_ROWS 100000::int \set GROUPING_CARDINALITY 10::int @@ -58,8 +60,7 @@ vacuum freeze analyze aggfilter; set timescaledb.debug_require_vector_agg = 'require'; ----- Uncomment to generate reference. Note that there are minor discrepancies ----- on float4 due to different numeric stability in our and PG implementations. +---- Uncomment to generate reference. 
--set timescaledb.enable_vectorized_aggregation to off; set timescaledb.debug_require_vector_agg = 'allow'; select @@ -104,3 +105,10 @@ where and (variable != '*' or function = 'count') order by explain, condition.n, variable, function, grouping.n, agg_filter.n \gexec + +reset timescaledb.debug_require_vector_agg; + +-- FILTER that is not vectorizable +set timescaledb.debug_require_vector_agg = 'forbid'; +select count(*) filter (where cint2 === 0) from aggfilter; +reset timescaledb.debug_require_vector_agg; From 8e6c6d23d3f60867ef08a349c464ea90ce07cb5d Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 2 Dec 2024 12:52:39 +0100 Subject: [PATCH 39/58] changelog --- .unreleased/vectorized-agg-filter | 1 + 1 file changed, 1 insertion(+) create mode 100644 .unreleased/vectorized-agg-filter diff --git a/.unreleased/vectorized-agg-filter b/.unreleased/vectorized-agg-filter new file mode 100644 index 00000000000..79b88afb3db --- /dev/null +++ b/.unreleased/vectorized-agg-filter @@ -0,0 +1 @@ +Implements: #7458 Support vectorized aggregation with aggregate FILTER clauses that are also vectorizable From b717f74c6b27e0e1e45c44ef971dcb04ef6b5b84 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 2 Dec 2024 13:04:59 +0100 Subject: [PATCH 40/58] constify stable expressions --- tsl/src/nodes/vector_agg/exec.c | 15 ++++++++++++++- tsl/test/expected/vector_agg_filter.out | 11 +++++++++++ tsl/test/sql/vector_agg_filter.sql | 9 ++++++++- 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/tsl/src/nodes/vector_agg/exec.c b/tsl/src/nodes/vector_agg/exec.c index 5870723d397..68b6c244a05 100644 --- a/tsl/src/nodes/vector_agg/exec.c +++ b/tsl/src/nodes/vector_agg/exec.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "nodes/vector_agg/exec.h" @@ -68,6 +69,17 @@ vector_agg_begin(CustomScanState *node, EState *estate, int eflags) DecompressChunkState
*decompress_state = (DecompressChunkState *) linitial(vector_agg_state->custom.custom_ps); + /* + * Set up the helper structures used to evaluate stable expressions in + * vectorized FILTER clauses. + */ + PlannerGlobal glob = { + .boundParams = node->ss.ps.state->es_param_list_info, + }; + PlannerInfo root = { + .glob = &glob, + }; + /* * The aggregated targetlist with Aggrefs is in the custom scan targetlist * of the custom scan node that is performing the vectorized aggregation. @@ -153,7 +165,8 @@ vector_agg_begin(CustomScanState *node, EState *estate, int eflags) if (aggref->aggfilter != NULL) { - def->filter_clauses = list_make1(aggref->aggfilter); + Node *constified = estimate_expression_value(&root, (Node *) aggref->aggfilter); + def->filter_clauses = list_make1(constified); } } else diff --git a/tsl/test/expected/vector_agg_filter.out b/tsl/test/expected/vector_agg_filter.out index b02db9b96e3..13629d3e4b3 100644 --- a/tsl/test/expected/vector_agg_filter.out +++ b/tsl/test/expected/vector_agg_filter.out @@ -6,7 +6,10 @@ CREATE OR REPLACE FUNCTION mix(x anyelement) RETURNS float8 AS $$ SELECT hashfloat8(x::float8) / pow(2, 32) $$ LANGUAGE SQL; +-- non-vectorizable equality operator create operator === (function = 'int4eq', rightarg = int4, leftarg = int4); +-- an abs() function that is stable not immutable +create function stable_abs(x int4) returns int4 as 'int4abs' language internal stable; \set CHUNKS 2::int \set CHUNK_ROWS 100000::int \set GROUPING_CARDINALITY 10::int @@ -2951,4 +2954,12 @@ select count(*) filter (where cint2 === 0) from aggfilter; 7 (1 row) +-- FILTER with stable function +set timescaledb.debug_require_vector_agg = 'require'; +select count(*) filter (where cint2 = stable_abs(0)) from aggfilter; + count +------- + 7 +(1 row) + reset timescaledb.debug_require_vector_agg; diff --git a/tsl/test/sql/vector_agg_filter.sql b/tsl/test/sql/vector_agg_filter.sql index 558b4a54e89..d3423d84eac 100644 --- a/tsl/test/sql/vector_agg_filter.sql +++ 
b/tsl/test/sql/vector_agg_filter.sql @@ -7,8 +7,10 @@ CREATE OR REPLACE FUNCTION mix(x anyelement) RETURNS float8 AS $$ SELECT hashfloat8(x::float8) / pow(2, 32) $$ LANGUAGE SQL; - +-- non-vectorizable equality operator create operator === (function = 'int4eq', rightarg = int4, leftarg = int4); +-- an abs() function that is stable not immutable +create function stable_abs(x int4) returns int4 as 'int4abs' language internal stable; \set CHUNKS 2::int \set CHUNK_ROWS 100000::int @@ -111,4 +113,9 @@ reset timescaledb.debug_require_vector_agg; -- FILTER that is not vectorizable set timescaledb.debug_require_vector_agg = 'forbid'; select count(*) filter (where cint2 === 0) from aggfilter; +-- FILTER with stable function +set timescaledb.debug_require_vector_agg = 'require'; +select count(*) filter (where cint2 = stable_abs(0)) from aggfilter; + reset timescaledb.debug_require_vector_agg; + From 47bcaa956558e0ed3540c5946ebf48fb2d8c33b1 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 3 Dec 2024 15:12:23 +0100 Subject: [PATCH 41/58] updates --- tsl/src/nodes/vector_agg/CMakeLists.txt | 1 + tsl/src/nodes/vector_agg/exec.c | 46 +- tsl/src/nodes/vector_agg/exec.h | 1 + .../function/agg_many_vector_helper.c | 45 +- .../function/float48_accum_single.c | 2 + tsl/src/nodes/vector_agg/function/functions.c | 70 +- tsl/src/nodes/vector_agg/function/functions.h | 16 + .../vector_agg/function/int128_accum_single.c | 1 + .../function/int24_avg_accum_single.c | 1 + .../vector_agg/function/int24_sum_single.c | 1 + .../function/minmax_arithmetic_single.c | 2 + .../vector_agg/function/sum_float_single.c | 1 + tsl/src/nodes/vector_agg/grouping_policy.h | 7 + .../nodes/vector_agg/grouping_policy_hash.c | 734 ++-- .../nodes/vector_agg/grouping_policy_hash.h | 158 + .../nodes/vector_agg/hashing/CMakeLists.txt | 6 + .../vector_agg/hashing/batch_hashing_params.h | 55 + tsl/src/nodes/vector_agg/hashing/hash64.h | 36 + 
.../vector_agg/hashing/hash_strategy_common.c | 52 + .../vector_agg/hashing/hash_strategy_impl.c | 281 ++ .../hash_strategy_impl_single_fixed_key.c | 70 + .../hashing/hash_strategy_single_fixed_2.c | 32 + .../hashing/hash_strategy_single_fixed_4.c | 32 + .../hashing/hash_strategy_single_fixed_8.c | 32 + .../vector_agg/hashing/hashing_strategy.h | 64 + .../vector_agg/hashing/template_helper.h | 10 + tsl/src/nodes/vector_agg/plan.c | 6 +- tsl/test/expected/vector_agg_functions.out | 3094 ++++++++++++++++- tsl/test/sql/vector_agg_functions.sql | 14 +- 29 files changed, 4309 insertions(+), 561 deletions(-) create mode 100644 tsl/src/nodes/vector_agg/grouping_policy_hash.h create mode 100644 tsl/src/nodes/vector_agg/hashing/CMakeLists.txt create mode 100644 tsl/src/nodes/vector_agg/hashing/batch_hashing_params.h create mode 100644 tsl/src/nodes/vector_agg/hashing/hash64.h create mode 100644 tsl/src/nodes/vector_agg/hashing/hash_strategy_common.c create mode 100644 tsl/src/nodes/vector_agg/hashing/hash_strategy_impl.c create mode 100644 tsl/src/nodes/vector_agg/hashing/hash_strategy_impl_single_fixed_key.c create mode 100644 tsl/src/nodes/vector_agg/hashing/hash_strategy_single_fixed_2.c create mode 100644 tsl/src/nodes/vector_agg/hashing/hash_strategy_single_fixed_4.c create mode 100644 tsl/src/nodes/vector_agg/hashing/hash_strategy_single_fixed_8.c create mode 100644 tsl/src/nodes/vector_agg/hashing/hashing_strategy.h create mode 100644 tsl/src/nodes/vector_agg/hashing/template_helper.h diff --git a/tsl/src/nodes/vector_agg/CMakeLists.txt b/tsl/src/nodes/vector_agg/CMakeLists.txt index c3a85bbd30f..cf69755c077 100644 --- a/tsl/src/nodes/vector_agg/CMakeLists.txt +++ b/tsl/src/nodes/vector_agg/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(function) +add_subdirectory(hashing) set(SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/exec.c ${CMAKE_CURRENT_SOURCE_DIR}/grouping_policy_batch.c diff --git a/tsl/src/nodes/vector_agg/exec.c b/tsl/src/nodes/vector_agg/exec.c index 
1d773ce1c4e..bfd46aa7863 100644 --- a/tsl/src/nodes/vector_agg/exec.c +++ b/tsl/src/nodes/vector_agg/exec.c @@ -160,17 +160,51 @@ vector_agg_begin(CustomScanState *node, EState *estate, int eflags) Var *var = castNode(Var, tlentry->expr); col->input_offset = get_input_offset(decompress_state, var); + DecompressContext *dcontext = &decompress_state->decompress_context; + CompressionColumnDescription *desc = + &dcontext->compressed_chunk_columns[col->input_offset]; + col->value_bytes = desc->value_bytes; } } /* - * Currently the only grouping policy we use is per-batch grouping. + * Determine which grouping policy we are going to use. */ - vector_agg_state->grouping = - create_grouping_policy_batch(vector_agg_state->num_agg_defs, - vector_agg_state->agg_defs, - vector_agg_state->num_grouping_columns, - vector_agg_state->grouping_columns); + bool all_segmentby = true; + for (int i = 0; i < vector_agg_state->num_grouping_columns; i++) + { + GroupingColumn *col = &vector_agg_state->grouping_columns[i]; + DecompressContext *dcontext = &decompress_state->decompress_context; + CompressionColumnDescription *desc = &dcontext->compressed_chunk_columns[col->input_offset]; + if (desc->type != SEGMENTBY_COLUMN) + { + all_segmentby = false; + break; + } + } + + if (all_segmentby) + { + /* + * Per-batch grouping. + */ + vector_agg_state->grouping = + create_grouping_policy_batch(vector_agg_state->num_agg_defs, + vector_agg_state->agg_defs, + vector_agg_state->num_grouping_columns, + vector_agg_state->grouping_columns); + } + else + { + /* + * Hash grouping. 
+ */ + vector_agg_state->grouping = + create_grouping_policy_hash(vector_agg_state->num_agg_defs, + vector_agg_state->agg_defs, + vector_agg_state->num_grouping_columns, + vector_agg_state->grouping_columns); + } } static void diff --git a/tsl/src/nodes/vector_agg/exec.h b/tsl/src/nodes/vector_agg/exec.h index 26f832b3548..e153a287dfd 100644 --- a/tsl/src/nodes/vector_agg/exec.h +++ b/tsl/src/nodes/vector_agg/exec.h @@ -24,6 +24,7 @@ typedef struct GroupingColumn { int input_offset; int output_offset; + int value_bytes; } GroupingColumn; typedef struct diff --git a/tsl/src/nodes/vector_agg/function/agg_many_vector_helper.c b/tsl/src/nodes/vector_agg/function/agg_many_vector_helper.c index 47916e29131..192fad35017 100644 --- a/tsl/src/nodes/vector_agg/function/agg_many_vector_helper.c +++ b/tsl/src/nodes/vector_agg/function/agg_many_vector_helper.c @@ -10,24 +10,49 @@ * FUNCTION_NAME(one) function, which adds one passing non-null row to the given * aggregate function state. */ -static void -FUNCTION_NAME(many_vector)(void *restrict agg_states, uint32 *restrict offsets, int start_row, - int end_row, const ArrowArray *vector, MemoryContext agg_extra_mctx) +static pg_attribute_always_inline void +FUNCTION_NAME(many_vector_impl)(void *restrict agg_states, const uint32 *offsets, + const uint64 *filter, int start_row, int end_row, + const ArrowArray *vector, MemoryContext agg_extra_mctx) { - MemoryContext old = MemoryContextSwitchTo(agg_extra_mctx); + FUNCTION_NAME(state) *restrict states = (FUNCTION_NAME(state) *) agg_states; const CTYPE *values = vector->buffers[1]; - const uint64 *valid = vector->buffers[0]; + MemoryContext old = MemoryContextSwitchTo(agg_extra_mctx); for (int row = start_row; row < end_row; row++) { - FUNCTION_NAME(state) *state = (offsets[row] + (FUNCTION_NAME(state) *) agg_states); const CTYPE value = values[row]; - const bool row_passes = (offsets[row] != 0); - const bool value_notnull = arrow_row_is_valid(valid, row); - - if (row_passes && 
value_notnull) + FUNCTION_NAME(state) *restrict state = &states[offsets[row]]; + if (arrow_row_is_valid(filter, row)) { + Assert(offsets[row] != 0); FUNCTION_NAME(one)(state, value); } } MemoryContextSwitchTo(old); } + +static pg_noinline void +FUNCTION_NAME(many_vector_all_valid)(void *restrict agg_states, const uint32 *offsets, + int start_row, int end_row, const ArrowArray *vector, + MemoryContext agg_extra_mctx) +{ + FUNCTION_NAME(many_vector_impl) + (agg_states, offsets, NULL, start_row, end_row, vector, agg_extra_mctx); +} + +static void +FUNCTION_NAME(many_vector)(void *restrict agg_states, const uint32 *offsets, const uint64 *filter, + int start_row, int end_row, const ArrowArray *vector, + MemoryContext agg_extra_mctx) +{ + if (filter == NULL) + { + FUNCTION_NAME(many_vector_all_valid) + (agg_states, offsets, start_row, end_row, vector, agg_extra_mctx); + } + else + { + FUNCTION_NAME(many_vector_impl) + (agg_states, offsets, filter, start_row, end_row, vector, agg_extra_mctx); + } +} diff --git a/tsl/src/nodes/vector_agg/function/float48_accum_single.c b/tsl/src/nodes/vector_agg/function/float48_accum_single.c index 0bdcd928593..eba81ee687b 100644 --- a/tsl/src/nodes/vector_agg/function/float48_accum_single.c +++ b/tsl/src/nodes/vector_agg/function/float48_accum_single.c @@ -316,6 +316,7 @@ FUNCTION_NAME(one)(void *restrict agg_state, const CTYPE value) state->Sx = newSx; } +#include "agg_many_vector_helper.c" #include "agg_scalar_helper.c" #include "agg_vector_validity_helper.c" @@ -325,6 +326,7 @@ VectorAggFunctions FUNCTION_NAME(argdef) = { .agg_emit = FUNCTION_NAME(emit), .agg_scalar = FUNCTION_NAME(scalar), .agg_vector = FUNCTION_NAME(vector), + .agg_many_vector = FUNCTION_NAME(many_vector), }; #undef UPDATE #undef COMBINE diff --git a/tsl/src/nodes/vector_agg/function/functions.c b/tsl/src/nodes/vector_agg/function/functions.c index fad42edc293..bb6ef7cf074 100644 --- a/tsl/src/nodes/vector_agg/function/functions.c +++ 
b/tsl/src/nodes/vector_agg/function/functions.c @@ -52,20 +52,61 @@ count_star_scalar(void *agg_state, Datum constvalue, bool constisnull, int n, state->count += n; } -static void -count_star_many_scalar(void *restrict agg_states, uint32 *restrict offsets, int start_row, - int end_row, Datum constvalue, bool constisnull, - MemoryContext agg_extra_mctx) +static pg_attribute_always_inline void +count_star_many_scalar_impl(void *restrict agg_states, const uint32 *offsets, const uint64 *filter, + int start_row, int end_row, Datum constvalue, bool constisnull, + MemoryContext agg_extra_mctx) { CountState *states = (CountState *) agg_states; for (int row = start_row; row < end_row; row++) { - if (offsets[row] == 0) + if (arrow_row_is_valid(filter, row)) { - continue; + states[offsets[row]].count++; } + } +} - states[offsets[row]].count++; +static pg_noinline void +count_star_many_scalar_nofilter(void *restrict agg_states, const uint32 *offsets, int start_row, + int end_row, Datum constvalue, bool constisnull, + MemoryContext agg_extra_mctx) +{ + count_star_many_scalar_impl(agg_states, + offsets, + NULL, + start_row, + end_row, + constvalue, + constisnull, + agg_extra_mctx); +} + +static void +count_star_many_scalar(void *restrict agg_states, const uint32 *offsets, const uint64 *filter, + int start_row, int end_row, Datum constvalue, bool constisnull, + MemoryContext agg_extra_mctx) +{ + if (filter == NULL) + { + count_star_many_scalar_nofilter(agg_states, + offsets, + start_row, + end_row, + constvalue, + constisnull, + agg_extra_mctx); + } + else + { + count_star_many_scalar_impl(agg_states, + offsets, + filter, + start_row, + end_row, + constvalue, + constisnull, + agg_extra_mctx); } } @@ -94,8 +135,8 @@ count_any_scalar(void *agg_state, Datum constvalue, bool constisnull, int n, } static void -count_any_many_vector(void *agg_state, const ArrowArray *vector, const uint64 *filter, - MemoryContext agg_extra_mctx) +count_any_vector(void *agg_state, const ArrowArray 
*vector, const uint64 *filter, + MemoryContext agg_extra_mctx) { CountState *state = (CountState *) agg_state; const int n = vector->length; @@ -129,16 +170,14 @@ count_any_many_vector(void *agg_state, const ArrowArray *vector, const uint64 *f } static void -count_any_many(void *restrict agg_states, uint32 *restrict offsets, int start_row, int end_row, - const ArrowArray *vector, MemoryContext agg_extra_mctx) +count_any_many_vector(void *restrict agg_states, const uint32 *offsets, const uint64 *filter, + int start_row, int end_row, const ArrowArray *vector, + MemoryContext agg_extra_mctx) { - const uint64 *valid = vector->buffers[0]; for (int row = start_row; row < end_row; row++) { CountState *state = (offsets[row] + (CountState *) agg_states); - const bool row_passes = (offsets[row] != 0); - const bool value_notnull = arrow_row_is_valid(valid, row); - if (row_passes && value_notnull) + if (arrow_row_is_valid(filter, row)) { state->count++; } @@ -151,6 +190,7 @@ VectorAggFunctions count_any_agg = { .agg_emit = count_emit, .agg_scalar = count_any_scalar, .agg_vector = count_any_vector, + .agg_many_vector = count_any_many_vector, }; /* diff --git a/tsl/src/nodes/vector_agg/function/functions.h b/tsl/src/nodes/vector_agg/function/functions.h index cd05fe4ea5a..5845333d79d 100644 --- a/tsl/src/nodes/vector_agg/function/functions.h +++ b/tsl/src/nodes/vector_agg/function/functions.h @@ -37,6 +37,22 @@ typedef struct void (*agg_scalar)(void *restrict agg_state, Datum constvalue, bool constisnull, int n, MemoryContext agg_extra_mctx); + /* + * Add the rows of the given arrow array to aggregate function states given + * by the respective offsets. + */ + void (*agg_many_vector)(void *restrict agg_states, const uint32 *offsets, const uint64 *filter, + int start_row, int end_row, const ArrowArray *vector, + MemoryContext agg_extra_mctx); + + /* + * Same as above, but for a scalar argument. This is mostly important for + * count(*) and can be NULL. 
+ */ + void (*agg_many_scalar)(void *restrict agg_states, const uint32 *offsets, const uint64 *filter, + int start_row, int end_row, Datum constvalue, bool constisnull, + MemoryContext agg_extra_mctx); + /* Emit a partial aggregation result. */ void (*agg_emit)(void *restrict agg_state, Datum *out_result, bool *out_isnull); } VectorAggFunctions; diff --git a/tsl/src/nodes/vector_agg/function/int128_accum_single.c b/tsl/src/nodes/vector_agg/function/int128_accum_single.c index 8fdae03917e..2e0b9ca25f2 100644 --- a/tsl/src/nodes/vector_agg/function/int128_accum_single.c +++ b/tsl/src/nodes/vector_agg/function/int128_accum_single.c @@ -110,6 +110,7 @@ FUNCTION_NAME(one)(void *restrict agg_state, const CTYPE value) #endif } +#include "agg_many_vector_helper.c" #include "agg_scalar_helper.c" #include "agg_vector_validity_helper.c" diff --git a/tsl/src/nodes/vector_agg/function/int24_avg_accum_single.c b/tsl/src/nodes/vector_agg/function/int24_avg_accum_single.c index 7659c74b181..fb90f116255 100644 --- a/tsl/src/nodes/vector_agg/function/int24_avg_accum_single.c +++ b/tsl/src/nodes/vector_agg/function/int24_avg_accum_single.c @@ -38,6 +38,7 @@ FUNCTION_NAME(one)(void *restrict agg_state, const CTYPE value) state->sum += value; } +#include "agg_many_vector_helper.c" #include "agg_scalar_helper.c" #include "agg_vector_validity_helper.c" diff --git a/tsl/src/nodes/vector_agg/function/int24_sum_single.c b/tsl/src/nodes/vector_agg/function/int24_sum_single.c index 56023517c61..95320240e75 100644 --- a/tsl/src/nodes/vector_agg/function/int24_sum_single.c +++ b/tsl/src/nodes/vector_agg/function/int24_sum_single.c @@ -61,6 +61,7 @@ FUNCTION_NAME(one)(void *restrict agg_state, const CTYPE value) typedef Int24SumState FUNCTION_NAME(state); +#include "agg_many_vector_helper.c" #include "agg_scalar_helper.c" #include "agg_vector_validity_helper.c" diff --git a/tsl/src/nodes/vector_agg/function/minmax_arithmetic_single.c b/tsl/src/nodes/vector_agg/function/minmax_arithmetic_single.c 
index e21658c4dfe..e64dab8e20c 100644 --- a/tsl/src/nodes/vector_agg/function/minmax_arithmetic_single.c +++ b/tsl/src/nodes/vector_agg/function/minmax_arithmetic_single.c @@ -58,6 +58,7 @@ FUNCTION_NAME(one)(void *restrict agg_state, const CTYPE value) } } +#include "agg_many_vector_helper.c" #include "agg_scalar_helper.c" #include "agg_vector_validity_helper.c" @@ -67,6 +68,7 @@ VectorAggFunctions FUNCTION_NAME(argdef) = { .agg_emit = minmax_emit, .agg_scalar = FUNCTION_NAME(scalar), .agg_vector = FUNCTION_NAME(vector), + .agg_many_vector = FUNCTION_NAME(many_vector), }; #endif diff --git a/tsl/src/nodes/vector_agg/function/sum_float_single.c b/tsl/src/nodes/vector_agg/function/sum_float_single.c index 88a74a9b122..6f3002fd12f 100644 --- a/tsl/src/nodes/vector_agg/function/sum_float_single.c +++ b/tsl/src/nodes/vector_agg/function/sum_float_single.c @@ -91,6 +91,7 @@ FUNCTION_NAME(one)(void *restrict agg_state, const CTYPE value) state->result += value; } +#include "agg_many_vector_helper.c" #include "agg_scalar_helper.c" #include "agg_vector_validity_helper.c" diff --git a/tsl/src/nodes/vector_agg/grouping_policy.h b/tsl/src/nodes/vector_agg/grouping_policy.h index 98bcbbed315..e7c5af909d2 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy.h +++ b/tsl/src/nodes/vector_agg/grouping_policy.h @@ -28,6 +28,9 @@ typedef struct GroupingPolicy */ void (*gp_reset)(GroupingPolicy *gp); + /* + * Aggregate a single compressed batch. 
+ */ void (*gp_add_batch)(GroupingPolicy *gp, DecompressBatchState *batch_state); /* @@ -54,3 +57,7 @@ typedef struct GroupingPolicy extern GroupingPolicy *create_grouping_policy_batch(int num_agg_defs, VectorAggDef *agg_defs, int num_grouping_columns, GroupingColumn *grouping_columns); + +extern GroupingPolicy *create_grouping_policy_hash(int num_agg_defs, VectorAggDef *agg_defs, + int num_grouping_columns, + GroupingColumn *grouping_columns); diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.c b/tsl/src/nodes/vector_agg/grouping_policy_hash.c index 0a8258efcb2..12e0f09700e 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.c @@ -19,6 +19,8 @@ #include "nodes/decompress_chunk/compressed_batch.h" #include "nodes/vector_agg/exec.h" +#include "grouping_policy_hash.h" + #ifdef USE_FLOAT8_BYVAL #define DEBUG_LOG(MSG, ...) elog(DEBUG3, MSG, __VA_ARGS__) #else @@ -29,146 +31,64 @@ #define DEBUG_LOG(...) #endif -/* - * We can use crc32 as a hash function, it has bad properties but takes only one - * cycle, which is why it is sometimes used in the existing hash table - * implementations. - */ -#ifdef USE_SSE42_CRC32C -#include -static pg_attribute_always_inline uint64 -hash64(uint64 x) -{ - return _mm_crc32_u64(~0ULL, x); -} - -#else -/* - * When we don't have the crc32 instruction, use the SplitMix64 finalizer. - */ -static pg_attribute_always_inline uint64 -hash64(uint64 x) -{ - x ^= x >> 30; - x *= 0xbf58476d1ce4e5b9U; - x ^= x >> 27; - x *= 0x94d049bb133111ebU; - x ^= x >> 31; - return x; -} -#endif - -/* - * For the hash table, use the generic Datum key that is mapped to the aggregate - * state index. 
- */ -typedef struct -{ - Datum key; - uint32 status; - uint32 agg_state_index; -} HashEntry; - -#define SH_PREFIX h -#define SH_ELEMENT_TYPE HashEntry -#define SH_KEY_TYPE Datum -#define SH_KEY key -#define SH_HASH_KEY(tb, key) hash64(key) -#define SH_EQUAL(tb, a, b) a == b -#define SH_SCOPE static inline -#define SH_DECLARE -#define SH_DEFINE -#include - -/* - * Hash grouping policy. - */ -typedef struct -{ - /* - * We're using data inheritance from the GroupingPolicy. - */ - GroupingPolicy funcs; - - List *agg_defs; - List *output_grouping_columns; - - /* - * The hash table we use for grouping. - */ - struct h_hash *table; - - /* - * We have to track whether we are in the mode of returning the partial - * aggregation results, and also use a hash table iterator to track our - * progress between emit() calls. - */ - bool returning_results; - struct h_iterator iter; - - /* - * In single-column grouping, we store the null key outside of the hash - * table, and it has a reserved aggregate state index 1. We also reset this - * flag after we output the null key during iteration. - */ - bool have_null_key; - - /* - * A memory context for aggregate functions to allocate additional data, - * i.e. if they store strings or float8 datum on 32-bit systems. Valid until - * the grouping policy is reset. - */ - MemoryContext agg_extra_mctx; - - /* - * Temporary storage of aggregate state offsets for a given batch. We keep - * it in the policy because it is potentially too big to keep on stack, and - * we don't want to reallocate it each batch. - */ - uint32 *offsets; - uint64 num_allocated_offsets; - - /* - * Storage of aggregate function states, each List entry is the array of - * states for the respective function from agg_defs. The state index 0 is - * invalid, and the state index 1 is reserved for a null key. - */ - List *per_agg_states; - uint64 allocated_aggstate_rows; - - /* - * Some statistics for debugging. 
- */ - uint64 stat_input_total_rows; - uint64 stat_input_valid_rows; - uint64 stat_bulk_filtered_rows; -} GroupingPolicyHash; +extern HashingStrategy single_fixed_2_strategy; +extern HashingStrategy single_fixed_4_strategy; +extern HashingStrategy single_fixed_8_strategy; +extern HashingStrategy single_text_strategy; +extern HashingStrategy serialized_strategy; static const GroupingPolicy grouping_policy_hash_functions; GroupingPolicy * -create_grouping_policy_hash(List *agg_defs, List *output_grouping_columns) +create_grouping_policy_hash(int num_agg_defs, VectorAggDef *agg_defs, int num_grouping_columns, + GroupingColumn *grouping_columns) { GroupingPolicyHash *policy = palloc0(sizeof(GroupingPolicyHash)); policy->funcs = grouping_policy_hash_functions; - policy->output_grouping_columns = output_grouping_columns; - policy->agg_defs = agg_defs; + + policy->num_grouping_columns = num_grouping_columns; + policy->grouping_columns = grouping_columns; + policy->agg_extra_mctx = AllocSetContextCreate(CurrentMemoryContext, "agg extra", ALLOCSET_DEFAULT_SIZES); - policy->allocated_aggstate_rows = TARGET_COMPRESSED_BATCH_SIZE; - ListCell *lc; - foreach (lc, agg_defs) + policy->num_allocated_per_key_agg_states = TARGET_COMPRESSED_BATCH_SIZE; + + policy->num_agg_defs = num_agg_defs; + policy->agg_defs = agg_defs; + + policy->per_agg_per_key_states = + palloc(sizeof(*policy->per_agg_per_key_states) * policy->num_agg_defs); + for (int i = 0; i < policy->num_agg_defs; i++) { - VectorAggDef *agg_def = lfirst(lc); - policy->per_agg_states = - lappend(policy->per_agg_states, - palloc0(agg_def->func.state_bytes * policy->allocated_aggstate_rows)); + const VectorAggDef *agg_def = &policy->agg_defs[i]; + policy->per_agg_per_key_states[i] = + palloc(agg_def->func.state_bytes * policy->num_allocated_per_key_agg_states); } - policy->table = h_create(CurrentMemoryContext, policy->allocated_aggstate_rows, NULL); - policy->have_null_key = false; + 
policy->current_batch_grouping_column_values = + palloc(sizeof(CompressedColumnValues) * num_grouping_columns); - policy->returning_results = false; + Assert(num_grouping_columns == 1); + const GroupingColumn *g = &policy->grouping_columns[0]; + switch (g->value_bytes) + { + case 8: + policy->hashing = single_fixed_8_strategy; + break; + case 4: + policy->hashing = single_fixed_4_strategy; + break; + case 2: + policy->hashing = single_fixed_2_strategy; + break; + default: + Assert(false); + break; + } + + policy->hashing.key_body_mctx = policy->agg_extra_mctx; + + policy->hashing.init(&policy->hashing, policy); return &policy->funcs; } @@ -182,36 +102,49 @@ gp_hash_reset(GroupingPolicy *obj) policy->returning_results = false; - h_reset(policy->table); - policy->have_null_key = false; + policy->hashing.reset(&policy->hashing); + + /* + * Have to reset this because it's in the key body context which is also + * reset here. + */ + policy->tmp_key_storage = NULL; + policy->num_tmp_key_storage_bytes = 0; + + policy->last_used_key_index = 0; policy->stat_input_valid_rows = 0; policy->stat_input_total_rows = 0; - policy->stat_bulk_filtered_rows = 0; + policy->stat_consecutive_keys = 0; } static void -compute_single_aggregate(DecompressBatchState *batch_state, int start_row, int end_row, - VectorAggDef *agg_def, void *agg_states, uint32 *offsets, - MemoryContext agg_extra_mctx) +compute_single_aggregate(GroupingPolicyHash *policy, const DecompressBatchState *batch_state, + int start_row, int end_row, const VectorAggDef *agg_def, void *agg_states) { - ArrowArray *arg_arrow = NULL; + const ArrowArray *arg_arrow = NULL; + const uint64 *arg_validity_bitmap = NULL; Datum arg_datum = 0; bool arg_isnull = true; + const uint32 *offsets = policy->key_index_for_row; + MemoryContext agg_extra_mctx = policy->agg_extra_mctx; + /* * We have functions with one argument, and one function with no arguments * (count(*)). Collect the arguments. 
*/ if (agg_def->input_offset >= 0) { - CompressedColumnValues *values = &batch_state->compressed_columns[agg_def->input_offset]; + const CompressedColumnValues *values = + &batch_state->compressed_columns[agg_def->input_offset]; Assert(values->decompression_type != DT_Invalid); Assert(values->decompression_type != DT_Iterator); if (values->arrow != NULL) { arg_arrow = values->arrow; + arg_validity_bitmap = values->buffers[0]; } else { @@ -221,14 +154,28 @@ compute_single_aggregate(DecompressBatchState *batch_state, int start_row, int e } } + /* + * Compute the unified validity bitmap. + */ + const size_t num_words = (batch_state->total_batch_rows + 63) / 64; + const uint64 *filter = arrow_combine_validity(num_words, + policy->tmp_filter, + batch_state->vector_qual_result, + arg_validity_bitmap); + /* * Now call the function. */ if (arg_arrow != NULL) { /* Arrow argument. */ - agg_def->func - .agg_many_vector(agg_states, offsets, start_row, end_row, arg_arrow, agg_extra_mctx); + agg_def->func.agg_many_vector(agg_states, + offsets, + filter, + start_row, + end_row, + arg_arrow, + agg_extra_mctx); } else { @@ -240,6 +187,7 @@ compute_single_aggregate(DecompressBatchState *batch_state, int start_row, int e { agg_def->func.agg_many_scalar(agg_states, offsets, + filter, start_row, end_row, arg_datum, @@ -250,7 +198,7 @@ compute_single_aggregate(DecompressBatchState *batch_state, int start_row, int e { for (int i = start_row; i < end_row; i++) { - if (offsets[i] == 0) + if (!arrow_row_is_valid(filter, i)) { continue; } @@ -262,230 +210,80 @@ compute_single_aggregate(DecompressBatchState *batch_state, int start_row, int e } } -/* - * Fill the aggregation state offsets for all rows using a hash table. 
- */ -static pg_attribute_always_inline uint32 -fill_offsets_impl(GroupingPolicyHash *policy, CompressedColumnValues column, - const uint64 *restrict filter, uint32 next_unused_state_index, int start_row, - int end_row, uint32 *restrict offsets, - void (*get_key)(CompressedColumnValues column, int row, Datum *restrict key, - bool *restrict valid)) -{ - struct h_hash *restrict table = policy->table; - for (int row = start_row; row < end_row; row++) - { - bool key_valid = false; - Datum key = { 0 }; - get_key(column, row, &key, &key_valid); - - if (!arrow_row_is_valid(filter, row)) - { - continue; - } - - if (key_valid) - { - bool found = false; - HashEntry *restrict entry = h_insert(table, key, &found); - if (!found) - { - entry->agg_state_index = next_unused_state_index++; - } - offsets[row] = entry->agg_state_index; - } - else - { - policy->have_null_key = true; - offsets[row] = 1; - } - } - - return next_unused_state_index; -} - -/* - * This function exists just to nudge the compiler to generate simplified - * implementation for the important case where the entire batch matches and the - * key has no null values. 
- */ -static pg_attribute_always_inline uint32 -fill_offsets_dispatch(GroupingPolicyHash *policy, DecompressBatchState *batch_state, - int key_column_index, uint32 next_unused_state_index, int start_row, - int end_row, uint32 *restrict offsets, - void (*get_key)(CompressedColumnValues column, int row, Datum *restrict key, - bool *restrict valid)) -{ - CompressedColumnValues column = batch_state->compressed_columns[key_column_index]; - const uint64 *restrict filter = batch_state->vector_qual_result; - - if (filter == NULL && column.buffers[0] == NULL) - { - next_unused_state_index = fill_offsets_impl(policy, - column, - filter, - next_unused_state_index, - start_row, - end_row, - offsets, - get_key); - } - else if (filter != NULL && column.buffers[0] == NULL) - { - next_unused_state_index = fill_offsets_impl(policy, - column, - filter, - next_unused_state_index, - start_row, - end_row, - offsets, - get_key); - } - else if (filter == NULL && column.buffers[0] != NULL) - { - next_unused_state_index = fill_offsets_impl(policy, - column, - filter, - next_unused_state_index, - start_row, - end_row, - offsets, - get_key); - } - else if (filter != NULL && column.buffers[0] != NULL) - { - next_unused_state_index = fill_offsets_impl(policy, - column, - filter, - next_unused_state_index, - start_row, - end_row, - offsets, - get_key); - } - else - { - Assert(false); - } - - policy->stat_input_total_rows += batch_state->total_batch_rows; - policy->stat_input_valid_rows += arrow_num_valid(filter, batch_state->total_batch_rows); - return next_unused_state_index; -} - -/* - * Functions to get the key value from the decompressed column, depending on its - * width and whether it's a scalar column. 
- */ -static pg_attribute_always_inline void -get_key_scalar(CompressedColumnValues column, int row, Datum *restrict key, bool *restrict valid) -{ - Assert(column.decompression_type == DT_Scalar); - *key = *column.output_value; - *valid = !*column.output_isnull; -} - -static pg_attribute_always_inline void -get_key_arrow_fixed(CompressedColumnValues column, int row, int key_bytes, Datum *restrict key, - bool *restrict valid) -{ - Assert(column.decompression_type == key_bytes); - const void *values = column.buffers[1]; - const uint64 *key_validity = column.buffers[0]; - *valid = arrow_row_is_valid(key_validity, row); - memcpy(key, key_bytes * row + (char *) values, key_bytes); -} - -static pg_attribute_always_inline void -get_key_arrow_fixed_2(CompressedColumnValues column, int row, Datum *restrict key, - bool *restrict valid) +static void +add_one_range(GroupingPolicyHash *policy, DecompressBatchState *batch_state, const int start_row, + const int end_row) { - get_key_arrow_fixed(column, row, 2, key, valid); -} + const int num_fns = policy->num_agg_defs; -static pg_attribute_always_inline void -get_key_arrow_fixed_4(CompressedColumnValues column, int row, Datum *restrict key, - bool *restrict valid) -{ - get_key_arrow_fixed(column, row, 4, key, valid); -} + Assert(start_row < end_row); + Assert(end_row <= batch_state->total_batch_rows); -static pg_attribute_always_inline void -get_key_arrow_fixed_8(CompressedColumnValues column, int row, Datum *restrict key, - bool *restrict valid) -{ -#ifndef USE_FLOAT8_BYVAL /* - * Shouldn't be called for this configuration, because we only use this - * grouping strategy for by-value types. + * Remember which aggregation states have already existed, and which we + * have to initialize. State index zero is invalid. 
*/ - Assert(false); -#endif + const uint32 last_initialized_key_index = policy->last_used_key_index; + Assert(last_initialized_key_index <= policy->num_allocated_per_key_agg_states); - get_key_arrow_fixed(column, row, 8, key, valid); -} + /* + * Match rows to aggregation states using a hash table. + */ + Assert((size_t) end_row <= policy->num_key_index_for_row); + policy->hashing.fill_offsets(policy, batch_state, start_row, end_row); -/* - * Implementation of bulk hashing specialized for a given key width. - */ -static pg_noinline uint32 -fill_offsets_arrow_fixed_8(GroupingPolicyHash *policy, DecompressBatchState *batch_state, - int key_column_index, uint32 next_unused_state_index, int start_row, - int end_row, uint32 *restrict offsets) -{ - return fill_offsets_dispatch(policy, - batch_state, - key_column_index, - next_unused_state_index, - start_row, - end_row, - offsets, - get_key_arrow_fixed_8); -} + /* + * Process the aggregate function states. We are processing single aggregate + * function for the entire batch to improve the memory locality. + */ + const uint64 new_aggstate_rows = policy->num_allocated_per_key_agg_states * 2 + 1; + for (int agg_index = 0; agg_index < num_fns; agg_index++) + { + const VectorAggDef *agg_def = &policy->agg_defs[agg_index]; + /* + * If we added new keys, initialize the aggregate function states for + * them. + */ + if (policy->last_used_key_index > last_initialized_key_index) + { + /* + * If the aggregate function states don't fit into the existing + * storage, reallocate it. 
+ */ + if (policy->last_used_key_index >= policy->num_allocated_per_key_agg_states) + { + policy->per_agg_per_key_states[agg_index] = + repalloc(policy->per_agg_per_key_states[agg_index], + new_aggstate_rows * agg_def->func.state_bytes); + } -static pg_noinline uint32 -fill_offsets_arrow_fixed_4(GroupingPolicyHash *policy, DecompressBatchState *batch_state, - int key_column_index, uint32 next_unused_state_index, int start_row, - int end_row, uint32 *restrict offsets) -{ - return fill_offsets_dispatch(policy, - batch_state, - key_column_index, - next_unused_state_index, - start_row, - end_row, - offsets, - get_key_arrow_fixed_4); -} + void *first_uninitialized_state = + agg_def->func.state_bytes * (last_initialized_key_index + 1) + + (char *) policy->per_agg_per_key_states[agg_index]; + agg_def->func.agg_init(first_uninitialized_state, + policy->last_used_key_index - last_initialized_key_index); + } -static pg_noinline uint32 -fill_offsets_arrow_fixed_2(GroupingPolicyHash *policy, DecompressBatchState *batch_state, - int key_column_index, uint32 next_unused_state_index, int start_row, - int end_row, uint32 *restrict offsets) -{ - return fill_offsets_dispatch(policy, + /* + * Add this batch to the states of this aggregate function. + */ + compute_single_aggregate(policy, batch_state, - key_column_index, - next_unused_state_index, start_row, end_row, - offsets, - get_key_arrow_fixed_2); -} + agg_def, + policy->per_agg_per_key_states[agg_index]); + } -static pg_noinline uint32 -fill_offsets_scalar(GroupingPolicyHash *policy, DecompressBatchState *batch_state, - int key_column_index, uint32 next_unused_state_index, int start_row, - int end_row, uint32 *restrict offsets) -{ - return fill_offsets_dispatch(policy, - batch_state, - key_column_index, - next_unused_state_index, - start_row, - end_row, - offsets, - get_key_scalar); + /* + * Record the newly allocated number of aggregate function states in case we + * had to reallocate. 
+ */ + if (policy->last_used_key_index >= policy->num_allocated_per_key_agg_states) + { + Assert(new_aggstate_rows > policy->num_allocated_per_key_agg_states); + policy->num_allocated_per_key_agg_states = new_aggstate_rows; + } } static void @@ -495,161 +293,60 @@ gp_hash_add_batch(GroupingPolicy *gp, DecompressBatchState *batch_state) Assert(!policy->returning_results); - const uint64_t *restrict filter = batch_state->vector_qual_result; const int n = batch_state->total_batch_rows; /* * Initialize the array for storing the aggregate state offsets corresponding - * to a given batch row. + * to a given batch row. We don't need the offsets for the previous batch + * that are currently stored there, so we don't need to use repalloc. */ - if ((size_t) n > policy->num_allocated_offsets) + if ((size_t) n > policy->num_key_index_for_row) { - policy->num_allocated_offsets = n; - policy->offsets = palloc(sizeof(policy->offsets[0]) * policy->num_allocated_offsets); + if (policy->key_index_for_row != NULL) + { + pfree(policy->key_index_for_row); + } + policy->num_key_index_for_row = n; + policy->key_index_for_row = + palloc(sizeof(policy->key_index_for_row[0]) * policy->num_key_index_for_row); } - memset(policy->offsets, 0, n * sizeof(policy->offsets[0])); + memset(policy->key_index_for_row, 0, n * sizeof(policy->key_index_for_row[0])); /* - * For the partial aggregation node, the grouping columns are always in the - * output, so we don't have to separately look at the list of the grouping - * columns. + * Allocate the temporary filter array for computing the combined results of + * batch filter, aggregate filter and column validity. 
*/ - Assert(list_length(policy->output_grouping_columns) == 1); - GroupingColumn *g = linitial(policy->output_grouping_columns); - CompressedColumnValues *key_column = &batch_state->compressed_columns[g->input_offset]; - int start_row = 0; - int end_row = 0; - for (start_row = 0; start_row < n; start_row = end_row) + const size_t num_words = (n + 63) / 64; + if (num_words > policy->num_tmp_filter_words) { - /* - * If we have a highly selective filter, it's easy to skip the rows for - * which the entire words of the filter bitmap are zero. - */ - if (filter) - { - if (filter[start_row / 64] == 0) - { - end_row = MIN(start_row + 64, n); - policy->stat_bulk_filtered_rows += 64; - continue; - } - - for (end_row = start_row; end_row < n; end_row = MIN(end_row + 64, n)) - { - if (filter[end_row / 64] == 0) - { - break; - } - } - } - else - { - end_row = n; - } - Assert(start_row <= end_row); - Assert(end_row <= n); - - /* - * Remember which aggregation states have already existed, and which we - * have to initialize. State index zero is invalid, and state index one - * is for null key. We have to initialize the null key state at the - * first run. - */ - const uint32 last_initialized_state_index = - policy->table->members ? policy->table->members + 2 : 1; - uint32 next_unused_state_index = policy->table->members + 2; + policy->tmp_filter = palloc(sizeof(*policy->tmp_filter) * (num_words * 2 + 1)); + policy->num_tmp_filter_words = (num_words * 2 + 1); + } - /* - * Match rows to aggregation states using a hash table. 
- */ - Assert((size_t) end_row <= policy->num_allocated_offsets); - switch ((int) key_column->decompression_type) - { - case DT_Scalar: - next_unused_state_index = fill_offsets_scalar(policy, - batch_state, - g->input_offset, - next_unused_state_index, - start_row, - end_row, - policy->offsets); - break; - case 8: - next_unused_state_index = fill_offsets_arrow_fixed_8(policy, - batch_state, - g->input_offset, - next_unused_state_index, - start_row, - end_row, - policy->offsets); - break; - case 4: - next_unused_state_index = fill_offsets_arrow_fixed_4(policy, - batch_state, - g->input_offset, - next_unused_state_index, - start_row, - end_row, - policy->offsets); - break; - case 2: - next_unused_state_index = fill_offsets_arrow_fixed_2(policy, - batch_state, - g->input_offset, - next_unused_state_index, - start_row, - end_row, - policy->offsets); - break; - default: - Assert(false); - break; - } + /* + * Arrange the input compressed columns in the order of grouping columns. + */ + for (int i = 0; i < policy->num_grouping_columns; i++) + { + const GroupingColumn *def = &policy->grouping_columns[i]; + const CompressedColumnValues *values = &batch_state->compressed_columns[def->input_offset]; + policy->current_batch_grouping_column_values[i] = *values; + } - ListCell *aggdeflc; - ListCell *aggstatelc; + /* + * Call the per-batch initialization function of the hashing strategy. + */ - /* - * Initialize the aggregate function states for the newly added keys. 
- */ - if (next_unused_state_index > last_initialized_state_index) - { - if (next_unused_state_index > policy->allocated_aggstate_rows) - { - policy->allocated_aggstate_rows = policy->allocated_aggstate_rows * 2 + 1; - forboth (aggdeflc, policy->agg_defs, aggstatelc, policy->per_agg_states) - { - VectorAggDef *agg_def = lfirst(aggdeflc); - lfirst(aggstatelc) = - repalloc(lfirst(aggstatelc), - policy->allocated_aggstate_rows * agg_def->func.state_bytes); - } - } + policy->hashing.prepare_for_batch(policy, batch_state); - forboth (aggdeflc, policy->agg_defs, aggstatelc, policy->per_agg_states) - { - const VectorAggDef *agg_def = lfirst(aggdeflc); - agg_def->func.agg_init(agg_def->func.state_bytes * last_initialized_state_index + - (char *) lfirst(aggstatelc), - next_unused_state_index - last_initialized_state_index); - } - } + /* + * Add the batch rows to aggregate function states. + */ + const uint64_t *restrict filter = batch_state->vector_qual_result; + add_one_range(policy, batch_state, 0, n); - /* - * Update the aggregate function states. - */ - forboth (aggdeflc, policy->agg_defs, aggstatelc, policy->per_agg_states) - { - compute_single_aggregate(batch_state, - start_row, - end_row, - lfirst(aggdeflc), - lfirst(aggstatelc), - policy->offsets, - policy->agg_extra_mctx); - } - } - Assert(end_row == n); + policy->stat_input_total_rows += batch_state->total_batch_rows; + policy->stat_input_valid_rows += arrow_num_valid(filter, batch_state->total_batch_rows); } static bool @@ -657,6 +354,15 @@ gp_hash_should_emit(GroupingPolicy *gp) { GroupingPolicyHash *policy = (GroupingPolicyHash *) gp; + if (policy->last_used_key_index > UINT32_MAX - GLOBAL_MAX_ROWS_PER_COMPRESSION) + { + /* + * The max valid key index is UINT32_MAX, so we have to spill if the next + * batch can possibly lead to key index overflow. + */ + return true; + } + /* * Don't grow the hash table cardinality too much, otherwise we become bound * by memory reads. 
In general, when this first stage of grouping doesn't @@ -664,7 +370,7 @@ gp_hash_should_emit(GroupingPolicy *gp) * work will be done by the final Postgres aggregation, so we should bail * out early here. */ - return policy->table->members * sizeof(HashEntry) > 128 * 1024; + return policy->hashing.get_size_bytes(&policy->hashing) > 512 * 1024; } static bool @@ -675,61 +381,65 @@ gp_hash_do_emit(GroupingPolicy *gp, TupleTableSlot *aggregated_slot) if (!policy->returning_results) { policy->returning_results = true; - h_start_iterate(policy->table, &policy->iter); + policy->last_returned_key = 1; - const float keys = policy->table->members + policy->have_null_key; + const float keys = policy->last_used_key_index; if (keys > 0) { - DEBUG_LOG("spill after %ld input %ld valid %ld bulk filtered %.0f keys %f ratio %ld " - "aggctx bytes %ld aggstate bytes", + DEBUG_LOG("spill after %ld input, %ld valid, %ld bulk filtered, %ld cons, %.0f keys, " + "%f ratio, %ld curctx bytes, %ld aggstate bytes", policy->stat_input_total_rows, policy->stat_input_valid_rows, - policy->stat_bulk_filtered_rows, + 0UL, + policy->stat_consecutive_keys, keys, policy->stat_input_valid_rows / keys, - MemoryContextMemAllocated(policy->table->ctx, false), + MemoryContextMemAllocated(CurrentMemoryContext, false), MemoryContextMemAllocated(policy->agg_extra_mctx, false)); } } - - HashEntry null_key_entry = { .agg_state_index = 1 }; - HashEntry *entry = h_iterate(policy->table, &policy->iter); - bool key_is_null = false; - if (entry == NULL && policy->have_null_key) + else { - policy->have_null_key = false; - entry = &null_key_entry; - key_is_null = true; + policy->last_returned_key++; } - if (entry == NULL) + const uint32 current_key = policy->last_returned_key; + const uint32 keys_end = policy->last_used_key_index + 1; + if (current_key >= keys_end) { policy->returning_results = false; return false; } - const int naggs = list_length(policy->agg_defs); + const int naggs = policy->num_agg_defs; for (int 
i = 0; i < naggs; i++) { - VectorAggDef *agg_def = (VectorAggDef *) list_nth(policy->agg_defs, i); - void *agg_states = list_nth(policy->per_agg_states, i); - void *agg_state = entry->agg_state_index * agg_def->func.state_bytes + (char *) agg_states; + const VectorAggDef *agg_def = &policy->agg_defs[i]; + void *agg_states = policy->per_agg_per_key_states[i]; + void *agg_state = current_key * agg_def->func.state_bytes + (char *) agg_states; agg_def->func.agg_emit(agg_state, &aggregated_slot->tts_values[agg_def->output_offset], &aggregated_slot->tts_isnull[agg_def->output_offset]); } - Assert(list_length(policy->output_grouping_columns) == 1); - GroupingColumn *col = linitial(policy->output_grouping_columns); - aggregated_slot->tts_values[col->output_offset] = entry->key; - aggregated_slot->tts_isnull[col->output_offset] = key_is_null; + policy->hashing.emit_key(policy, current_key, aggregated_slot); + + DEBUG_PRINT("%p: output key index %d\n", policy, current_key); return true; } +static char * +gp_hash_explain(GroupingPolicy *gp) +{ + GroupingPolicyHash *policy = (GroupingPolicyHash *) gp; + return psprintf("hashed with %s key", policy->hashing.explain_name); +} + static const GroupingPolicy grouping_policy_hash_functions = { .gp_reset = gp_hash_reset, .gp_add_batch = gp_hash_add_batch, .gp_should_emit = gp_hash_should_emit, .gp_do_emit = gp_hash_do_emit, + .gp_explain = gp_hash_explain, }; diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.h b/tsl/src/nodes/vector_agg/grouping_policy_hash.h new file mode 100644 index 00000000000..d7cda9d77c4 --- /dev/null +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.h @@ -0,0 +1,158 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. 
 + */
+#pragma once
+
+#include 
+
+#include 
+
+#include "grouping_policy.h"
+
+#include "nodes/decompress_chunk/compressed_batch.h"
+
+#include "hashing/hashing_strategy.h"
+
+typedef struct GroupingPolicyHash GroupingPolicyHash;
+
+/*
+ * Hash grouping policy.
+ *
+ * The grouping and aggregation is performed as follows:
+ *
+ * 0) The grouping policy keeps track of the unique grouping keys seen in
+ * the input rows, and the states of aggregate functions for each key. This
+ * spans multiple input compressed batches, and is reset after the partial
+ * aggregation results are emitted.
+ *
+ * 1) For each row of the new compressed batch, we obtain an index that
+ * uniquely identifies its grouping key. This is done by matching the row's
+ * grouping columns to the hash table recording the unique grouping keys and
+ * their respective indexes. It is performed in bulk for all rows of the batch,
+ * to improve memory locality. The details of this are managed by the hashing
+ * strategy.
+ *
+ * 2) The key indexes are used to locate the aggregate function states
+ * corresponding to a given row, and update them. This is done in bulk for all
+ * rows of the batch, and for each aggregate function separately, to generate
+ * simpler and potentially vectorizable code, and improve memory locality.
+ *
+ * 3) After the input has ended, or if the memory limit is reached, the partial
+ * results are emitted into the output slot. This is done in the order of unique
+ * grouping key indexes, thereby preserving the incoming key order. This
+ * guarantees that this policy works correctly even in a Partial GroupAggregate
+ * node, even though it's not optimal performance-wise.
+ */
+typedef struct GroupingPolicyHash
+{
+	/*
+	 * We're using data inheritance from the GroupingPolicy.
+	 */
+	GroupingPolicy funcs;
+
+	/*
+	 * Aggregate function definitions.
+	 */
+	int num_agg_defs;
+	const VectorAggDef *restrict agg_defs;
+
+	/*
+	 * Grouping column definitions.
 + */
+	int num_grouping_columns;
+	const GroupingColumn *restrict grouping_columns;
+
+	/*
+	 * The values of the grouping columns picked from the compressed batch and
+	 * arranged in the order of grouping column definitions.
+	 */
+	CompressedColumnValues *restrict current_batch_grouping_column_values;
+
+	/*
+	 * Hashing strategy that is responsible for mapping the rows to the unique
+	 * indexes of their grouping keys.
+	 */
+	HashingStrategy hashing;
+
+	/*
+	 * The last used index of a unique grouping key. Key index 0 is invalid.
+	 */
+	uint32 last_used_key_index;
+
+	/*
+	 * Temporary storage of unique indexes of keys corresponding to a given row
+	 * of the compressed batch that is currently being aggregated. We keep it in
+	 * the policy because it is potentially too big to keep on stack, and we
+	 * don't want to reallocate it for each batch.
+	 */
+	uint32 *restrict key_index_for_row;
+	uint64 num_key_index_for_row;
+
+	/*
+	 * The temporary filter bitmap we use to combine the results of the
+	 * vectorized filters in WHERE, validity of the aggregate function argument,
+	 * and the aggregate FILTER clause. It is then used by the aggregate
+	 * function implementation to filter out the rows that don't pass.
+	 */
+	uint64 *tmp_filter;
+	uint64 num_tmp_filter_words;
+
+	/*
+	 * Aggregate function states. Each element is an array of states for the
+	 * respective function from agg_defs. These arrays are indexed by the unique
+	 * grouping key indexes. The key index 0 is invalid, so the corresponding
+	 * states are unused.
+	 * The states of each aggregate function are stored separately and
+	 * contiguously, to achieve better memory locality when updating them.
+	 */
+	void **per_agg_per_key_states;
+	uint64 num_allocated_per_key_agg_states;
+
+	/*
+	 * A memory context for aggregate functions to allocate additional data,
+	 * i.e. if they store strings or float8 datum on 32-bit systems. Valid until
+	 * the grouping policy is reset.
+ */ + MemoryContext agg_extra_mctx; + + /* + * Whether we are in the mode of returning the partial aggregation results. + * If we are, track the index of the last returned grouping key. + */ + bool returning_results; + uint32 last_returned_key; + + /* + * Some statistics for debugging. + */ + uint64 stat_input_total_rows; + uint64 stat_input_valid_rows; + uint64 stat_consecutive_keys; + + /* + * FIXME all the stuff below should be moved out. + */ + + /* + * Temporary key storages. Some hashing strategies need to put the key in a + * separate memory area, we don't want to alloc/free it on each row. + */ + uint8 *tmp_key_storage; + uint64 num_tmp_key_storage_bytes; + + /* + * For single text key that uses dictionary encoding, in some cases we first + * calculate the key indexes for the dictionary entries, and then translate + * it to the actual rows. + */ + uint32 *restrict key_index_for_dict; + uint64 num_key_index_for_dict; + bool use_key_index_for_dict; +} GroupingPolicyHash; + +//#define DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__) +#ifndef DEBUG_PRINT +#define DEBUG_PRINT(...) 
+#endif diff --git a/tsl/src/nodes/vector_agg/hashing/CMakeLists.txt b/tsl/src/nodes/vector_agg/hashing/CMakeLists.txt new file mode 100644 index 00000000000..c6ff65f65ca --- /dev/null +++ b/tsl/src/nodes/vector_agg/hashing/CMakeLists.txt @@ -0,0 +1,6 @@ +set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/hash_strategy_single_fixed_2.c + ${CMAKE_CURRENT_SOURCE_DIR}/hash_strategy_single_fixed_4.c + ${CMAKE_CURRENT_SOURCE_DIR}/hash_strategy_single_fixed_8.c + ${CMAKE_CURRENT_SOURCE_DIR}/hash_strategy_common.c) +target_sources(${TSL_LIBRARY_NAME} PRIVATE ${SOURCES}) diff --git a/tsl/src/nodes/vector_agg/hashing/batch_hashing_params.h b/tsl/src/nodes/vector_agg/hashing/batch_hashing_params.h new file mode 100644 index 00000000000..a4db2a19b1c --- /dev/null +++ b/tsl/src/nodes/vector_agg/hashing/batch_hashing_params.h @@ -0,0 +1,55 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. + */ + +#pragma once + +typedef struct BatchHashingParams +{ + const uint64 *batch_filter; + CompressedColumnValues single_key; + + int num_grouping_columns; + const CompressedColumnValues *grouping_column_values; + + /* + * Whether we have any scalar or nullable grouping columns in the current + * batch. This is used to select the more efficient implementation when we + * have none. 
+ */ + bool have_scalar_or_nullable_columns; + + GroupingPolicyHash *restrict policy; + + uint32 *restrict result_key_indexes; +} BatchHashingParams; + +static pg_attribute_always_inline BatchHashingParams +build_batch_hashing_params(GroupingPolicyHash *policy, DecompressBatchState *batch_state) +{ + BatchHashingParams params = { + .policy = policy, + .batch_filter = batch_state->vector_qual_result, + .num_grouping_columns = policy->num_grouping_columns, + .grouping_column_values = policy->current_batch_grouping_column_values, + .result_key_indexes = policy->key_index_for_row, + }; + + Assert(policy->num_grouping_columns > 0); + if (policy->num_grouping_columns == 1) + { + params.single_key = policy->current_batch_grouping_column_values[0]; + } + + for (int i = 0; i < policy->num_grouping_columns; i++) + { + params.have_scalar_or_nullable_columns = + params.have_scalar_or_nullable_columns || + (policy->current_batch_grouping_column_values[i].decompression_type == DT_Scalar || + policy->current_batch_grouping_column_values[i].buffers[0] != NULL); + } + + return params; +} diff --git a/tsl/src/nodes/vector_agg/hashing/hash64.h b/tsl/src/nodes/vector_agg/hashing/hash64.h new file mode 100644 index 00000000000..97b0ec17987 --- /dev/null +++ b/tsl/src/nodes/vector_agg/hashing/hash64.h @@ -0,0 +1,36 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. + */ +#pragma once + +/* + * We can use crc32 as a hash function, it has bad properties but takes only one + * cycle, which is why it is sometimes used in the existing hash table + * implementations. When we don't have the crc32 instruction, use the SplitMix64 + * finalizer. 
+ */ +static pg_attribute_always_inline uint64 +hash64_splitmix(uint64 x) +{ + x ^= x >> 30; + x *= 0xbf58476d1ce4e5b9U; + x ^= x >> 27; + x *= 0x94d049bb133111ebU; + x ^= x >> 31; + return x; +} + +#ifdef USE_SSE42_CRC32C +#include +static pg_attribute_always_inline uint64 +hash64_crc(uint64 x) +{ + return _mm_crc32_u64(~0ULL, x); +} + +#define HASH64 hash64_crc +#else +#define HASH64 hash64_splitmix +#endif diff --git a/tsl/src/nodes/vector_agg/hashing/hash_strategy_common.c b/tsl/src/nodes/vector_agg/hashing/hash_strategy_common.c new file mode 100644 index 00000000000..ead986dc600 --- /dev/null +++ b/tsl/src/nodes/vector_agg/hashing/hash_strategy_common.c @@ -0,0 +1,52 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. + */ + +#include "hashing_strategy.h" + +#include "nodes/vector_agg/exec.h" +#include "nodes/vector_agg/grouping_policy_hash.h" + +/* + * Allocate enough storage for keys, given that each row of the new compressed + * batch might turn out to be a new grouping key. We do this separately to avoid + * allocations in the hot loop that fills the hash table. + */ +void +hash_strategy_output_key_alloc(GroupingPolicyHash *policy, DecompressBatchState *batch_state) +{ + HashingStrategy *hashing = &policy->hashing; + const int n = batch_state->total_batch_rows; + const uint32 num_possible_keys = policy->last_used_key_index + 1 + n; + if (num_possible_keys > hashing->num_allocated_output_keys) + { + hashing->num_allocated_output_keys = num_possible_keys * 2 + 1; + const size_t new_bytes = sizeof(Datum) * hashing->num_allocated_output_keys; + if (hashing->output_keys == NULL) + { + hashing->output_keys = palloc(new_bytes); + } + else + { + hashing->output_keys = repalloc(hashing->output_keys, new_bytes); + } + } +} + +/* + * Emit a single-column grouping key with the given index into the aggregated + * slot. 
+ */ +void +hash_strategy_output_key_single_emit(GroupingPolicyHash *policy, uint32 current_key, + TupleTableSlot *aggregated_slot) +{ + HashingStrategy *hashing = &policy->hashing; + Assert(policy->num_grouping_columns == 1); + + const GroupingColumn *col = &policy->grouping_columns[0]; + aggregated_slot->tts_values[col->output_offset] = hashing->output_keys[current_key]; + aggregated_slot->tts_isnull[col->output_offset] = current_key == hashing->null_key_index; +} diff --git a/tsl/src/nodes/vector_agg/hashing/hash_strategy_impl.c b/tsl/src/nodes/vector_agg/hashing/hash_strategy_impl.c new file mode 100644 index 00000000000..5656e8d69b4 --- /dev/null +++ b/tsl/src/nodes/vector_agg/hashing/hash_strategy_impl.c @@ -0,0 +1,281 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. + */ + +#include "batch_hashing_params.h" + +/* + * The hash table maps the value of the grouping key to its unique index. + * We don't store any extra information here, because we're accessing the memory + * of the hash table randomly, and want it to be as small as possible to fit the + * caches. + */ +typedef struct FUNCTION_NAME(entry) +{ + /* Key index 0 is invalid. 
*/ + uint32 key_index; + + uint8 status; + + HASH_TABLE_KEY_TYPE hash_table_key; +} FUNCTION_NAME(entry); + +#define SH_PREFIX KEY_VARIANT +#define SH_ELEMENT_TYPE FUNCTION_NAME(entry) +#define SH_KEY_TYPE HASH_TABLE_KEY_TYPE +#define SH_KEY hash_table_key +#define SH_HASH_KEY(tb, key) KEY_HASH(key) +#define SH_EQUAL(tb, a, b) KEY_EQUAL(a, b) +#define SH_SCOPE static inline +#define SH_DECLARE +#define SH_DEFINE +#include + +struct FUNCTION_NAME(hash); + +static uint64 +FUNCTION_NAME(get_size_bytes)(HashingStrategy *hashing) +{ + struct FUNCTION_NAME(hash) *hash = (struct FUNCTION_NAME(hash) *) hashing->table; + return hash->members * sizeof(FUNCTION_NAME(entry)); +} + +static void +FUNCTION_NAME(hash_strategy_init)(HashingStrategy *hashing, GroupingPolicyHash *policy) +{ + hashing->table = + FUNCTION_NAME(create)(CurrentMemoryContext, policy->num_allocated_per_key_agg_states, NULL); + + FUNCTION_NAME(key_hashing_init)(hashing); +} + +static void +FUNCTION_NAME(hash_strategy_reset)(HashingStrategy *hashing) +{ + struct FUNCTION_NAME(hash) *table = (struct FUNCTION_NAME(hash) *) hashing->table; + FUNCTION_NAME(reset)(table); + hashing->null_key_index = 0; +} + +static void +FUNCTION_NAME(hash_strategy_prepare_for_batch)(GroupingPolicyHash *policy, + DecompressBatchState *batch_state) +{ + hash_strategy_output_key_alloc(policy, batch_state); + FUNCTION_NAME(key_hashing_prepare_for_batch)(policy, batch_state); +} + +/* + * Fill the unique key indexes for all rows of the batch, using a hash table. 
+ */ +static pg_attribute_always_inline void +FUNCTION_NAME(fill_offsets_impl)(BatchHashingParams params, int start_row, int end_row) +{ + GroupingPolicyHash *policy = params.policy; + HashingStrategy *hashing = &policy->hashing; + + uint32 *restrict indexes = params.result_key_indexes; + + struct FUNCTION_NAME(hash) *restrict table = hashing->table; + + HASH_TABLE_KEY_TYPE prev_hash_table_key; + uint32 previous_key_index = 0; + for (int row = start_row; row < end_row; row++) + { + if (!arrow_row_is_valid(params.batch_filter, row)) + { + /* The row doesn't pass the filter. */ + DEBUG_PRINT("%p: row %d doesn't pass batch filter\n", policy, row); + continue; + } + + /* + * Get the key for the given row. For some hashing strategies, the key + * that is used for the hash table is different from actual values of + * the grouping columns, termed "output key" here. + */ + bool key_valid = false; + OUTPUT_KEY_TYPE output_key = { 0 }; + HASH_TABLE_KEY_TYPE hash_table_key = { 0 }; + FUNCTION_NAME(key_hashing_get_key)(params, row, &output_key, &hash_table_key, &key_valid); + + if (unlikely(!key_valid)) + { + /* The key is null. */ + if (hashing->null_key_index == 0) + { + hashing->null_key_index = ++policy->last_used_key_index; + } + indexes[row] = hashing->null_key_index; + DEBUG_PRINT("%p: row %d null key index %d\n", policy, row, hashing->null_key_index); + continue; + } + + if (likely(previous_key_index != 0) && KEY_EQUAL(hash_table_key, prev_hash_table_key)) + { + /* + * In real data sets, we often see consecutive rows with the + * same value of a grouping column, so checking for this case + * improves performance. For multi-column keys, this is unlikely, + * but we currently often have suboptimal plans that use this policy + * as a GroupAggregate, so we still use this as an easy optimization + * for that case. 
+ */ + indexes[row] = previous_key_index; +#ifndef NDEBUG + policy->stat_consecutive_keys++; +#endif + DEBUG_PRINT("%p: row %d consecutive key index %d\n", policy, row, previous_key_index); + continue; + } + + /* + * Find the key using the hash table. + */ + bool found = false; + FUNCTION_NAME(entry) *restrict entry = FUNCTION_NAME(insert)(table, hash_table_key, &found); + if (!found) + { + /* + * New key, have to store it persistently. + */ + const uint32 index = ++policy->last_used_key_index; + entry->key_index = index; + FUNCTION_NAME(key_hashing_store_new)(policy, index, output_key); + DEBUG_PRINT("%p: row %d new key index %d\n", policy, row, index); + } + else + { + DEBUG_PRINT("%p: row %d old key index %d\n", policy, row, entry->key_index); + } + indexes[row] = entry->key_index; + + previous_key_index = entry->key_index; + prev_hash_table_key = entry->hash_table_key; + } +} + +/* + * For some configurations of hashing, we want to generate dedicated + * implementations that will be more efficient. For example, for 2-byte keys + * when all the batch and key rows are valid. 
+ */ +#define APPLY_FOR_BATCH_FILTER(X, NAME, COND) \ + X(NAME##_nofilter, (COND) && (params.batch_filter == NULL)) \ + X(NAME##_filter, (COND) && (params.batch_filter != NULL)) + +#define APPLY_FOR_NULLABILITY(X, NAME, COND) \ + APPLY_FOR_BATCH_FILTER(X, NAME##_notnull, (COND) && params.single_key.buffers[0] == NULL) \ + APPLY_FOR_BATCH_FILTER(X, NAME##_nullable, (COND) && params.single_key.buffers[0] != NULL) + +#define APPLY_FOR_SCALARS(X, NAME, COND) \ + APPLY_FOR_BATCH_FILTER(X, \ + NAME##_noscalar_notnull, \ + (COND) && !params.have_scalar_or_nullable_columns) \ + APPLY_FOR_BATCH_FILTER(X, \ + NAME##_scalar_or_nullable, \ + (COND) && params.have_scalar_or_nullable_columns) + +#define APPLY_FOR_TYPE(X, NAME, COND) \ + APPLY_FOR_NULLABILITY(X, \ + NAME##_byval, \ + (COND) && \ + params.single_key.decompression_type == sizeof(OUTPUT_KEY_TYPE)) \ + APPLY_FOR_NULLABILITY(X, \ + NAME##_text, \ + (COND) && params.single_key.decompression_type == DT_ArrowText) \ + APPLY_FOR_NULLABILITY(X, \ + NAME##_dict, \ + (COND) && params.single_key.decompression_type == DT_ArrowTextDict) \ + APPLY_FOR_SCALARS(X, NAME##_multi, (COND) && params.single_key.decompression_type == DT_Invalid) + +#define APPLY_FOR_SPECIALIZATIONS(X) APPLY_FOR_TYPE(X, index, true) + +#define DEFINE(NAME, CONDITION) \ + static pg_noinline void FUNCTION_NAME( \ + NAME)(BatchHashingParams params, int start_row, int end_row) \ + { \ + if (!(CONDITION)) \ + { \ + pg_unreachable(); \ + } \ + \ + FUNCTION_NAME(fill_offsets_impl)(params, start_row, end_row); \ + } + +APPLY_FOR_SPECIALIZATIONS(DEFINE) + +#undef DEFINE + +static void +FUNCTION_NAME(dispatch_for_params)(BatchHashingParams params, int start_row, int end_row) +{ + if (params.num_grouping_columns == 0) + { + pg_unreachable(); + } + + if ((params.num_grouping_columns == 1) != (params.single_key.decompression_type != DT_Invalid)) + { + pg_unreachable(); + } + +#define DISPATCH(NAME, CONDITION) \ + if (CONDITION) \ + { \ + FUNCTION_NAME(NAME)(params, 
start_row, end_row); \ + } \ + else + + APPLY_FOR_SPECIALIZATIONS(DISPATCH) + { + /* Use a generic implementation if no specializations matched. */ + FUNCTION_NAME(fill_offsets_impl)(params, start_row, end_row); + } +#undef DISPATCH +} + +#undef APPLY_FOR_SPECIALIZATIONS + +/* + * In some special cases we call a more efficient specialization of the grouping + * function. + */ +static void +FUNCTION_NAME(fill_offsets)(GroupingPolicyHash *policy, DecompressBatchState *batch_state, + int start_row, int end_row) +{ + Assert((size_t) end_row <= policy->num_key_index_for_row); + + BatchHashingParams params = build_batch_hashing_params(policy, batch_state); + +#ifdef USE_DICT_HASHING + if (policy->use_key_index_for_dict) + { + Assert(params.single_key.decompression_type == DT_ArrowTextDict); + single_text_offsets_translate(params, start_row, end_row); + return; + } +#endif + + FUNCTION_NAME(dispatch_for_params)(params, start_row, end_row); +} + +HashingStrategy FUNCTION_NAME(strategy) = { + .emit_key = FUNCTION_NAME(emit_key), + .explain_name = EXPLAIN_NAME, + .fill_offsets = FUNCTION_NAME(fill_offsets), + .get_size_bytes = FUNCTION_NAME(get_size_bytes), + .init = FUNCTION_NAME(hash_strategy_init), + .prepare_for_batch = FUNCTION_NAME(hash_strategy_prepare_for_batch), + .reset = FUNCTION_NAME(hash_strategy_reset), +}; + +#undef EXPLAIN_NAME +#undef KEY_VARIANT +#undef KEY_EQUAL +#undef OUTPUT_KEY_TYPE +#undef HASH_TABLE_KEY_TYPE +#undef USE_DICT_HASHING diff --git a/tsl/src/nodes/vector_agg/hashing/hash_strategy_impl_single_fixed_key.c b/tsl/src/nodes/vector_agg/hashing/hash_strategy_impl_single_fixed_key.c new file mode 100644 index 00000000000..c8a3c2da57f --- /dev/null +++ b/tsl/src/nodes/vector_agg/hashing/hash_strategy_impl_single_fixed_key.c @@ -0,0 +1,70 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. 
+ */ + +/* + * Key handling function for a single fixed-size grouping key. + */ + +#include "batch_hashing_params.h" + +static void +FUNCTION_NAME(key_hashing_init)(HashingStrategy *hashing) +{ +} + +static void +FUNCTION_NAME(key_hashing_prepare_for_batch)(GroupingPolicyHash *policy, + DecompressBatchState *batch_state) +{ +} + +static pg_attribute_always_inline void +FUNCTION_NAME(key_hashing_get_key)(BatchHashingParams params, int row, + void *restrict output_key_ptr, void *restrict hash_table_key_ptr, + bool *restrict valid) +{ + OUTPUT_KEY_TYPE *restrict output_key = (OUTPUT_KEY_TYPE *) output_key_ptr; + HASH_TABLE_KEY_TYPE *restrict hash_table_key = (HASH_TABLE_KEY_TYPE *) hash_table_key_ptr; + + if (unlikely(params.single_key.decompression_type == DT_Scalar)) + { + *output_key = DATUM_TO_OUTPUT_KEY(*params.single_key.output_value); + *valid = !*params.single_key.output_isnull; + } + else if (params.single_key.decompression_type == sizeof(OUTPUT_KEY_TYPE)) + { + const OUTPUT_KEY_TYPE *values = params.single_key.buffers[1]; + *valid = arrow_row_is_valid(params.single_key.buffers[0], row); + *output_key = values[row]; + } + else + { + pg_unreachable(); + } + + /* + * For the fixed-size hash grouping, we use the output key as the hash table + * key as well. 
+ */ + *hash_table_key = *output_key; +} + +static pg_attribute_always_inline void +FUNCTION_NAME(key_hashing_store_new)(GroupingPolicyHash *restrict policy, uint32 new_key_index, + OUTPUT_KEY_TYPE output_key) +{ + policy->hashing.output_keys[new_key_index] = OUTPUT_KEY_TO_DATUM(output_key); +} + +static void +FUNCTION_NAME(emit_key)(GroupingPolicyHash *policy, uint32 current_key, + TupleTableSlot *aggregated_slot) +{ + hash_strategy_output_key_single_emit(policy, current_key, aggregated_slot); +} + +#undef DATUM_TO_OUTPUT_KEY +#undef OUTPUT_KEY_TO_DATUM diff --git a/tsl/src/nodes/vector_agg/hashing/hash_strategy_single_fixed_2.c b/tsl/src/nodes/vector_agg/hashing/hash_strategy_single_fixed_2.c new file mode 100644 index 00000000000..8e2a9083d06 --- /dev/null +++ b/tsl/src/nodes/vector_agg/hashing/hash_strategy_single_fixed_2.c @@ -0,0 +1,32 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. + */ + +/* + * Implementation of column hashing for a single fixed size 2-byte column. 
+ */ + +#include <postgres.h> + +#include "compression/arrow_c_data_interface.h" +#include "hash64.h" +#include "nodes/decompress_chunk/compressed_batch.h" +#include "nodes/vector_agg/exec.h" +#include "nodes/vector_agg/grouping_policy_hash.h" +#include "template_helper.h" + +#define EXPLAIN_NAME "single 2-byte" +#define KEY_VARIANT single_fixed_2 +#define OUTPUT_KEY_TYPE int16 +#define HASH_TABLE_KEY_TYPE OUTPUT_KEY_TYPE +#define DATUM_TO_OUTPUT_KEY DatumGetInt16 +#define OUTPUT_KEY_TO_DATUM Int16GetDatum + +#include "hash_strategy_impl_single_fixed_key.c" + +#define KEY_EQUAL(a, b) a == b +#define KEY_HASH(X) HASH64(X) + +#include "hash_strategy_impl.c" diff --git a/tsl/src/nodes/vector_agg/hashing/hash_strategy_single_fixed_4.c b/tsl/src/nodes/vector_agg/hashing/hash_strategy_single_fixed_4.c new file mode 100644 index 00000000000..96679548d52 --- /dev/null +++ b/tsl/src/nodes/vector_agg/hashing/hash_strategy_single_fixed_4.c @@ -0,0 +1,32 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. + */ + +/* + * Implementation of column hashing for a single fixed size 4-byte column. 
+ */ + +#include <postgres.h> + +#include "compression/arrow_c_data_interface.h" +#include "hash64.h" +#include "nodes/decompress_chunk/compressed_batch.h" +#include "nodes/vector_agg/exec.h" +#include "nodes/vector_agg/grouping_policy_hash.h" +#include "template_helper.h" + +#define EXPLAIN_NAME "single 4-byte" +#define KEY_VARIANT single_fixed_4 +#define OUTPUT_KEY_TYPE int32 +#define HASH_TABLE_KEY_TYPE int32 +#define DATUM_TO_OUTPUT_KEY DatumGetInt32 +#define OUTPUT_KEY_TO_DATUM Int32GetDatum + +#include "hash_strategy_impl_single_fixed_key.c" + +#define KEY_EQUAL(a, b) a == b +#define KEY_HASH(X) HASH64(X) + +#include "hash_strategy_impl.c" diff --git a/tsl/src/nodes/vector_agg/hashing/hash_strategy_single_fixed_8.c b/tsl/src/nodes/vector_agg/hashing/hash_strategy_single_fixed_8.c new file mode 100644 index 00000000000..e0f12adf85b --- /dev/null +++ b/tsl/src/nodes/vector_agg/hashing/hash_strategy_single_fixed_8.c @@ -0,0 +1,32 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. + */ + +/* + * Implementation of column hashing for a single fixed size 8-byte column. 
+ */ + +#include <postgres.h> + +#include "compression/arrow_c_data_interface.h" +#include "hash64.h" +#include "nodes/decompress_chunk/compressed_batch.h" +#include "nodes/vector_agg/exec.h" +#include "nodes/vector_agg/grouping_policy_hash.h" +#include "template_helper.h" + +#define EXPLAIN_NAME "single 8-byte" +#define KEY_VARIANT single_fixed_8 +#define OUTPUT_KEY_TYPE int64 +#define HASH_TABLE_KEY_TYPE int64 +#define DATUM_TO_OUTPUT_KEY DatumGetInt64 +#define OUTPUT_KEY_TO_DATUM Int64GetDatum + +#include "hash_strategy_impl_single_fixed_key.c" + +#define KEY_EQUAL(a, b) a == b +#define KEY_HASH(X) HASH64(X) + +#include "hash_strategy_impl.c" diff --git a/tsl/src/nodes/vector_agg/hashing/hashing_strategy.h b/tsl/src/nodes/vector_agg/hashing/hashing_strategy.h new file mode 100644 index 00000000000..e2bfda3b180 --- /dev/null +++ b/tsl/src/nodes/vector_agg/hashing/hashing_strategy.h @@ -0,0 +1,64 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. + */ +#pragma once + +#include <postgres.h> + +typedef struct GroupingPolicyHash GroupingPolicyHash; + +typedef struct HashingStrategy HashingStrategy; + +typedef struct DecompressBatchState DecompressBatchState; + +typedef struct TupleTableSlot TupleTableSlot; + +typedef struct HashingStrategy +{ + char *explain_name; + void (*init)(HashingStrategy *hashing, GroupingPolicyHash *policy); + void (*reset)(HashingStrategy *hashing); + uint64 (*get_size_bytes)(HashingStrategy *hashing); + void (*prepare_for_batch)(GroupingPolicyHash *policy, DecompressBatchState *batch_state); + void (*fill_offsets)(GroupingPolicyHash *policy, DecompressBatchState *batch_state, + int start_row, int end_row); + void (*emit_key)(GroupingPolicyHash *policy, uint32 current_key, + TupleTableSlot *aggregated_slot); + + /* + * The hash table we use for grouping. It matches each grouping key to its + * unique integer index. 
+ */ + void *table; + + /* + * For each unique grouping key, we store the values of the grouping columns. + * This is stored separately from hash table keys, because they might not + * have the full column values, and also storing them contiguously here + * leads to better memory access patterns when emitting the results. + * The details of the key storage are managed by the hashing strategy. The + * by-reference keys can use a separate memory context for dense storage. + */ + Datum *restrict output_keys; + uint64 num_allocated_output_keys; + MemoryContext key_body_mctx; + + /* + * In single-column grouping, we store the null key outside of the hash + * table, and its index is given by this value. Key index 0 is invalid. + * This is done to avoid having an "is null" flag in the hash table entries, + * to reduce the hash table size. + */ + uint32 null_key_index; + + /* + * UMASH fingerprinting parameters. + */ + struct umash_params *umash_params; +} HashingStrategy; + +void hash_strategy_output_key_alloc(GroupingPolicyHash *policy, DecompressBatchState *batch_state); +void hash_strategy_output_key_single_emit(GroupingPolicyHash *policy, uint32 current_key, + TupleTableSlot *aggregated_slot); diff --git a/tsl/src/nodes/vector_agg/hashing/template_helper.h b/tsl/src/nodes/vector_agg/hashing/template_helper.h new file mode 100644 index 00000000000..684186ab27e --- /dev/null +++ b/tsl/src/nodes/vector_agg/hashing/template_helper.h @@ -0,0 +1,10 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. 
+ */ +#pragma once + +#define FUNCTION_NAME_HELPER2(X, Y) X##_##Y +#define FUNCTION_NAME_HELPER(X, Y) FUNCTION_NAME_HELPER2(X, Y) +#define FUNCTION_NAME(Y) FUNCTION_NAME_HELPER(KEY_VARIANT, Y) diff --git a/tsl/src/nodes/vector_agg/plan.c b/tsl/src/nodes/vector_agg/plan.c index 34ab45044fc..669a52ca203 100644 --- a/tsl/src/nodes/vector_agg/plan.c +++ b/tsl/src/nodes/vector_agg/plan.c @@ -366,13 +366,11 @@ can_vectorize_grouping(Agg *agg, CustomScan *custom, List *resolved_targetlist) /* * We support hashed vectorized grouping by one fixed-size by-value * compressed column. - * We cannot use it when the plan has GroupAggregate because the - * latter requires sorted output. */ - if (agg->numCols == 1 && agg->aggstrategy == AGG_HASHED) + if (agg->numCols == 1) { int offset = AttrNumberGetAttrOffset(agg->grpColIdx[0]); - TargetEntry *entry = list_nth(agg->plan.targetlist, offset); + TargetEntry *entry = list_nth(resolved_targetlist, offset); bool is_segmentby = false; if (is_vector_var(custom, entry->expr, &is_segmentby)) diff --git a/tsl/test/expected/vector_agg_functions.out b/tsl/test/expected/vector_agg_functions.out index e7e970986cb..bffb8d7af7d 100644 --- a/tsl/test/expected/vector_agg_functions.out +++ b/tsl/test/expected/vector_agg_functions.out @@ -114,7 +114,7 @@ limit 1 set timescaledb.debug_require_vector_agg = :'guc_value'; ---- Uncomment to generate reference. Note that there are minor discrepancies ---- on float4 due to different numeric stability in our and PG implementations. 
--- set timescaledb.enable_vectorized_aggregation to off; set timescaledb.debug_require_vector_agg = 'allow'; +--set timescaledb.enable_vectorized_aggregation to off; set timescaledb.debug_require_vector_agg = 'forbid'; select format('%sselect %s%s(%s) from aggfns%s%s%s;', explain, @@ -157,7 +157,8 @@ from 'cint2 is null']) with ordinality as condition(condition, n), unnest(array[ null, - 's']) with ordinality as grouping(grouping, n) + 's', + 'ss']) with ordinality as grouping(grouping, n) where true and (explain is null /* or condition is null and grouping = 's' */) @@ -190,6 +191,21 @@ select s, count(*) from aggfns group by s order by count(*), s limit 10; 9 | 20000 (10 rows) +select ss, count(*) from aggfns group by ss order by count(*), ss limit 10; + ss | count +----+------- + | 19 + 3 | 19981 + 4 | 19981 + 0 | 20000 + 5 | 20000 + 6 | 20000 + 7 | 20000 + 8 | 20000 + 9 | 20000 + 11 | 40019 +(10 rows) + select max(cdate) from aggfns; max ------------ @@ -211,6 +227,21 @@ select s, max(cdate) from aggfns group by s order by max(cdate), s limit 10; 9 | 06-01-2267 (10 rows) +select ss, max(cdate) from aggfns group by ss order by max(cdate), ss limit 10; + ss | max +----+------------ + 0 | 01-01-2021 + 3 | 02-21-2103 + | 02-21-2103 + 4 | 07-09-2130 + 11 | 07-09-2130 + 5 | 11-24-2157 + 6 | 04-11-2185 + 7 | 08-28-2212 + 8 | 01-14-2240 + 9 | 06-01-2267 +(10 rows) + select min(cdate) from aggfns; min ------------ @@ -232,6 +263,21 @@ select s, min(cdate) from aggfns group by s order by min(cdate), s limit 10; 9 | 06-01-2267 (10 rows) +select ss, min(cdate) from aggfns group by ss order by min(cdate), ss limit 10; + ss | min +----+------------ + 0 | 01-01-2021 + 11 | 05-19-2048 + 3 | 02-21-2103 + | 02-21-2103 + 4 | 07-09-2130 + 5 | 11-24-2157 + 6 | 04-11-2185 + 7 | 08-28-2212 + 8 | 01-14-2240 + 9 | 06-01-2267 +(10 rows) + select avg(cfloat4) from aggfns; avg ----- @@ -253,6 +299,21 @@ select s, avg(cfloat4) from aggfns group by s order by avg(cfloat4), s limit 10; 1 | 
NaN (10 rows) +select ss, avg(cfloat4) from aggfns group by ss order by avg(cfloat4), ss limit 10; + ss | avg +----+---------------------- + 3 | -Infinity + | -1.39583652270468 + 9 | -0.292700759558938 + 4 | -0.169252917487522 + 6 | -0.00610964622725733 + 5 | 0.0107821527590975 + 0 | 0.0862269837114494 + 7 | 0.19168354413514 + 8 | 0.456703752867272 + 11 | NaN +(10 rows) + select max(cfloat4) from aggfns; max ----- @@ -274,6 +335,21 @@ select s, max(cfloat4) from aggfns group by s order by max(cfloat4), s limit 10; 1 | NaN (10 rows) +select ss, max(cfloat4) from aggfns group by ss order by max(cfloat4), ss limit 10; + ss | max +----+--------- + | 47.2047 + 9 | 49.9899 + 4 | 49.9946 + 6 | 49.9956 + 7 | 49.9969 + 3 | 49.9979 + 5 | 49.9992 + 0 | 49.9995 + 8 | 49.9997 + 11 | NaN +(10 rows) + select min(cfloat4) from aggfns; min ----------- @@ -295,6 +371,21 @@ select s, min(cfloat4) from aggfns group by s order by min(cfloat4), s limit 10; 9 | -49.9911 (10 rows) +select ss, min(cfloat4) from aggfns group by ss order by min(cfloat4), ss limit 10; + ss | min +----+----------- + 3 | -Infinity + 4 | -49.9999 + 6 | -49.9995 + 7 | -49.9984 + 8 | -49.9969 + 0 | -49.9949 + 5 | -49.9942 + 9 | -49.9911 + | -45.4083 + 11 | NaN +(10 rows) + select stddev(cfloat4) from aggfns; stddev -------- @@ -316,6 +407,21 @@ select s, stddev(cfloat4) from aggfns group by s order by stddev(cfloat4), s lim 3 | NaN (10 rows) +select ss, stddev(cfloat4) from aggfns group by ss order by stddev(cfloat4), ss limit 10; + ss | stddev +----+------------------ + 0 | 28.7274163912974 + 7 | 28.7892027644318 + 4 | 28.8220943927954 + 9 | 28.8426424990846 + 6 | 28.9190577543738 + 8 | 29.0040125904064 + 5 | 29.0213532270614 + | 30.6324072248673 + 3 | NaN + 11 | NaN +(10 rows) + select sum(cfloat4) from aggfns; sum ----- @@ -337,6 +443,21 @@ select s, sum(cfloat4) from aggfns group by s order by sum(cfloat4), s limit 10; 1 | NaN (10 rows) +select ss, sum(cfloat4) from aggfns group by ss order by sum(cfloat4), ss 
limit 10; + ss | sum +----+----------- + 3 | -Infinity + 9 | -5854.02 + 4 | -3381.84 + 6 | -122.193 + | -26.5209 + 5 | 215.643 + 0 | 1724.54 + 7 | 3833.67 + 8 | 9134.08 + 11 | NaN +(10 rows) + select avg(cfloat8) from aggfns; avg ----------------- @@ -358,6 +479,21 @@ select s, avg(cfloat8) from aggfns group by s order by avg(cfloat8), s limit 10; 1 | 13 (10 rows) +select ss, avg(cfloat8) from aggfns group by ss order by avg(cfloat8), ss limit 10; + ss | avg +----+-------------------- + 0 | -0.306925132697215 + 8 | -0.268692900155438 + 4 | -0.224160255000712 + 3 | -0.153492446187821 + 9 | -0.114842409039848 + 7 | -0.063637967283139 + 5 | 0.0438265096326359 + 6 | 0.169599099685438 + | 5.42090986487701 + 11 | 6.59778165165114 +(10 rows) + select max(cfloat8) from aggfns; max ------------------ @@ -379,6 +515,21 @@ select s, max(cfloat8) from aggfns group by s order by max(cfloat8), s limit 10; 9 | 49.9995574122295 (10 rows) +select ss, max(cfloat8) from aggfns group by ss order by max(cfloat8), ss limit 10; + ss | max +----+------------------ + | 46.3985309237614 + 5 | 49.9874341068789 + 3 | 49.9890822684392 + 6 | 49.9939429108053 + 8 | 49.9963666079566 + 0 | 49.9965498689562 + 7 | 49.9973275698721 + 11 | 49.9975695507601 + 4 | 49.9978997278959 + 9 | 49.9995574122295 +(10 rows) + select min(cfloat8) from aggfns; min ------------------- @@ -400,6 +551,21 @@ select s, min(cfloat8) from aggfns group by s order by min(cfloat8), s limit 10; 1 | 13 (10 rows) +select ss, min(cfloat8) from aggfns group by ss order by min(cfloat8), ss limit 10; + ss | min +----+------------------- + 0 | -49.9994775978848 + 11 | -49.9985320260748 + 4 | -49.9983572866768 + 3 | -49.9977725092322 + 6 | -49.9967515002936 + 9 | -49.992344272323 + 5 | -49.9921301845461 + 7 | -49.99003498815 + 8 | -49.9897602945566 + | -38.5084833716974 +(10 rows) + select stddev(cfloat8) from aggfns; stddev ------------------ @@ -421,6 +587,21 @@ select s, stddev(cfloat8) from aggfns group by s order by 
stddev(cfloat8), s lim 7 | 28.9656492103737 (10 rows) +select ss, stddev(cfloat8) from aggfns group by ss order by stddev(cfloat8), ss limit 10; + ss | stddev +----+------------------ + 11 | 21.3262797346004 + | 22.894065438835 + 9 | 28.7642081921344 + 4 | 28.7760615445521 + 5 | 28.7843925303698 + 6 | 28.8543767497508 + 3 | 28.926156595386 + 8 | 28.96331707256 + 0 | 28.9653425568561 + 7 | 28.9656492103736 +(10 rows) + select sum(cfloat8) from aggfns; sum ----------------- @@ -442,6 +623,21 @@ select s, sum(cfloat8) from aggfns group by s order by sum(cfloat8), s limit 10; 1 | 260000 (10 rows) +select ss, sum(cfloat8) from aggfns group by ss order by sum(cfloat8), ss limit 10; + ss | sum +----+------------------- + 0 | -6138.50265394431 + 8 | -5373.85800310876 + 4 | -4478.94605516922 + 3 | -3066.93256727885 + 9 | -2296.84818079695 + 7 | -1272.75934566278 + | 102.997287432663 + 5 | 876.530192652717 + 6 | 3391.98199370876 + 11 | 264036.623917427 +(10 rows) + select avg(cint2) from aggfns; avg ---------------------- @@ -463,6 +659,21 @@ select s, avg(cint2) from aggfns group by s order by avg(cint2), s limit 10; 5 | 110.0305290025524248 (10 rows) +select ss, avg(cint2) from aggfns group by ss order by avg(cint2), ss limit 10; + ss | avg +----+------------------------ + | -1368.1578947368421053 + 8 | -129.4959711726139833 + 3 | -94.5546037471195271 + 6 | -61.0756218407487113 + 7 | -55.8695260497472599 + 11 | -33.7550336409794652 + 4 | -27.5652740206392145 + 9 | -21.7994594865121866 + 0 | 17.5951654071367799 + 5 | 110.0305290025524248 +(10 rows) + select count(cint2) from aggfns; count -------- @@ -484,6 +695,21 @@ select s, count(cint2) from aggfns group by s order by count(cint2), s limit 10; 9 | 19981 (10 rows) +select ss, count(cint2) from aggfns group by ss order by count(cint2), ss limit 10; + ss | count +----+------- + | 19 + 3 | 19962 + 4 | 19962 + 0 | 19981 + 5 | 19981 + 6 | 19981 + 7 | 19981 + 8 | 19981 + 9 | 19981 + 11 | 39981 +(10 rows) + select max(cint2) 
from aggfns; max ------- @@ -505,6 +731,21 @@ select s, max(cint2) from aggfns group by s order by max(cint2), s limit 10; 9 | 16383 (10 rows) +select ss, max(cint2) from aggfns group by ss order by max(cint2), ss limit 10; + ss | max +----+------- + | 16362 + 3 | 16380 + 5 | 16381 + 7 | 16381 + 8 | 16382 + 0 | 16383 + 4 | 16383 + 6 | 16383 + 9 | 16383 + 11 | 16383 +(10 rows) + select min(cint2) from aggfns; min -------- @@ -526,6 +767,21 @@ select s, min(cint2) from aggfns group by s order by min(cint2), s limit 10; 9 | -16375 (10 rows) +select ss, min(cint2) from aggfns group by ss order by min(cint2), ss limit 10; + ss | min +----+-------- + 0 | -16383 + 4 | -16383 + 5 | -16383 + 6 | -16383 + 7 | -16382 + 8 | -16382 + 11 | -16382 + 3 | -16381 + 9 | -16375 + | -16100 +(10 rows) + select stddev(cint2) from aggfns; stddev ------------------- @@ -547,6 +803,21 @@ select s, stddev(cint2) from aggfns group by s order by stddev(cint2), s limit 1 1 | 9528.039076724276 (10 rows) +select ss, stddev(cint2) from aggfns group by ss order by stddev(cint2), ss limit 10; + ss | stddev +----+------------------- + | 8413.549166956554 + 9 | 9450.322790943425 + 7 | 9462.161209850735 + 6 | 9467.569674984571 + 5 | 9467.776835158782 + 3 | 9474.482349111595 + 8 | 9477.586839536066 + 4 | 9483.611454519949 + 0 | 9484.907423282680 + 11 | 9494.206429493352 +(10 rows) + select sum(cint2) from aggfns; sum ---------- @@ -568,6 +839,21 @@ select s, sum(cint2) from aggfns group by s order by sum(cint2), s limit 10; 5 | 2198520 (10 rows) +select ss, sum(cint2) from aggfns group by ss order by sum(cint2), ss limit 10; + ss | sum +----+---------- + 8 | -2587459 + 3 | -1887499 + 11 | -1349560 + 6 | -1220352 + 7 | -1116329 + 4 | -550258 + 9 | -435575 + | -25995 + 0 | 351569 + 5 | 2198520 +(10 rows) + select avg(cint4) from aggfns; avg --------------------- @@ -589,6 +875,21 @@ select s, avg(cint4) from aggfns group by s order by avg(cint4), s limit 10; 5 | 103.1069000000000000 (10 rows) +select ss, 
avg(cint4) from aggfns group by ss order by avg(cint4), ss limit 10; + ss | avg +----+----------------------- + 9 | -102.4283000000000000 + 6 | -53.1566500000000000 + 7 | -42.6121500000000000 + 8 | -29.2615500000000000 + 11 | -16.4247732327144606 + 4 | 9.6930584054852110 + 0 | 27.7536500000000000 + 3 | 68.3874180471447875 + 5 | 103.1069000000000000 + | 2197.6842105263157895 +(10 rows) + select max(cint4) from aggfns; max ------- @@ -610,6 +911,21 @@ select s, max(cint4) from aggfns group by s order by max(cint4), s limit 10; 9 | 16383 (10 rows) +select ss, max(cint4) from aggfns group by ss order by max(cint4), ss limit 10; + ss | max +----+------- + | 14812 + 3 | 16379 + 5 | 16379 + 7 | 16379 + 0 | 16383 + 4 | 16383 + 6 | 16383 + 8 | 16383 + 9 | 16383 + 11 | 16383 +(10 rows) + select min(cint4) from aggfns; min -------- @@ -631,6 +947,21 @@ select s, min(cint4) from aggfns group by s order by min(cint4), s limit 10; 5 | -16380 (10 rows) +select ss, min(cint4) from aggfns group by ss order by min(cint4), ss limit 10; + ss | min +----+-------- + 0 | -16383 + 7 | -16383 + 11 | -16383 + 3 | -16382 + 4 | -16382 + 6 | -16382 + 8 | -16382 + 9 | -16382 + 5 | -16380 + | -15907 +(10 rows) + select stddev(cint4) from aggfns; stddev ------------------- @@ -652,6 +983,21 @@ select s, stddev(cint4) from aggfns group by s order by stddev(cint4), s limit 1 5 | 9504.684751625578 (10 rows) +select ss, stddev(cint4) from aggfns group by ss order by stddev(cint4), ss limit 10; + ss | stddev +----+------------------- + | 9361.317298404296 + 0 | 9406.815855797801 + 6 | 9410.397911988306 + 9 | 9426.452583637956 + 4 | 9442.480718256247 + 8 | 9450.281544631633 + 11 | 9450.690059613938 + 3 | 9474.873657491443 + 7 | 9485.765898279180 + 5 | 9504.684751625578 +(10 rows) + select sum(cint4) from aggfns; sum --------- @@ -673,6 +1019,21 @@ select s, sum(cint4) from aggfns group by s order by sum(cint4), s limit 10; 5 | 2062138 (10 rows) +select ss, sum(cint4) from aggfns group by ss order by 
sum(cint4), ss limit 10; + ss | sum +----+---------- + 9 | -2048566 + 6 | -1063133 + 7 | -852243 + 11 | -657303 + 8 | -585231 + | 41756 + 4 | 193677 + 0 | 555073 + 3 | 1366449 + 5 | 2062138 +(10 rows) + select avg(cint8) from aggfns; avg ---------------------- @@ -694,6 +1055,21 @@ select s, avg(cint8) from aggfns group by s order by avg(cint8), s limit 10; 9 | 61.7467500000000000 (10 rows) +select ss, avg(cint8) from aggfns group by ss order by avg(cint8), ss limit 10; + ss | avg +----+----------------------- + 8 | -118.4870000000000000 + 5 | -81.6955500000000000 + 4 | -17.0811771182623492 + 11 | -15.1685449411529523 + 7 | -2.3563500000000000 + 6 | 11.9056500000000000 + 0 | 15.3018000000000000 + 3 | 37.6662329212752115 + 9 | 61.7467500000000000 + | 2467.2631578947368421 +(10 rows) + select max(cint8) from aggfns; max ------- @@ -715,6 +1091,21 @@ select s, max(cint8) from aggfns group by s order by max(cint8), s limit 10; 5 | 16383 (10 rows) +select ss, max(cint8) from aggfns group by ss order by max(cint8), ss limit 10; + ss | max +----+------- + | 13750 + 6 | 16380 + 7 | 16380 + 8 | 16380 + 3 | 16382 + 9 | 16382 + 0 | 16383 + 4 | 16383 + 5 | 16383 + 11 | 16383 +(10 rows) + select min(cint8) from aggfns; min -------- @@ -736,6 +1127,21 @@ select s, min(cint8) from aggfns group by s order by min(cint8), s limit 10; 3 | -16378 (10 rows) +select ss, min(cint8) from aggfns group by ss order by min(cint8), ss limit 10; + ss | min +----+-------- + 0 | -16383 + 6 | -16383 + 7 | -16383 + 8 | -16383 + 11 | -16383 + 5 | -16382 + 4 | -16381 + 9 | -16380 + 3 | -16378 + | -14174 +(10 rows) + select sum(cint8) from aggfns; sum ---------- @@ -757,6 +1163,21 @@ select s, sum(cint8) from aggfns group by s order by sum(cint8), s limit 10; 9 | 1234935 (10 rows) +select ss, sum(cint8) from aggfns group by ss order by sum(cint8), ss limit 10; + ss | sum +----+---------- + 8 | -2369740 + 5 | -1633911 + 11 | -607030 + 4 | -341299 + 7 | -47127 + | 46878 + 6 | 238113 + 0 | 306036 + 3 | 
752609 + 9 | 1234935 +(10 rows) + select max(cts) from aggfns; max -------------------------- @@ -778,6 +1199,21 @@ select s, max(cts) from aggfns group by s order by max(cts), s limit 10; 9 | Sat Jan 02 02:01:01 2021 (10 rows) +select ss, max(cts) from aggfns group by ss order by max(cts), ss limit 10; + ss | max +----+-------------------------- + 0 | Fri Jan 01 01:01:01 2021 + 3 | Fri Jan 01 09:21:01 2021 + | Fri Jan 01 09:21:01 2021 + 4 | Fri Jan 01 12:07:41 2021 + 11 | Fri Jan 01 12:07:41 2021 + 5 | Fri Jan 01 14:54:21 2021 + 6 | Fri Jan 01 17:41:01 2021 + 7 | Fri Jan 01 20:27:41 2021 + 8 | Fri Jan 01 23:14:21 2021 + 9 | Sat Jan 02 02:01:01 2021 +(10 rows) + select min(cts) from aggfns; min -------------------------- @@ -799,6 +1235,21 @@ select s, min(cts) from aggfns group by s order by min(cts), s limit 10; 9 | Sat Jan 02 02:01:01 2021 (10 rows) +select ss, min(cts) from aggfns group by ss order by min(cts), ss limit 10; + ss | min +----+-------------------------- + 0 | Fri Jan 01 01:01:01 2021 + 11 | Fri Jan 01 03:47:41 2021 + 3 | Fri Jan 01 09:21:01 2021 + | Fri Jan 01 09:21:01 2021 + 4 | Fri Jan 01 12:07:41 2021 + 5 | Fri Jan 01 14:54:21 2021 + 6 | Fri Jan 01 17:41:01 2021 + 7 | Fri Jan 01 20:27:41 2021 + 8 | Fri Jan 01 23:14:21 2021 + 9 | Sat Jan 02 02:01:01 2021 +(10 rows) + select max(ctstz) from aggfns; max ------------------------------ @@ -820,6 +1271,21 @@ select s, max(ctstz) from aggfns group by s order by max(ctstz), s limit 10; 9 | Sat Jan 02 02:01:01 2021 PST (10 rows) +select ss, max(ctstz) from aggfns group by ss order by max(ctstz), ss limit 10; + ss | max +----+------------------------------ + 0 | Fri Jan 01 01:01:01 2021 PST + 3 | Fri Jan 01 09:21:01 2021 PST + | Fri Jan 01 09:21:01 2021 PST + 4 | Fri Jan 01 12:07:41 2021 PST + 11 | Fri Jan 01 12:07:41 2021 PST + 5 | Fri Jan 01 14:54:21 2021 PST + 6 | Fri Jan 01 17:41:01 2021 PST + 7 | Fri Jan 01 20:27:41 2021 PST + 8 | Fri Jan 01 23:14:21 2021 PST + 9 | Sat Jan 02 02:01:01 2021 PST +(10 
rows) + select min(ctstz) from aggfns; min ------------------------------ @@ -841,6 +1307,21 @@ select s, min(ctstz) from aggfns group by s order by min(ctstz), s limit 10; 9 | Sat Jan 02 02:01:01 2021 PST (10 rows) +select ss, min(ctstz) from aggfns group by ss order by min(ctstz), ss limit 10; + ss | min +----+------------------------------ + 0 | Fri Jan 01 01:01:01 2021 PST + 11 | Fri Jan 01 03:47:41 2021 PST + 3 | Fri Jan 01 09:21:01 2021 PST + | Fri Jan 01 09:21:01 2021 PST + 4 | Fri Jan 01 12:07:41 2021 PST + 5 | Fri Jan 01 14:54:21 2021 PST + 6 | Fri Jan 01 17:41:01 2021 PST + 7 | Fri Jan 01 20:27:41 2021 PST + 8 | Fri Jan 01 23:14:21 2021 PST + 9 | Sat Jan 02 02:01:01 2021 PST +(10 rows) + select avg(s) from aggfns; avg -------------------- @@ -862,6 +1343,21 @@ select s, avg(s) from aggfns group by s order by avg(s), s limit 10; 9 | 9.0000000000000000 (10 rows) +select ss, avg(s) from aggfns group by ss order by avg(s), ss limit 10; + ss | avg +----+---------------------------- + 0 | 0.000000000000000000000000 + 11 | 1.5011869362053025 + 3 | 3.0000000000000000 + | 3.0000000000000000 + 4 | 4.0000000000000000 + 5 | 5.0000000000000000 + 6 | 6.0000000000000000 + 7 | 7.0000000000000000 + 8 | 8.0000000000000000 + 9 | 9.0000000000000000 +(10 rows) + select count(s) from aggfns; count -------- @@ -883,6 +1379,21 @@ select s, count(s) from aggfns group by s order by count(s), s limit 10; 9 | 20000 (10 rows) +select ss, count(s) from aggfns group by ss order by count(s), ss limit 10; + ss | count +----+------- + | 19 + 3 | 19981 + 4 | 19981 + 0 | 20000 + 5 | 20000 + 6 | 20000 + 7 | 20000 + 8 | 20000 + 9 | 20000 + 11 | 40019 +(10 rows) + select max(s) from aggfns; max ----- @@ -904,6 +1415,21 @@ select s, max(s) from aggfns group by s order by max(s), s limit 10; 9 | 9 (10 rows) +select ss, max(s) from aggfns group by ss order by max(s), ss limit 10; + ss | max +----+----- + 0 | 0 + 3 | 3 + | 3 + 4 | 4 + 11 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + 
select min(s) from aggfns; min ----- @@ -925,6 +1451,21 @@ select s, min(s) from aggfns group by s order by min(s), s limit 10; 9 | 9 (10 rows) +select ss, min(s) from aggfns group by ss order by min(s), ss limit 10; + ss | min +----+----- + 0 | 0 + 11 | 1 + 3 | 3 + | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + select stddev(s) from aggfns; stddev -------------------- @@ -946,6 +1487,21 @@ select s, stddev(s) from aggfns group by s order by stddev(s), s limit 10; 9 | 0 (10 rows) +select ss, stddev(s) from aggfns group by ss order by stddev(s), ss limit 10; + ss | stddev +----+------------------------ + 0 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 + | 0 + 11 | 0.50284545977155885187 +(10 rows) + select sum(s) from aggfns; sum -------- @@ -967,6 +1523,21 @@ select s, sum(s) from aggfns group by s order by sum(s), s limit 10; 9 | 180000 (10 rows) +select ss, sum(s) from aggfns group by ss order by sum(s), ss limit 10; + ss | sum +----+-------- + 0 | 0 + | 57 + 3 | 59943 + 11 | 60076 + 4 | 79924 + 5 | 100000 + 6 | 120000 + 7 | 140000 + 8 | 160000 + 9 | 180000 +(10 rows) + select avg(ss) from aggfns; avg -------------------- @@ -988,6 +1559,21 @@ select s, avg(ss) from aggfns group by s order by avg(ss), s limit 10; 2 | 11.0000000000000000 (10 rows) +select ss, avg(ss) from aggfns group by ss order by avg(ss), ss limit 10; + ss | avg +----+---------------------------- + 0 | 0.000000000000000000000000 + 3 | 3.0000000000000000 + 4 | 4.0000000000000000 + 5 | 5.0000000000000000 + 6 | 6.0000000000000000 + 7 | 7.0000000000000000 + 8 | 8.0000000000000000 + 9 | 9.0000000000000000 + 11 | 11.0000000000000000 + | +(10 rows) + select max(ss) from aggfns; max ----- @@ -1009,6 +1595,21 @@ select s, max(ss) from aggfns group by s order by max(ss), s limit 10; 4 | 11 (10 rows) +select ss, max(ss) from aggfns group by ss order by max(ss), ss limit 10; + ss | max +----+----- + 0 | 0 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 11 | 11 + | 
+(10 rows) + select min(ss) from aggfns; min ----- @@ -1030,6 +1631,21 @@ select s, min(ss) from aggfns group by s order by min(ss), s limit 10; 2 | 11 (10 rows) +select ss, min(ss) from aggfns group by ss order by min(ss), ss limit 10; + ss | min +----+----- + 0 | 0 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 11 | 11 + | +(10 rows) + select stddev(ss) from aggfns; stddev -------------------- @@ -1051,6 +1667,21 @@ select s, stddev(ss) from aggfns group by s order by stddev(ss), s limit 10; 4 | 0.21565737387148452722 (10 rows) +select ss, stddev(ss) from aggfns group by ss order by stddev(ss), ss limit 10; + ss | stddev +----+-------- + 0 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 + 11 | 0 + | +(10 rows) + select sum(ss) from aggfns; sum --------- @@ -1072,6 +1703,21 @@ select s, sum(ss) from aggfns group by s order by sum(ss), s limit 10; 2 | 220000 (10 rows) +select ss, sum(ss) from aggfns group by ss order by sum(ss), ss limit 10; + ss | sum +----+-------- + 0 | 0 + 3 | 59943 + 4 | 79924 + 5 | 100000 + 6 | 120000 + 7 | 140000 + 8 | 160000 + 9 | 180000 + 11 | 440209 + | +(10 rows) + select max(t) from aggfns; max -------- @@ -1093,6 +1739,21 @@ select s, max(t) from aggfns group by s order by max(t), s limit 10; 9 | 110000 (10 rows) +select ss, max(t) from aggfns group by ss order by max(t), ss limit 10; + ss | max +----+-------- + 0 | 20000 + | 49491 + 3 | 50000 + 11 | 59192 + 4 | 60000 + 5 | 70000 + 6 | 80000 + 7 | 90000 + 8 | 100000 + 9 | 110000 +(10 rows) + select min(t) from aggfns; min ----- @@ -1114,6 +1775,21 @@ select s, min(t) from aggfns group by s order by min(t), s limit 10; 9 | 90001 (10 rows) +select ss, min(t) from aggfns group by ss order by min(t), ss limit 10; + ss | min +----+------- + 0 | 1 + 11 | 10001 + 3 | 30001 + | 30537 + 4 | 40001 + 5 | 50001 + 6 | 60001 + 7 | 70001 + 8 | 80001 + 9 | 90001 +(10 rows) + select count(*) from aggfns where cfloat8 > 0; count -------- @@ -1135,6 +1811,21 @@ select s, count(*) 
from aggfns where cfloat8 > 0 group by s order by count(*), s 1 | 20000 (10 rows) +select ss, count(*) from aggfns where cfloat8 > 0 group by ss order by count(*), ss limit 10; + ss | count +----+------- + | 13 + 4 | 9872 + 0 | 9881 + 9 | 9945 + 3 | 9950 + 8 | 9950 + 5 | 9972 + 7 | 10021 + 6 | 10097 + 11 | 30084 +(10 rows) + select max(cdate) from aggfns where cfloat8 > 0; max ------------ @@ -1156,6 +1847,21 @@ select s, max(cdate) from aggfns where cfloat8 > 0 group by s order by max(cdate 9 | 06-01-2267 (10 rows) +select ss, max(cdate) from aggfns where cfloat8 > 0 group by ss order by max(cdate), ss limit 10; + ss | max +----+------------ + 0 | 01-01-2021 + 3 | 02-21-2103 + | 02-21-2103 + 4 | 07-09-2130 + 11 | 07-09-2130 + 5 | 11-24-2157 + 6 | 04-11-2185 + 7 | 08-28-2212 + 8 | 01-14-2240 + 9 | 06-01-2267 +(10 rows) + select min(cdate) from aggfns where cfloat8 > 0; min ------------ @@ -1177,6 +1883,21 @@ select s, min(cdate) from aggfns where cfloat8 > 0 group by s order by min(cdate 9 | 06-01-2267 (10 rows) +select ss, min(cdate) from aggfns where cfloat8 > 0 group by ss order by min(cdate), ss limit 10; + ss | min +----+------------ + 0 | 01-01-2021 + 11 | 05-19-2048 + 3 | 02-21-2103 + | 02-21-2103 + 4 | 07-09-2130 + 5 | 11-24-2157 + 6 | 04-11-2185 + 7 | 08-28-2212 + 8 | 01-14-2240 + 9 | 06-01-2267 +(10 rows) + select avg(cfloat4) from aggfns where cfloat8 > 0; avg ----- @@ -1198,6 +1919,21 @@ select s, avg(cfloat4) from aggfns where cfloat8 > 0 group by s order by avg(cfl 1 | NaN (10 rows) +select ss, avg(cfloat4) from aggfns where cfloat8 > 0 group by ss order by avg(cfloat4), ss limit 10; + ss | avg +----+-------------------- + 3 | -Infinity + 4 | -0.458554823065329 + 0 | -0.334856044433109 + 9 | -0.208302219537011 + 6 | 0.199537611181853 + 7 | 0.313851696029514 + 5 | 0.374879026647364 + 8 | 0.606801085094336 + | 1.47322510755979 + 11 | NaN +(10 rows) + select max(cfloat4) from aggfns where cfloat8 > 0; max ----- @@ -1219,6 +1955,21 @@ select s, 
max(cfloat4) from aggfns where cfloat8 > 0 group by s order by max(cfl 1 | NaN (10 rows) +select ss, max(cfloat4) from aggfns where cfloat8 > 0 group by ss order by max(cfloat4), ss limit 10; + ss | max +----+--------- + | 47.2047 + 9 | 49.9744 + 3 | 49.9744 + 0 | 49.9863 + 8 | 49.9923 + 4 | 49.9928 + 6 | 49.9956 + 7 | 49.9969 + 5 | 49.9992 + 11 | NaN +(10 rows) + select min(cfloat4) from aggfns where cfloat8 > 0; min ----------- @@ -1240,6 +1991,21 @@ select s, min(cfloat4) from aggfns where cfloat8 > 0 group by s order by min(cfl 6 | -49.9891 (10 rows) +select ss, min(cfloat4) from aggfns where cfloat8 > 0 group by ss order by min(cfloat4), ss limit 10; + ss | min +----+----------- + 3 | -Infinity + 4 | -49.9993 + 8 | -49.9969 + 7 | -49.9969 + 0 | -49.9915 + 9 | -49.9911 + 5 | -49.9892 + 6 | -49.9891 + | -41.6131 + 11 | NaN +(10 rows) + select stddev(cfloat4) from aggfns where cfloat8 > 0; stddev -------- @@ -1261,6 +2027,21 @@ select s, stddev(cfloat4) from aggfns where cfloat8 > 0 group by s order by stdd 3 | NaN (10 rows) +select ss, stddev(cfloat4) from aggfns where cfloat8 > 0 group by ss order by stddev(cfloat4), ss limit 10; + ss | stddev +----+------------------ + 7 | 28.7246858657947 + 0 | 28.7315562731003 + 9 | 28.7729261590403 + 4 | 28.8497176060195 + 5 | 28.9107809470208 + 6 | 28.9388387251543 + 8 | 29.1042713834566 + | 29.539145536489 + 3 | NaN + 11 | NaN +(10 rows) + select sum(cfloat4) from aggfns where cfloat8 > 0; sum ----- @@ -1282,6 +2063,21 @@ select s, sum(cfloat4) from aggfns where cfloat8 > 0 group by s order by sum(cfl 1 | NaN (10 rows) +select ss, sum(cfloat4) from aggfns where cfloat8 > 0 group by ss order by sum(cfloat4), ss limit 10; + ss | sum +----+----------- + 3 | -Infinity + 4 | -4526.85 + 0 | -3308.71 + 9 | -2071.57 + | 19.1519 + 6 | 2014.73 + 7 | 3145.11 + 5 | 3738.29 + 8 | 6037.67 + 11 | NaN +(10 rows) + select avg(cfloat8) from aggfns where cfloat8 > 0; avg ------------------ @@ -1303,6 +2099,21 @@ select s, avg(cfloat8) from 
aggfns where cfloat8 > 0 group by s order by avg(cfl 0 | 25.0776526587937 (10 rows) +select ss, avg(cfloat8) from aggfns where cfloat8 > 0 group by ss order by avg(cfloat8), ss limit 10; + ss | avg +----+------------------ + | 16.6705740293345 + 11 | 16.9860875451313 + 6 | 24.9229571834467 + 9 | 24.933601739557 + 8 | 24.9404756362227 + 4 | 24.9719502302445 + 7 | 24.9965050319499 + 5 | 25.0141908239782 + 3 | 25.0352176289523 + 0 | 25.0776526587937 +(10 rows) + select max(cfloat8) from aggfns where cfloat8 > 0; max ------------------ @@ -1324,6 +2135,21 @@ select s, max(cfloat8) from aggfns where cfloat8 > 0 group by s order by max(cfl 9 | 49.9995574122295 (10 rows) +select ss, max(cfloat8) from aggfns where cfloat8 > 0 group by ss order by max(cfloat8), ss limit 10; + ss | max +----+------------------ + | 46.3985309237614 + 5 | 49.9874341068789 + 3 | 49.9890822684392 + 6 | 49.9939429108053 + 8 | 49.9963666079566 + 0 | 49.9965498689562 + 7 | 49.9973275698721 + 11 | 49.9975695507601 + 4 | 49.9978997278959 + 9 | 49.9995574122295 +(10 rows) + select min(cfloat8) from aggfns where cfloat8 > 0; min ---------------------- @@ -1345,6 +2171,21 @@ select s, min(cfloat8) from aggfns where cfloat8 > 0 group by s order by min(cfl 1 | 13 (10 rows) +select ss, min(cfloat8) from aggfns where cfloat8 > 0 group by ss order by min(cfloat8), ss limit 10; + ss | min +----+---------------------- + 4 | 0.000765081495046616 + 7 | 0.000956561416387558 + 6 | 0.00179046764969826 + 0 | 0.00247885473072529 + 11 | 0.00441970769315958 + 3 | 0.00545482616871595 + 5 | 0.00628724228590727 + 9 | 0.0187294092029333 + 8 | 0.0195798231288791 + | 0.312147964723408 +(10 rows) + select stddev(cfloat8) from aggfns where cfloat8 > 0; stddev ------------------ @@ -1366,6 +2207,21 @@ select s, stddev(cfloat8) from aggfns where cfloat8 > 0 group by s order by stdd 8 | 14.507225286092 (10 rows) +select ss, stddev(cfloat8) from aggfns where cfloat8 > 0 group by ss order by stddev(cfloat8), ss limit 10; + ss | 
stddev +----+------------------ + 11 | 10.0892977778207 + 9 | 14.3145979997847 + 3 | 14.3656116060957 + 4 | 14.4158826742614 + 6 | 14.4175557556357 + 5 | 14.4400766885504 + 0 | 14.4509605112521 + 7 | 14.4643374353136 + 8 | 14.507225286092 + | 15.8897779049656 +(10 rows) + select sum(cfloat8) from aggfns where cfloat8 > 0; sum ------------------ @@ -1387,6 +2243,21 @@ select s, sum(cfloat8) from aggfns where cfloat8 > 0 group by s order by sum(cfl 1 | 260000 (10 rows) +select ss, sum(cfloat8) from aggfns where cfloat8 > 0 group by ss order by sum(cfloat8), ss limit 10; + ss | sum +----+------------------ + | 216.717462381348 + 4 | 246523.092672974 + 0 | 247792.285921541 + 9 | 247964.669299894 + 8 | 248157.732580416 + 3 | 249100.415408076 + 5 | 249441.510896711 + 7 | 250489.97692517 + 6 | 251647.098681261 + 11 | 511009.457707731 +(10 rows) + select avg(cint2) from aggfns where cfloat8 > 0; avg ---------------------- @@ -1408,6 +2279,21 @@ select s, avg(cint2) from aggfns where cfloat8 > 0 group by s order by avg(cint2 5 | 153.6364822808954924 (10 rows) +select ss, avg(cint2) from aggfns where cfloat8 > 0 group by ss order by avg(cint2), ss limit 10; + ss | avg +----+------------------------ + | -2431.3076923076923077 + 9 | -192.8237544036235531 + 3 | -156.9368272809576501 + 7 | -142.7671027664036752 + 4 | -119.1966149792236749 + 6 | -98.2421689135606661 + 8 | -1.6297525648762824 + 11 | 7.3528100356037667 + 0 | 28.7771364925070879 + 5 | 153.6364822808954924 +(10 rows) + select count(cint2) from aggfns where cfloat8 > 0; count -------- @@ -1429,6 +2315,21 @@ select s, count(cint2) from aggfns where cfloat8 > 0 group by s order by count(c 1 | 19981 (10 rows) +select ss, count(cint2) from aggfns where cfloat8 > 0 group by ss order by count(cint2), ss limit 10; + ss | count +----+------- + | 13 + 4 | 9867 + 0 | 9876 + 9 | 9935 + 3 | 9941 + 8 | 9942 + 5 | 9961 + 7 | 10013 + 6 | 10088 + 11 | 30053 +(10 rows) + select max(cint2) from aggfns where cfloat8 > 0; max ------- @@ 
-1450,6 +2351,21 @@ select s, max(cint2) from aggfns where cfloat8 > 0 group by s order by max(cint2 9 | 16383 (10 rows) +select ss, max(cint2) from aggfns where cfloat8 > 0 group by ss order by max(cint2), ss limit 10; + ss | max +----+------- + | 7971 + 3 | 16380 + 8 | 16380 + 5 | 16381 + 6 | 16381 + 7 | 16381 + 0 | 16383 + 4 | 16383 + 9 | 16383 + 11 | 16383 +(10 rows) + select min(cint2) from aggfns where cfloat8 > 0; min -------- @@ -1471,6 +2387,21 @@ select s, min(cint2) from aggfns where cfloat8 > 0 group by s order by min(cint2 9 | -16375 (10 rows) +select ss, min(cint2) from aggfns where cfloat8 > 0 group by ss order by min(cint2), ss limit 10; + ss | min +----+-------- + 0 | -16383 + 4 | -16383 + 6 | -16383 + 8 | -16382 + 5 | -16381 + 7 | -16380 + 3 | -16378 + 11 | -16378 + 9 | -16375 + | -16100 +(10 rows) + select stddev(cint2) from aggfns where cfloat8 > 0; stddev ------------------- @@ -1492,6 +2423,21 @@ select s, stddev(cint2) from aggfns where cfloat8 > 0 group by s order by stddev 1 | 9528.039076724276 (10 rows) +select ss, stddev(cint2) from aggfns where cfloat8 > 0 group by ss order by stddev(cint2), ss limit 10; + ss | stddev +----+------------------- + | 7759.524506314969 + 5 | 9422.095841513016 + 6 | 9433.502305093184 + 9 | 9441.945023643920 + 4 | 9447.849754018911 + 7 | 9460.956887483220 + 3 | 9463.490872675688 + 8 | 9466.374225763893 + 11 | 9488.645998388904 + 0 | 9519.824544774386 +(10 rows) + select sum(cint2) from aggfns where cfloat8 > 0; sum ---------- @@ -1513,6 +2459,21 @@ select s, sum(cint2) from aggfns where cfloat8 > 0 group by s order by sum(cint2 1 | 1837240 (10 rows) +select ss, sum(cint2) from aggfns where cfloat8 > 0 group by ss order by sum(cint2), ss limit 10; + ss | sum +----+---------- + 9 | -1915704 + 3 | -1560109 + 7 | -1429527 + 4 | -1176113 + 6 | -991067 + | -31607 + 8 | -16203 + 11 | 220974 + 0 | 284203 + 5 | 1530373 +(10 rows) + select avg(cint4) from aggfns where cfloat8 > 0; avg --------------------- @@ -1534,6 
+2495,21 @@ select s, avg(cint4) from aggfns where cfloat8 > 0 group by s order by avg(cint4 3 | 170.6088527551942186 (10 rows) +select ss, avg(cint4) from aggfns where cfloat8 > 0 group by ss order by avg(cint4), ss limit 10; + ss | avg +----+----------------------- + 9 | -227.0452488687782805 + 6 | -94.7697335842329405 + 4 | -40.9285858995137763 + 7 | -7.9618800518910288 + 11 | -4.2226765057838053 + 8 | 30.7776884422110553 + 5 | 70.0002005615724027 + 0 | 78.5152312518975812 + 3 | 169.6967839195979899 + | 868.6923076923076923 +(10 rows) + select max(cint4) from aggfns where cfloat8 > 0; max ------- @@ -1555,6 +2531,21 @@ select s, max(cint4) from aggfns where cfloat8 > 0 group by s order by max(cint4 1 | 16383 (10 rows) +select ss, max(cint4) from aggfns where cfloat8 > 0 group by ss order by max(cint4), ss limit 10; + ss | max +----+------- + | 14812 + 3 | 16379 + 5 | 16379 + 7 | 16379 + 0 | 16380 + 6 | 16380 + 9 | 16381 + 4 | 16382 + 8 | 16382 + 11 | 16383 +(10 rows) + select min(cint4) from aggfns where cfloat8 > 0; min -------- @@ -1576,6 +2567,21 @@ select s, min(cint4) from aggfns where cfloat8 > 0 group by s order by min(cint4 8 | -16377 (10 rows) +select ss, min(cint4) from aggfns where cfloat8 > 0 group by ss order by min(cint4), ss limit 10; + ss | min +----+-------- + 7 | -16383 + 11 | -16383 + 0 | -16382 + 9 | -16382 + 5 | -16380 + 3 | -16379 + 4 | -16378 + 6 | -16378 + 8 | -16377 + | -15907 +(10 rows) + select stddev(cint4) from aggfns where cfloat8 > 0; stddev ------------------- @@ -1597,6 +2603,21 @@ select s, stddev(cint4) from aggfns where cfloat8 > 0 group by s order by stddev 3 | 9509.065450373130 (10 rows) +select ss, stddev(cint4) from aggfns where cfloat8 > 0 group by ss order by stddev(cint4), ss limit 10; + ss | stddev +----+------------------- + | 8985.945186647640 + 0 | 9368.404782340758 + 6 | 9385.470128440942 + 8 | 9411.536015886790 + 4 | 9416.391322858156 + 11 | 9460.260597896060 + 9 | 9474.284943213442 + 5 | 9475.929892556881 + 7 | 
9500.872262505529 + 3 | 9510.123363067463 +(10 rows) + select sum(cint4) from aggfns where cfloat8 > 0; sum --------- @@ -1618,6 +2639,21 @@ select s, sum(cint4) from aggfns where cfloat8 > 0 group by s order by sum(cint4 3 | 1699776 (10 rows) +select ss, sum(cint4) from aggfns where cfloat8 > 0 group by ss order by sum(cint4), ss limit 10; + ss | sum +----+---------- + 9 | -2257965 + 6 | -956890 + 4 | -404047 + 11 | -127035 + 7 | -79786 + | 11293 + 8 | 306238 + 5 | 698042 + 0 | 775809 + 3 | 1688483 +(10 rows) + select avg(cint8) from aggfns where cfloat8 > 0; avg --------------------- @@ -1639,6 +2675,21 @@ select s, avg(cint8) from aggfns where cfloat8 > 0 group by s order by avg(cint8 2 | 148.9026206075044669 (10 rows) +select ss, avg(cint8) from aggfns where cfloat8 > 0 group by ss order by avg(cint8), ss limit 10; + ss | avg +----+------------------------ + 8 | -166.4501507537688442 + 5 | -78.9197753710389089 + 4 | -61.5197528363047002 + 6 | -32.8705556105773992 + 7 | 1.15707015267937331604 + 11 | 33.0028919026725170 + 0 | 42.9815808116587390 + 9 | 44.5682252388134741 + 3 | 106.1022110552763819 + | 2876.8461538461538462 +(10 rows) + select max(cint8) from aggfns where cfloat8 > 0; max ------- @@ -1660,6 +2711,21 @@ select s, max(cint8) from aggfns where cfloat8 > 0 group by s order by max(cint8 5 | 16383 (10 rows) +select ss, max(cint8) from aggfns where cfloat8 > 0 group by ss order by max(cint8), ss limit 10; + ss | max +----+------- + | 13750 + 7 | 16378 + 6 | 16379 + 0 | 16380 + 8 | 16380 + 3 | 16381 + 4 | 16382 + 9 | 16382 + 5 | 16383 + 11 | 16383 +(10 rows) + select min(cint8) from aggfns where cfloat8 > 0; min -------- @@ -1681,6 +2747,21 @@ select s, min(cint8) from aggfns where cfloat8 > 0 group by s order by min(cint8 3 | -16378 (10 rows) +select ss, min(cint8) from aggfns where cfloat8 > 0 group by ss order by min(cint8), ss limit 10; + ss | min +----+-------- + 7 | -16383 + 8 | -16383 + 11 | -16383 + 5 | -16382 + 4 | -16381 + 6 | -16381 + 9 | 
-16380 + 0 | -16379 + 3 | -16378 + | -11918 +(10 rows) + select sum(cint8) from aggfns where cfloat8 > 0; sum --------- @@ -1702,6 +2783,21 @@ select s, sum(cint8) from aggfns where cfloat8 > 0 group by s order by sum(cint8 2 | 1500045 (10 rows) +select ss, sum(cint8) from aggfns where cfloat8 > 0 group by ss order by sum(cint8), ss limit 10; + ss | sum +----+---------- + 8 | -1656179 + 5 | -786988 + 4 | -607323 + 6 | -331894 + 7 | 11595 + | 37399 + 0 | 424701 + 9 | 443231 + 11 | 992859 + 3 | 1055717 +(10 rows) + select max(cts) from aggfns where cfloat8 > 0; max -------------------------- @@ -1723,6 +2819,21 @@ select s, max(cts) from aggfns where cfloat8 > 0 group by s order by max(cts), s 9 | Sat Jan 02 02:01:01 2021 (10 rows) +select ss, max(cts) from aggfns where cfloat8 > 0 group by ss order by max(cts), ss limit 10; + ss | max +----+-------------------------- + 0 | Fri Jan 01 01:01:01 2021 + 3 | Fri Jan 01 09:21:01 2021 + | Fri Jan 01 09:21:01 2021 + 4 | Fri Jan 01 12:07:41 2021 + 11 | Fri Jan 01 12:07:41 2021 + 5 | Fri Jan 01 14:54:21 2021 + 6 | Fri Jan 01 17:41:01 2021 + 7 | Fri Jan 01 20:27:41 2021 + 8 | Fri Jan 01 23:14:21 2021 + 9 | Sat Jan 02 02:01:01 2021 +(10 rows) + select min(cts) from aggfns where cfloat8 > 0; min -------------------------- @@ -1744,6 +2855,21 @@ select s, min(cts) from aggfns where cfloat8 > 0 group by s order by min(cts), s 9 | Sat Jan 02 02:01:01 2021 (10 rows) +select ss, min(cts) from aggfns where cfloat8 > 0 group by ss order by min(cts), ss limit 10; + ss | min +----+-------------------------- + 0 | Fri Jan 01 01:01:01 2021 + 11 | Fri Jan 01 03:47:41 2021 + 3 | Fri Jan 01 09:21:01 2021 + | Fri Jan 01 09:21:01 2021 + 4 | Fri Jan 01 12:07:41 2021 + 5 | Fri Jan 01 14:54:21 2021 + 6 | Fri Jan 01 17:41:01 2021 + 7 | Fri Jan 01 20:27:41 2021 + 8 | Fri Jan 01 23:14:21 2021 + 9 | Sat Jan 02 02:01:01 2021 +(10 rows) + select max(ctstz) from aggfns where cfloat8 > 0; max ------------------------------ @@ -1765,6 +2891,21 @@ select s, 
max(ctstz) from aggfns where cfloat8 > 0 group by s order by max(ctstz 9 | Sat Jan 02 02:01:01 2021 PST (10 rows) +select ss, max(ctstz) from aggfns where cfloat8 > 0 group by ss order by max(ctstz), ss limit 10; + ss | max +----+------------------------------ + 0 | Fri Jan 01 01:01:01 2021 PST + 3 | Fri Jan 01 09:21:01 2021 PST + | Fri Jan 01 09:21:01 2021 PST + 4 | Fri Jan 01 12:07:41 2021 PST + 11 | Fri Jan 01 12:07:41 2021 PST + 5 | Fri Jan 01 14:54:21 2021 PST + 6 | Fri Jan 01 17:41:01 2021 PST + 7 | Fri Jan 01 20:27:41 2021 PST + 8 | Fri Jan 01 23:14:21 2021 PST + 9 | Sat Jan 02 02:01:01 2021 PST +(10 rows) + select min(ctstz) from aggfns where cfloat8 > 0; min ------------------------------ @@ -1786,6 +2927,21 @@ select s, min(ctstz) from aggfns where cfloat8 > 0 group by s order by min(ctstz 9 | Sat Jan 02 02:01:01 2021 PST (10 rows) +select ss, min(ctstz) from aggfns where cfloat8 > 0 group by ss order by min(ctstz), ss limit 10; + ss | min +----+------------------------------ + 0 | Fri Jan 01 01:01:01 2021 PST + 11 | Fri Jan 01 03:47:41 2021 PST + 3 | Fri Jan 01 09:21:01 2021 PST + | Fri Jan 01 09:21:01 2021 PST + 4 | Fri Jan 01 12:07:41 2021 PST + 5 | Fri Jan 01 14:54:21 2021 PST + 6 | Fri Jan 01 17:41:01 2021 PST + 7 | Fri Jan 01 20:27:41 2021 PST + 8 | Fri Jan 01 23:14:21 2021 PST + 9 | Sat Jan 02 02:01:01 2021 PST +(10 rows) + select avg(s) from aggfns where cfloat8 > 0; avg -------------------- @@ -1796,10 +2952,10 @@ select s, avg(s) from aggfns where cfloat8 > 0 group by s order by avg(s), s lim s | avg ---+------------------------ 0 | 0.00000000000000000000 - 1 | 11.0000000000000000 - 2 | 11.0000000000000000 + 1 | 1.00000000000000000000 + 2 | 2.0000000000000000 3 | 3.0000000000000000 - 4 | 4.0077441416071608 + 4 | 4.0000000000000000 5 | 5.0000000000000000 6 | 6.0000000000000000 7 | 7.0000000000000000 @@ -1807,6 +2963,21 @@ select s, avg(s) from aggfns where cfloat8 > 0 group by s order by avg(s), s lim 9 | 9.0000000000000000 (10 rows) +select ss, 
avg(s) from aggfns where cfloat8 > 0 group by ss order by avg(s), ss limit 10; + ss | avg +----+------------------------ + 0 | 0.00000000000000000000 + 11 | 1.3358595931392102 + 3 | 3.0000000000000000 + | 3.0000000000000000 + 4 | 4.0000000000000000 + 5 | 5.0000000000000000 + 6 | 6.0000000000000000 + 7 | 7.0000000000000000 + 8 | 8.0000000000000000 + 9 | 9.0000000000000000 +(10 rows) + select count(s) from aggfns where cfloat8 > 0; count -------- @@ -1828,20 +2999,35 @@ select s, count(s) from aggfns where cfloat8 > 0 group by s order by count(s), s 1 | 20000 (10 rows) +select ss, count(s) from aggfns where cfloat8 > 0 group by ss order by count(s), ss limit 10; + ss | count +----+------- + | 13 + 4 | 9872 + 0 | 9881 + 9 | 9945 + 3 | 9950 + 8 | 9950 + 5 | 9972 + 7 | 10021 + 6 | 10097 + 11 | 30084 +(10 rows) + select max(s) from aggfns where cfloat8 > 0; max ----- - 11 + 9 (1 row) select s, max(s) from aggfns where cfloat8 > 0 group by s order by max(s), s limit 10; s | max ---+----- 0 | 0 - 1 | 11 - 2 | 11 + 1 | 1 + 2 | 2 3 | 3 - 4 | 11 + 4 | 4 5 | 5 6 | 6 7 | 7 @@ -1849,6 +3035,21 @@ select s, max(s) from aggfns where cfloat8 > 0 group by s order by max(s), s lim 9 | 9 (10 rows) +select ss, max(s) from aggfns where cfloat8 > 0 group by ss order by max(s), ss limit 10; + ss | max +----+----- + 0 | 0 + 3 | 3 + | 3 + 4 | 4 + 11 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + select min(s) from aggfns where cfloat8 > 0; min ----- @@ -1859,8 +3060,8 @@ select s, min(s) from aggfns where cfloat8 > 0 group by s order by min(s), s lim s | min ---+----- 0 | 0 - 1 | 11 - 2 | 11 + 1 | 1 + 2 | 2 3 | 3 4 | 4 5 | 5 @@ -1870,6 +3071,21 @@ select s, min(s) from aggfns where cfloat8 > 0 group by s order by min(s), s lim 9 | 9 (10 rows) +select ss, min(s) from aggfns where cfloat8 > 0 group by ss order by min(s), ss limit 10; + ss | min +----+----- + 0 | 0 + 11 | 1 + 3 | 3 + | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + select stddev(s) from aggfns where 
cfloat8 > 0; stddev -------------------- @@ -1891,6 +3107,21 @@ select s, stddev(s) from aggfns where cfloat8 > 0 group by s order by stddev(s), 9 | 0 (10 rows) +select ss, stddev(s) from aggfns where cfloat8 > 0 group by ss order by stddev(s), ss limit 10; + ss | stddev +----+------------------------ + 0 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 + | 0 + 11 | 0.47440470436008342899 +(10 rows) + select sum(s) from aggfns where cfloat8 > 0; sum -------- @@ -1912,6 +3143,21 @@ select s, sum(s) from aggfns where cfloat8 > 0 group by s order by sum(s), s lim 9 | 89505 (10 rows) +select ss, sum(s) from aggfns where cfloat8 > 0 group by ss order by sum(s), ss limit 10; + ss | sum +----+------- + 0 | 0 + | 39 + 3 | 29850 + 4 | 39488 + 11 | 40188 + 5 | 49860 + 6 | 60582 + 7 | 70147 + 8 | 79600 + 9 | 89505 +(10 rows) + select avg(ss) from aggfns where cfloat8 > 0; avg -------------------- @@ -1933,6 +3179,21 @@ select s, avg(ss) from aggfns where cfloat8 > 0 group by s order by avg(ss), s l 2 | 11.0000000000000000 (10 rows) +select ss, avg(ss) from aggfns where cfloat8 > 0 group by ss order by avg(ss), ss limit 10; + ss | avg +----+------------------------ + 0 | 0.00000000000000000000 + 3 | 3.0000000000000000 + 4 | 4.0000000000000000 + 5 | 5.0000000000000000 + 6 | 6.0000000000000000 + 7 | 7.0000000000000000 + 8 | 8.0000000000000000 + 9 | 9.0000000000000000 + 11 | 11.0000000000000000 + | +(10 rows) + select max(ss) from aggfns where cfloat8 > 0; max ----- @@ -1954,6 +3215,21 @@ select s, max(ss) from aggfns where cfloat8 > 0 group by s order by max(ss), s l 4 | 11 (10 rows) +select ss, max(ss) from aggfns where cfloat8 > 0 group by ss order by max(ss), ss limit 10; + ss | max +----+----- + 0 | 0 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 11 | 11 + | +(10 rows) + select min(ss) from aggfns where cfloat8 > 0; min ----- @@ -1975,6 +3251,21 @@ select s, min(ss) from aggfns where cfloat8 > 0 group by s order by min(ss), s l 2 | 11 (10 rows) +select 
ss, min(ss) from aggfns where cfloat8 > 0 group by ss order by min(ss), ss limit 10; + ss | min +----+----- + 0 | 0 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 11 | 11 + | +(10 rows) + select stddev(ss) from aggfns where cfloat8 > 0; stddev -------------------- @@ -1996,6 +3287,21 @@ select s, stddev(ss) from aggfns where cfloat8 > 0 group by s order by stddev(ss 4 | 0.22257569540261848080 (10 rows) +select ss, stddev(ss) from aggfns where cfloat8 > 0 group by ss order by stddev(ss), ss limit 10; + ss | stddev +----+-------- + 0 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 + 11 | 0 + | +(10 rows) + select sum(ss) from aggfns where cfloat8 > 0; sum -------- @@ -2017,6 +3323,21 @@ select s, sum(ss) from aggfns where cfloat8 > 0 group by s order by sum(ss), s l 1 | 220000 (10 rows) +select ss, sum(ss) from aggfns where cfloat8 > 0 group by ss order by sum(ss), ss limit 10; + ss | sum +----+-------- + 0 | 0 + 3 | 29850 + 4 | 39488 + 5 | 49860 + 6 | 60582 + 7 | 70147 + 8 | 79600 + 9 | 89505 + 11 | 330924 + | +(10 rows) + select max(t) from aggfns where cfloat8 > 0; max -------- @@ -2038,6 +3359,21 @@ select s, max(t) from aggfns where cfloat8 > 0 group by s order by max(t), s lim 9 | 110000 (10 rows) +select ss, max(t) from aggfns where cfloat8 > 0 group by ss order by max(t), ss limit 10; + ss | max +----+-------- + 0 | 19999 + | 49491 + 3 | 49999 + 11 | 59192 + 4 | 60000 + 5 | 70000 + 6 | 79998 + 7 | 89995 + 8 | 99997 + 9 | 110000 +(10 rows) + select min(t) from aggfns where cfloat8 > 0; min ----- @@ -2059,6 +3395,21 @@ select s, min(t) from aggfns where cfloat8 > 0 group by s order by min(t), s lim 9 | 90002 (10 rows) +select ss, min(t) from aggfns where cfloat8 > 0 group by ss order by min(t), ss limit 10; + ss | min +----+------- + 0 | 1 + 11 | 10001 + 3 | 30001 + | 30537 + 4 | 40003 + 5 | 50001 + 6 | 60002 + 7 | 70001 + 8 | 80003 + 9 | 90002 +(10 rows) + select count(*) from aggfns where cfloat8 <= 0; count ------- @@ -2079,6 +3430,21 
@@ select s, count(*) from aggfns where cfloat8 <= 0 group by s order by count(*), 0 | 10119 (9 rows) +select ss, count(*) from aggfns where cfloat8 <= 0 group by ss order by count(*), ss limit 10; + ss | count +----+------- + | 6 + 6 | 9903 + 11 | 9935 + 7 | 9979 + 5 | 10028 + 3 | 10031 + 8 | 10050 + 9 | 10055 + 4 | 10109 + 0 | 10119 +(10 rows) + select max(cdate) from aggfns where cfloat8 <= 0; max ------------ @@ -2099,6 +3465,21 @@ select s, max(cdate) from aggfns where cfloat8 <= 0 group by s order by max(cdat 9 | 06-01-2267 (9 rows) +select ss, max(cdate) from aggfns where cfloat8 <= 0 group by ss order by max(cdate), ss limit 10; + ss | max +----+------------ + 0 | 01-01-2021 + 3 | 02-21-2103 + | 02-21-2103 + 4 | 07-09-2130 + 11 | 07-09-2130 + 5 | 11-24-2157 + 6 | 04-11-2185 + 7 | 08-28-2212 + 8 | 01-14-2240 + 9 | 06-01-2267 +(10 rows) + select min(cdate) from aggfns where cfloat8 <= 0; min ------------ @@ -2119,6 +3500,21 @@ select s, min(cdate) from aggfns where cfloat8 <= 0 group by s order by min(cdat 9 | 06-01-2267 (9 rows) +select ss, min(cdate) from aggfns where cfloat8 <= 0 group by ss order by min(cdate), ss limit 10; + ss | min +----+------------ + 0 | 01-01-2021 + 11 | 10-05-2075 + 3 | 02-21-2103 + | 02-21-2103 + 4 | 07-09-2130 + 5 | 11-24-2157 + 6 | 04-11-2185 + 7 | 08-28-2212 + 8 | 01-14-2240 + 9 | 06-01-2267 +(10 rows) + select avg(cfloat4) from aggfns where cfloat8 <= 0; avg ---------- @@ -2139,6 +3535,21 @@ select s, avg(cfloat4) from aggfns where cfloat8 <= 0 group by s order by avg(cf 2 | Infinity (9 rows) +select ss, avg(cfloat4) from aggfns where cfloat8 <= 0 group by ss order by avg(cfloat4), ss limit 10; + ss | avg +----+-------------------- + | -7.61213672161102 + 9 | -0.376175993822296 + 5 | -0.351281471733702 + 3 | -0.323676224863234 + 6 | -0.215785538185229 + 7 | 0.0690012062121504 + 4 | 0.113266462457489 + 8 | 0.308099926433512 + 0 | 0.497406092427368 + 11 | Infinity +(10 rows) + select max(cfloat4) from aggfns where cfloat8 <= 0; 
max ---------- @@ -2159,6 +3570,21 @@ select s, max(cfloat4) from aggfns where cfloat8 <= 0 group by s order by max(cf 2 | Infinity (9 rows) +select ss, max(cfloat4) from aggfns where cfloat8 <= 0 group by ss order by max(cfloat4), ss limit 10; + ss | max +----+---------- + | 43.8334 + 5 | 49.9753 + 9 | 49.9899 + 7 | 49.992 + 6 | 49.9938 + 4 | 49.9946 + 3 | 49.9979 + 0 | 49.9995 + 8 | 49.9997 + 11 | Infinity +(10 rows) + select min(cfloat4) from aggfns where cfloat8 <= 0; min ---------- @@ -2179,6 +3605,21 @@ select s, min(cfloat4) from aggfns where cfloat8 <= 0 group by s order by min(cf 3 | -49.974 (9 rows) +select ss, min(cfloat4) from aggfns where cfloat8 <= 0 group by ss order by min(cfloat4), ss limit 10; + ss | min +----+---------- + 4 | -49.9999 + 6 | -49.9995 + 11 | -49.9991 + 7 | -49.9984 + 0 | -49.9949 + 5 | -49.9942 + 9 | -49.9874 + 8 | -49.9853 + 3 | -49.974 + | -45.4083 +(10 rows) + select stddev(cfloat4) from aggfns where cfloat8 <= 0; stddev -------- @@ -2199,6 +3640,21 @@ select s, stddev(cfloat4) from aggfns where cfloat8 <= 0 group by s order by std 2 | NaN (9 rows) +select ss, stddev(cfloat4) from aggfns where cfloat8 <= 0 group by ss order by stddev(cfloat4), ss limit 10; + ss | stddev +----+------------------ + 0 | 28.7188352112159 + 3 | 28.7564997868619 + 4 | 28.7937148382071 + 7 | 28.8547648614793 + 6 | 28.89882973622 + 8 | 28.9050890855561 + 9 | 28.9126192916064 + 5 | 29.1278202173095 + | 34.8729157239149 + 11 | NaN +(10 rows) + select sum(cfloat4) from aggfns where cfloat8 <= 0; sum ---------- @@ -2219,6 +3675,21 @@ select s, sum(cfloat4) from aggfns where cfloat8 <= 0 group by s order by sum(cf 2 | Infinity (9 rows) +select ss, sum(cfloat4) from aggfns where cfloat8 <= 0 group by ss order by sum(cfloat4), ss limit 10; + ss | sum +----+---------- + 9 | -3782.45 + 5 | -3522.65 + 3 | -3246.8 + 6 | -2136.92 + | -45.6728 + 7 | 688.563 + 4 | 1145.01 + 8 | 3096.4 + 0 | 5033.25 + 11 | Infinity +(10 rows) + select avg(cfloat8) from aggfns where 
cfloat8 <= 0; avg ------------------- @@ -2239,6 +3710,21 @@ select s, avg(cfloat8) from aggfns where cfloat8 <= 0 group by s order by avg(cf 5 | -24.7870942066272 (9 rows) +select ss, avg(cfloat8) from aggfns where cfloat8 <= 0 group by ss order by avg(cfloat8), ss limit 10; + ss | avg +----+------------------- + 7 | -25.229255062715 + 8 | -25.2270239386592 + 3 | -25.1388045035744 + 0 | -25.0944548448943 + 6 | -25.0686778438405 + 9 | -24.8892608135943 + 11 | -24.858866008083 + 4 | -24.8295616508204 + 5 | -24.7870942066272 + | -18.9533624914475 +(10 rows) + select max(cfloat8) from aggfns where cfloat8 <= 0; max ---------------------- @@ -2259,6 +3745,21 @@ select s, max(cfloat8) from aggfns where cfloat8 <= 0 group by s order by max(cf 2 | -0.00172397121787071 (9 rows) +select ss, max(cfloat8) from aggfns where cfloat8 <= 0 group by ss order by max(cfloat8), ss limit 10; + ss | max +----+---------------------- + | -5.18986904062331 + 0 | -0.00547224190086126 + 9 | -0.00466627534478903 + 4 | -0.0041270861402154 + 6 | -0.00408347696065903 + 7 | -0.00273226760327816 + 3 | -0.00268903095275164 + 5 | -0.00228420831263065 + 8 | -0.00182925723493099 + 11 | -0.00172397121787071 +(10 rows) + select min(cfloat8) from aggfns where cfloat8 <= 0; min ------------------- @@ -2279,6 +3780,21 @@ select s, min(cfloat8) from aggfns where cfloat8 <= 0 group by s order by min(cf 8 | -49.9897602945566 (9 rows) +select ss, min(cfloat8) from aggfns where cfloat8 <= 0 group by ss order by min(cfloat8), ss limit 10; + ss | min +----+------------------- + 0 | -49.9994775978848 + 11 | -49.9985320260748 + 4 | -49.9983572866768 + 3 | -49.9977725092322 + 6 | -49.9967515002936 + 9 | -49.992344272323 + 5 | -49.9921301845461 + 7 | -49.99003498815 + 8 | -49.9897602945566 + | -38.5084833716974 +(10 rows) + select stddev(cfloat8) from aggfns where cfloat8 <= 0; stddev ------------------ @@ -2299,6 +3815,21 @@ select s, stddev(cfloat8) from aggfns where cfloat8 <= 0 group by s order by std 0 | 
14.5136612753879 (9 rows) +select ss, stddev(cfloat8) from aggfns where cfloat8 <= 0 group by ss order by stddev(cfloat8), ss limit 10; + ss | stddev +----+------------------ + 7 | 14.4030112329563 + 11 | 14.4033336871388 + 6 | 14.4144870413512 + 3 | 14.4335904065982 + 4 | 14.4339025361113 + 5 | 14.4378475427373 + 9 | 14.445355480345 + 8 | 14.4532419971748 + 0 | 14.5136612753879 + | 15.4584765893444 +(10 rows) + select sum(cfloat8) from aggfns where cfloat8 <= 0; sum ------------------- @@ -2319,6 +3850,21 @@ select s, sum(cfloat8) from aggfns where cfloat8 <= 0 group by s order by sum(cf 2 | -246743.521314557 (9 rows) +select ss, sum(cfloat8) from aggfns where cfloat8 <= 0 group by ss order by sum(cfloat8), ss limit 10; + ss | sum +----+------------------- + 0 | -253930.788575485 + 8 | -253531.590583525 + 3 | -252167.347975355 + 7 | -251762.736270833 + 4 | -251002.038728143 + 9 | -250261.517480691 + 5 | -248564.980704058 + 6 | -248255.116687552 + 11 | -246972.833790304 + | -113.720174948685 +(10 rows) + select avg(cint2) from aggfns where cfloat8 <= 0; avg ---------------------- @@ -2339,6 +3885,21 @@ select s, avg(cint2) from aggfns where cfloat8 <= 0 group by s order by avg(cint 9 | 147.3351582719490344 (9 rows) +select ss, avg(cint2) from aggfns where cfloat8 <= 0 group by ss order by avg(cint2), ss limit 10; + ss | avg +----+----------------------- + 8 | -256.1267058471959359 + 11 | -158.1923851732473811 + 3 | -32.6703921764294981 + 6 | -23.1764884261599110 + 0 | 6.6666006927263731 + 7 | 31.4203451043338684 + 4 | 61.9965329370975731 + 5 | 66.6813373253493014 + 9 | 147.3351582719490344 + | 935.3333333333333333 +(10 rows) + select count(cint2) from aggfns where cfloat8 <= 0; count ------- @@ -2359,6 +3920,21 @@ select s, count(cint2) from aggfns where cfloat8 <= 0 group by s order by count( 0 | 10105 (9 rows) +select ss, count(cint2) from aggfns where cfloat8 <= 0 group by ss order by count(cint2), ss limit 10; + ss | count +----+------- + | 6 + 6 | 9893 + 11 | 
9928 + 7 | 9968 + 5 | 10020 + 3 | 10021 + 8 | 10039 + 9 | 10046 + 4 | 10095 + 0 | 10105 +(10 rows) + select max(cint2) from aggfns where cfloat8 <= 0; max ------- @@ -2379,6 +3955,21 @@ select s, max(cint2) from aggfns where cfloat8 <= 0 group by s order by max(cint 6 | 16383 (9 rows) +select ss, max(cint2) from aggfns where cfloat8 <= 0 group by ss order by max(cint2), ss limit 10; + ss | max +----+------- + | 16362 + 7 | 16376 + 9 | 16376 + 3 | 16378 + 0 | 16381 + 5 | 16381 + 11 | 16381 + 8 | 16382 + 4 | 16383 + 6 | 16383 +(10 rows) + select min(cint2) from aggfns where cfloat8 <= 0; min -------- @@ -2399,6 +3990,21 @@ select s, min(cint2) from aggfns where cfloat8 <= 0 group by s order by min(cint 9 | -16374 (9 rows) +select ss, min(cint2) from aggfns where cfloat8 <= 0 group by ss order by min(cint2), ss limit 10; + ss | min +----+-------- + 0 | -16383 + 5 | -16383 + 6 | -16383 + 7 | -16382 + 8 | -16382 + 11 | -16382 + 3 | -16381 + 4 | -16379 + 9 | -16374 + | -7696 +(10 rows) + select stddev(cint2) from aggfns where cfloat8 <= 0; stddev ------------------- @@ -2419,6 +4025,21 @@ select s, stddev(cint2) from aggfns where cfloat8 <= 0 group by s order by stdde 4 | 9517.027301293118 (9 rows) +select ss, stddev(cint2) from aggfns where cfloat8 <= 0 group by ss order by stddev(cint2), ss limit 10; + ss | stddev +----+-------------------- + 0 | 9451.115288155243 + 9 | 9456.028731464701 + 7 | 9463.041992703462 + 3 | 9485.440311868001 + 8 | 9487.451140540082 + 6 | 9502.509922580216 + 11 | 9510.413974851870 + 5 | 9513.243501566793 + 4 | 9518.051043653511 + | 10051.146773710285 +(10 rows) + select sum(cint2) from aggfns where cfloat8 <= 0; sum ---------- @@ -2439,6 +4060,21 @@ select s, sum(cint2) from aggfns where cfloat8 <= 0 group by s order by sum(cint 9 | 1480129 (9 rows) +select ss, sum(cint2) from aggfns where cfloat8 <= 0 group by ss order by sum(cint2), ss limit 10; + ss | sum +----+---------- + 8 | -2571256 + 11 | -1570534 + 3 | -327390 + 6 | -229285 + | 5612 + 
0 | 67366 + 7 | 313198 + 4 | 625855 + 5 | 668147 + 9 | 1480129 +(10 rows) + select avg(cint4) from aggfns where cfloat8 <= 0; avg --------------------- @@ -2459,6 +4095,21 @@ select s, avg(cint4) from aggfns where cfloat8 <= 0 group by s order by avg(cint 5 | 136.0287195851615477 (9 rows) +select ss, avg(cint4) from aggfns where cfloat8 <= 0 group by ss order by avg(cint4), ss limit 10; + ss | avg +----+----------------------- + 8 | -88.7033830845771144 + 7 | -77.4082573404148712 + 11 | -53.3737292400603926 + 3 | -32.1038779782673711 + 0 | -21.8140132424152584 + 6 | -10.7283651418761991 + 9 | 20.8253605171556440 + 4 | 59.1279058264912454 + 5 | 136.0287195851615477 + | 5077.1666666666666667 +(10 rows) + select max(cint4) from aggfns where cfloat8 <= 0; max ------- @@ -2479,6 +4130,21 @@ select s, max(cint4) from aggfns where cfloat8 <= 0 group by s order by max(cint 9 | 16383 (9 rows) +select ss, max(cint4) from aggfns where cfloat8 <= 0 group by ss order by max(cint4), ss limit 10; + ss | max +----+------- + | 13078 + 5 | 16364 + 7 | 16378 + 3 | 16379 + 11 | 16381 + 0 | 16383 + 4 | 16383 + 6 | 16383 + 8 | 16383 + 9 | 16383 +(10 rows) + select min(cint4) from aggfns where cfloat8 <= 0; min -------- @@ -2499,6 +4165,21 @@ select s, min(cint4) from aggfns where cfloat8 <= 0 group by s order by min(cint 5 | -16374 (9 rows) +select ss, min(cint4) from aggfns where cfloat8 <= 0 group by ss order by min(cint4), ss limit 10; + ss | min +----+-------- + 0 | -16383 + 3 | -16382 + 4 | -16382 + 6 | -16382 + 8 | -16382 + 9 | -16381 + 7 | -16379 + 11 | -16377 + 5 | -16374 + | -8992 +(10 rows) + select stddev(cint4) from aggfns where cfloat8 <= 0; stddev ------------------- @@ -2519,6 +4200,21 @@ select s, stddev(cint4) from aggfns where cfloat8 <= 0 group by s order by stdde 5 | 9533.551517829360 (9 rows) +select ss, stddev(cint4) from aggfns where cfloat8 <= 0 group by ss order by stddev(cint4), ss limit 10; + ss | stddev +----+------------------- + 9 | 9377.745829196558 + 11 | 
9422.029173765748 + 6 | 9436.031206307503 + 3 | 9439.178404000439 + 0 | 9444.372352979574 + 4 | 9468.093604068949 + 7 | 9470.920199125109 + 8 | 9488.579674823607 + 5 | 9533.551517829360 + | 10351.23962464 +(10 rows) + select sum(cint4) from aggfns where cfloat8 <= 0; sum --------- @@ -2539,6 +4235,21 @@ select s, sum(cint4) from aggfns where cfloat8 <= 0 group by s order by sum(cint 5 | 1364096 (9 rows) +select ss, sum(cint4) from aggfns where cfloat8 <= 0 group by ss order by sum(cint4), ss limit 10; + ss | sum +----+--------- + 8 | -891469 + 7 | -772457 + 11 | -530268 + 3 | -322034 + 0 | -220736 + 6 | -106243 + | 30463 + 9 | 209399 + 4 | 597724 + 5 | 1364096 +(10 rows) + select avg(cint8) from aggfns where cfloat8 <= 0; avg ---------------------- @@ -2559,6 +4270,21 @@ select s, avg(cint8) from aggfns where cfloat8 <= 0 group by s order by avg(cint 9 | 78.7373446046742914 (9 rows) +select ss, avg(cint8) from aggfns where cfloat8 <= 0 group by ss order by avg(cint8), ss limit 10; + ss | avg +----+----------------------- + 11 | -161.0356316054353296 + 5 | -84.4558236936577583 + 8 | -71.0010945273631841 + 3 | -30.2171269065895723 + 0 | -11.7269493032908390 + 7 | -5.8845575708988877 + 4 | 26.3155603917301415 + 6 | 57.5590225184287590 + 9 | 78.7373446046742914 + | 1579.8333333333333333 +(10 rows) + select max(cint8) from aggfns where cfloat8 <= 0; max ------- @@ -2579,6 +4305,21 @@ select s, max(cint8) from aggfns where cfloat8 <= 0 group by s order by max(cint 4 | 16383 (9 rows) +select ss, max(cint8) from aggfns where cfloat8 <= 0 group by ss order by max(cint8), ss limit 10; + ss | max +----+------- + | 12678 + 8 | 16379 + 11 | 16379 + 6 | 16380 + 7 | 16380 + 5 | 16381 + 9 | 16381 + 3 | 16382 + 0 | 16383 + 4 | 16383 +(10 rows) + select min(cint8) from aggfns where cfloat8 <= 0; min -------- @@ -2599,6 +4340,21 @@ select s, min(cint8) from aggfns where cfloat8 <= 0 group by s order by min(cint 9 | -16372 (9 rows) +select ss, min(cint8) from aggfns where cfloat8 <= 0 
group by ss order by min(cint8), ss limit 10; + ss | min +----+-------- + 0 | -16383 + 6 | -16383 + 8 | -16383 + 5 | -16382 + 4 | -16381 + 7 | -16381 + 3 | -16375 + 11 | -16375 + 9 | -16372 + | -14174 +(10 rows) + select sum(cint8) from aggfns where cfloat8 <= 0; sum ---------- @@ -2619,6 +4375,21 @@ select s, sum(cint8) from aggfns where cfloat8 <= 0 group by s order by sum(cint 9 | 791704 (9 rows) +select ss, sum(cint8) from aggfns where cfloat8 <= 0 group by ss order by sum(cint8), ss limit 10; + ss | sum +----+---------- + 11 | -1599889 + 5 | -846923 + 8 | -713561 + 3 | -303108 + 0 | -118665 + 7 | -58722 + | 9479 + 4 | 266024 + 6 | 570007 + 9 | 791704 +(10 rows) + select max(cts) from aggfns where cfloat8 <= 0; max -------------------------- @@ -2639,6 +4410,21 @@ select s, max(cts) from aggfns where cfloat8 <= 0 group by s order by max(cts), 9 | Sat Jan 02 02:01:01 2021 (9 rows) +select ss, max(cts) from aggfns where cfloat8 <= 0 group by ss order by max(cts), ss limit 10; + ss | max +----+-------------------------- + 0 | Fri Jan 01 01:01:01 2021 + 3 | Fri Jan 01 09:21:01 2021 + | Fri Jan 01 09:21:01 2021 + 4 | Fri Jan 01 12:07:41 2021 + 11 | Fri Jan 01 12:07:41 2021 + 5 | Fri Jan 01 14:54:21 2021 + 6 | Fri Jan 01 17:41:01 2021 + 7 | Fri Jan 01 20:27:41 2021 + 8 | Fri Jan 01 23:14:21 2021 + 9 | Sat Jan 02 02:01:01 2021 +(10 rows) + select min(cts) from aggfns where cfloat8 <= 0; min -------------------------- @@ -2659,6 +4445,21 @@ select s, min(cts) from aggfns where cfloat8 <= 0 group by s order by min(cts), 9 | Sat Jan 02 02:01:01 2021 (9 rows) +select ss, min(cts) from aggfns where cfloat8 <= 0 group by ss order by min(cts), ss limit 10; + ss | min +----+-------------------------- + 0 | Fri Jan 01 01:01:01 2021 + 11 | Fri Jan 01 06:34:21 2021 + 3 | Fri Jan 01 09:21:01 2021 + | Fri Jan 01 09:21:01 2021 + 4 | Fri Jan 01 12:07:41 2021 + 5 | Fri Jan 01 14:54:21 2021 + 6 | Fri Jan 01 17:41:01 2021 + 7 | Fri Jan 01 20:27:41 2021 + 8 | Fri Jan 01 23:14:21 2021 + 
9 | Sat Jan 02 02:01:01 2021 +(10 rows) + select max(ctstz) from aggfns where cfloat8 <= 0; max ------------------------------ @@ -2679,7 +4480,22 @@ select s, max(ctstz) from aggfns where cfloat8 <= 0 group by s order by max(ctst 9 | Sat Jan 02 02:01:01 2021 PST (9 rows) -select min(ctstz) from aggfns where cfloat8 <= 0; +select ss, max(ctstz) from aggfns where cfloat8 <= 0 group by ss order by max(ctstz), ss limit 10; + ss | max +----+------------------------------ + 0 | Fri Jan 01 01:01:01 2021 PST + 3 | Fri Jan 01 09:21:01 2021 PST + | Fri Jan 01 09:21:01 2021 PST + 4 | Fri Jan 01 12:07:41 2021 PST + 11 | Fri Jan 01 12:07:41 2021 PST + 5 | Fri Jan 01 14:54:21 2021 PST + 6 | Fri Jan 01 17:41:01 2021 PST + 7 | Fri Jan 01 20:27:41 2021 PST + 8 | Fri Jan 01 23:14:21 2021 PST + 9 | Sat Jan 02 02:01:01 2021 PST +(10 rows) + +select min(ctstz) from aggfns where cfloat8 <= 0; min ------------------------------ Fri Jan 01 01:01:01 2021 PST @@ -2699,6 +4515,21 @@ select s, min(ctstz) from aggfns where cfloat8 <= 0 group by s order by min(ctst 9 | Sat Jan 02 02:01:01 2021 PST (9 rows) +select ss, min(ctstz) from aggfns where cfloat8 <= 0 group by ss order by min(ctstz), ss limit 10; + ss | min +----+------------------------------ + 0 | Fri Jan 01 01:01:01 2021 PST + 11 | Fri Jan 01 06:34:21 2021 PST + 3 | Fri Jan 01 09:21:01 2021 PST + | Fri Jan 01 09:21:01 2021 PST + 4 | Fri Jan 01 12:07:41 2021 PST + 5 | Fri Jan 01 14:54:21 2021 PST + 6 | Fri Jan 01 17:41:01 2021 PST + 7 | Fri Jan 01 20:27:41 2021 PST + 8 | Fri Jan 01 23:14:21 2021 PST + 9 | Sat Jan 02 02:01:01 2021 PST +(10 rows) + select avg(s) from aggfns where cfloat8 <= 0; avg -------------------- @@ -2719,6 +4550,21 @@ select s, avg(s) from aggfns where cfloat8 <= 0 group by s order by avg(s), s li 9 | 9.0000000000000000 (9 rows) +select ss, avg(s) from aggfns where cfloat8 <= 0 group by ss order by avg(s), ss limit 10; + ss | avg +----+---------------------------- + 0 | 0.000000000000000000000000 + 11 | 
2.0018117765475591 + 3 | 3.0000000000000000 + | 3.0000000000000000 + 4 | 4.0000000000000000 + 5 | 5.0000000000000000 + 6 | 6.0000000000000000 + 7 | 7.0000000000000000 + 8 | 8.0000000000000000 + 9 | 9.0000000000000000 +(10 rows) + select count(s) from aggfns where cfloat8 <= 0; count ------- @@ -2739,6 +4585,21 @@ select s, count(s) from aggfns where cfloat8 <= 0 group by s order by count(s), 0 | 10119 (9 rows) +select ss, count(s) from aggfns where cfloat8 <= 0 group by ss order by count(s), ss limit 10; + ss | count +----+------- + | 6 + 6 | 9903 + 11 | 9935 + 7 | 9979 + 5 | 10028 + 3 | 10031 + 8 | 10050 + 9 | 10055 + 4 | 10109 + 0 | 10119 +(10 rows) + select max(s) from aggfns where cfloat8 <= 0; max ----- @@ -2759,6 +4620,21 @@ select s, max(s) from aggfns where cfloat8 <= 0 group by s order by max(s), s li 9 | 9 (9 rows) +select ss, max(s) from aggfns where cfloat8 <= 0 group by ss order by max(s), ss limit 10; + ss | max +----+----- + 0 | 0 + 3 | 3 + | 3 + 4 | 4 + 11 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + select min(s) from aggfns where cfloat8 <= 0; min ----- @@ -2779,6 +4655,21 @@ select s, min(s) from aggfns where cfloat8 <= 0 group by s order by min(s), s li 9 | 9 (9 rows) +select ss, min(s) from aggfns where cfloat8 <= 0 group by ss order by min(s), ss limit 10; + ss | min +----+----- + 0 | 0 + 11 | 2 + 3 | 3 + | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + select stddev(s) from aggfns where cfloat8 <= 0; stddev -------------------- @@ -2799,6 +4690,21 @@ select s, stddev(s) from aggfns where cfloat8 <= 0 group by s order by stddev(s) 9 | 0 (9 rows) +select ss, stddev(s) from aggfns where cfloat8 <= 0 group by ss order by stddev(s), ss limit 10; + ss | stddev +----+------------------------ + 0 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 + | 0 + 11 | 0.06017171256636552646 +(10 rows) + select sum(s) from aggfns where cfloat8 <= 0; sum -------- @@ -2819,6 +4725,21 @@ select s, sum(s) from aggfns where 
cfloat8 <= 0 group by s order by sum(s), s li 9 | 90495 (9 rows) +select ss, sum(s) from aggfns where cfloat8 <= 0 group by ss order by sum(s), ss limit 10; + ss | sum +----+------- + 0 | 0 + | 18 + 11 | 19888 + 3 | 30093 + 4 | 40436 + 5 | 50140 + 6 | 59418 + 7 | 69853 + 8 | 80400 + 9 | 90495 +(10 rows) + select avg(ss) from aggfns where cfloat8 <= 0; avg -------------------- @@ -2839,6 +4760,21 @@ select s, avg(ss) from aggfns where cfloat8 <= 0 group by s order by avg(ss), s 2 | 11.0000000000000000 (9 rows) +select ss, avg(ss) from aggfns where cfloat8 <= 0 group by ss order by avg(ss), ss limit 10; + ss | avg +----+---------------------------- + 0 | 0.000000000000000000000000 + 3 | 3.0000000000000000 + 4 | 4.0000000000000000 + 5 | 5.0000000000000000 + 6 | 6.0000000000000000 + 7 | 7.0000000000000000 + 8 | 8.0000000000000000 + 9 | 9.0000000000000000 + 11 | 11.0000000000000000 + | +(10 rows) + select max(ss) from aggfns where cfloat8 <= 0; max ----- @@ -2859,6 +4795,21 @@ select s, max(ss) from aggfns where cfloat8 <= 0 group by s order by max(ss), s 4 | 11 (9 rows) +select ss, max(ss) from aggfns where cfloat8 <= 0 group by ss order by max(ss), ss limit 10; + ss | max +----+----- + 0 | 0 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 11 | 11 + | +(10 rows) + select min(ss) from aggfns where cfloat8 <= 0; min ----- @@ -2879,6 +4830,21 @@ select s, min(ss) from aggfns where cfloat8 <= 0 group by s order by min(ss), s 2 | 11 (9 rows) +select ss, min(ss) from aggfns where cfloat8 <= 0 group by ss order by min(ss), ss limit 10; + ss | min +----+----- + 0 | 0 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 11 | 11 + | +(10 rows) + select stddev(ss) from aggfns where cfloat8 <= 0; stddev -------------------- @@ -2899,6 +4865,21 @@ select s, stddev(ss) from aggfns where cfloat8 <= 0 group by s order by stddev(s 4 | 0.20868929911309143893 (9 rows) +select ss, stddev(ss) from aggfns where cfloat8 <= 0 group by ss order by stddev(ss), ss limit 10; + ss | 
stddev +----+-------- + 0 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 + 11 | 0 + | +(10 rows) + select sum(ss) from aggfns where cfloat8 <= 0; sum -------- @@ -2919,6 +4900,21 @@ select s, sum(ss) from aggfns where cfloat8 <= 0 group by s order by sum(ss), s 2 | 109186 (9 rows) +select ss, sum(ss) from aggfns where cfloat8 <= 0 group by ss order by sum(ss), ss limit 10; + ss | sum +----+-------- + 0 | 0 + 3 | 30093 + 4 | 40436 + 5 | 50140 + 6 | 59418 + 7 | 69853 + 8 | 80400 + 9 | 90495 + 11 | 109285 + | +(10 rows) + select max(t) from aggfns where cfloat8 <= 0; max -------- @@ -2939,6 +4935,21 @@ select s, max(t) from aggfns where cfloat8 <= 0 group by s order by max(t), s li 9 | 109998 (9 rows) +select ss, max(t) from aggfns where cfloat8 <= 0 group by ss order by max(t), ss limit 10; + ss | max +----+-------- + 0 | 20000 + | 48438 + 3 | 50000 + 11 | 58135 + 4 | 59998 + 5 | 69999 + 6 | 80000 + 7 | 90000 + 8 | 100000 + 9 | 109998 +(10 rows) + select min(t) from aggfns where cfloat8 <= 0; min ----- @@ -2959,6 +4970,21 @@ select s, min(t) from aggfns where cfloat8 <= 0 group by s order by min(t), s li 9 | 90001 (9 rows) +select ss, min(t) from aggfns where cfloat8 <= 0 group by ss order by min(t), ss limit 10; + ss | min +----+------- + 0 | 8 + 11 | 20003 + 3 | 30002 + | 33696 + 4 | 40001 + 5 | 50004 + 6 | 60001 + 7 | 70002 + 8 | 80001 + 9 | 90001 +(10 rows) + select count(*) from aggfns where cfloat8 < 1000; count -------- @@ -2980,6 +5006,21 @@ select s, count(*) from aggfns where cfloat8 < 1000 group by s order by count(*) 9 | 20000 (10 rows) +select ss, count(*) from aggfns where cfloat8 < 1000 group by ss order by count(*), ss limit 10; + ss | count +----+------- + | 19 + 3 | 19981 + 4 | 19981 + 0 | 20000 + 5 | 20000 + 6 | 20000 + 7 | 20000 + 8 | 20000 + 9 | 20000 + 11 | 40019 +(10 rows) + select max(cdate) from aggfns where cfloat8 < 1000; max ------------ @@ -3001,6 +5042,21 @@ select s, max(cdate) from aggfns where cfloat8 < 1000 group by s 
order by max(cd 9 | 06-01-2267 (10 rows) +select ss, max(cdate) from aggfns where cfloat8 < 1000 group by ss order by max(cdate), ss limit 10; + ss | max +----+------------ + 0 | 01-01-2021 + 3 | 02-21-2103 + | 02-21-2103 + 4 | 07-09-2130 + 11 | 07-09-2130 + 5 | 11-24-2157 + 6 | 04-11-2185 + 7 | 08-28-2212 + 8 | 01-14-2240 + 9 | 06-01-2267 +(10 rows) + select min(cdate) from aggfns where cfloat8 < 1000; min ------------ @@ -3022,6 +5078,21 @@ select s, min(cdate) from aggfns where cfloat8 < 1000 group by s order by min(cd 9 | 06-01-2267 (10 rows) +select ss, min(cdate) from aggfns where cfloat8 < 1000 group by ss order by min(cdate), ss limit 10; + ss | min +----+------------ + 0 | 01-01-2021 + 11 | 05-19-2048 + 3 | 02-21-2103 + | 02-21-2103 + 4 | 07-09-2130 + 5 | 11-24-2157 + 6 | 04-11-2185 + 7 | 08-28-2212 + 8 | 01-14-2240 + 9 | 06-01-2267 +(10 rows) + select avg(cfloat4) from aggfns where cfloat8 < 1000; avg ----- @@ -3043,6 +5114,21 @@ select s, avg(cfloat4) from aggfns where cfloat8 < 1000 group by s order by avg( 1 | NaN (10 rows) +select ss, avg(cfloat4) from aggfns where cfloat8 < 1000 group by ss order by avg(cfloat4), ss limit 10; + ss | avg +----+---------------------- + 3 | -Infinity + | -1.39583652270468 + 9 | -0.292700759558938 + 4 | -0.169252917487522 + 6 | -0.00610964622725733 + 5 | 0.0107821527590975 + 0 | 0.0862269837114494 + 7 | 0.19168354413514 + 8 | 0.456703752867272 + 11 | NaN +(10 rows) + select max(cfloat4) from aggfns where cfloat8 < 1000; max ----- @@ -3064,6 +5150,21 @@ select s, max(cfloat4) from aggfns where cfloat8 < 1000 group by s order by max( 1 | NaN (10 rows) +select ss, max(cfloat4) from aggfns where cfloat8 < 1000 group by ss order by max(cfloat4), ss limit 10; + ss | max +----+--------- + | 47.2047 + 9 | 49.9899 + 4 | 49.9946 + 6 | 49.9956 + 7 | 49.9969 + 3 | 49.9979 + 5 | 49.9992 + 0 | 49.9995 + 8 | 49.9997 + 11 | NaN +(10 rows) + select min(cfloat4) from aggfns where cfloat8 < 1000; min ----------- @@ -3085,6 +5186,21 @@ 
select s, min(cfloat4) from aggfns where cfloat8 < 1000 group by s order by min( 9 | -49.9911 (10 rows) +select ss, min(cfloat4) from aggfns where cfloat8 < 1000 group by ss order by min(cfloat4), ss limit 10; + ss | min +----+----------- + 3 | -Infinity + 4 | -49.9999 + 6 | -49.9995 + 7 | -49.9984 + 8 | -49.9969 + 0 | -49.9949 + 5 | -49.9942 + 9 | -49.9911 + | -45.4083 + 11 | NaN +(10 rows) + select stddev(cfloat4) from aggfns where cfloat8 < 1000; stddev -------- @@ -3106,6 +5222,21 @@ select s, stddev(cfloat4) from aggfns where cfloat8 < 1000 group by s order by s 3 | NaN (10 rows) +select ss, stddev(cfloat4) from aggfns where cfloat8 < 1000 group by ss order by stddev(cfloat4), ss limit 10; + ss | stddev +----+------------------ + 0 | 28.7274163912974 + 7 | 28.7892027644318 + 4 | 28.8220943927954 + 9 | 28.8426424990846 + 6 | 28.9190577543738 + 8 | 29.0040125904064 + 5 | 29.0213532270614 + | 30.6324072248673 + 3 | NaN + 11 | NaN +(10 rows) + select sum(cfloat4) from aggfns where cfloat8 < 1000; sum ----- @@ -3127,6 +5258,21 @@ select s, sum(cfloat4) from aggfns where cfloat8 < 1000 group by s order by sum( 1 | NaN (10 rows) +select ss, sum(cfloat4) from aggfns where cfloat8 < 1000 group by ss order by sum(cfloat4), ss limit 10; + ss | sum +----+----------- + 3 | -Infinity + 9 | -5854.02 + 4 | -3381.84 + 6 | -122.193 + | -26.5209 + 5 | 215.643 + 0 | 1724.54 + 7 | 3833.67 + 8 | 9134.08 + 11 | NaN +(10 rows) + select avg(cfloat8) from aggfns where cfloat8 < 1000; avg ----------------- @@ -3148,6 +5294,21 @@ select s, avg(cfloat8) from aggfns where cfloat8 < 1000 group by s order by avg( 1 | 13 (10 rows) +select ss, avg(cfloat8) from aggfns where cfloat8 < 1000 group by ss order by avg(cfloat8), ss limit 10; + ss | avg +----+-------------------- + 0 | -0.306925132697215 + 8 | -0.268692900155438 + 4 | -0.224160255000712 + 3 | -0.153492446187821 + 9 | -0.114842409039848 + 7 | -0.063637967283139 + 5 | 0.0438265096326359 + 6 | 0.169599099685438 + | 5.42090986487701 + 11 
| 6.59778165165114 +(10 rows) + select max(cfloat8) from aggfns where cfloat8 < 1000; max ------------------ @@ -3169,6 +5330,21 @@ select s, max(cfloat8) from aggfns where cfloat8 < 1000 group by s order by max( 9 | 49.9995574122295 (10 rows) +select ss, max(cfloat8) from aggfns where cfloat8 < 1000 group by ss order by max(cfloat8), ss limit 10; + ss | max +----+------------------ + | 46.3985309237614 + 5 | 49.9874341068789 + 3 | 49.9890822684392 + 6 | 49.9939429108053 + 8 | 49.9963666079566 + 0 | 49.9965498689562 + 7 | 49.9973275698721 + 11 | 49.9975695507601 + 4 | 49.9978997278959 + 9 | 49.9995574122295 +(10 rows) + select min(cfloat8) from aggfns where cfloat8 < 1000; min ------------------- @@ -3190,6 +5366,21 @@ select s, min(cfloat8) from aggfns where cfloat8 < 1000 group by s order by min( 1 | 13 (10 rows) +select ss, min(cfloat8) from aggfns where cfloat8 < 1000 group by ss order by min(cfloat8), ss limit 10; + ss | min +----+------------------- + 0 | -49.9994775978848 + 11 | -49.9985320260748 + 4 | -49.9983572866768 + 3 | -49.9977725092322 + 6 | -49.9967515002936 + 9 | -49.992344272323 + 5 | -49.9921301845461 + 7 | -49.99003498815 + 8 | -49.9897602945566 + | -38.5084833716974 +(10 rows) + select stddev(cfloat8) from aggfns where cfloat8 < 1000; stddev ------------------ @@ -3211,6 +5402,21 @@ select s, stddev(cfloat8) from aggfns where cfloat8 < 1000 group by s order by s 7 | 28.9656492103737 (10 rows) +select ss, stddev(cfloat8) from aggfns where cfloat8 < 1000 group by ss order by stddev(cfloat8), ss limit 10; + ss | stddev +----+------------------ + 11 | 21.3262797346004 + | 22.894065438835 + 9 | 28.7642081921344 + 4 | 28.7760615445521 + 5 | 28.7843925303698 + 6 | 28.8543767497508 + 3 | 28.926156595386 + 8 | 28.96331707256 + 0 | 28.9653425568561 + 7 | 28.9656492103736 +(10 rows) + select sum(cfloat8) from aggfns where cfloat8 < 1000; sum ----------------- @@ -3232,6 +5438,21 @@ select s, sum(cfloat8) from aggfns where cfloat8 < 1000 group by s order 
by sum( 1 | 260000 (10 rows) +select ss, sum(cfloat8) from aggfns where cfloat8 < 1000 group by ss order by sum(cfloat8), ss limit 10; + ss | sum +----+------------------- + 0 | -6138.50265394431 + 8 | -5373.85800310876 + 4 | -4478.94605516922 + 3 | -3066.93256727885 + 9 | -2296.84818079695 + 7 | -1272.75934566278 + | 102.997287432663 + 5 | 876.530192652717 + 6 | 3391.98199370876 + 11 | 264036.623917427 +(10 rows) + select avg(cint2) from aggfns where cfloat8 < 1000; avg ---------------------- @@ -3253,6 +5474,21 @@ select s, avg(cint2) from aggfns where cfloat8 < 1000 group by s order by avg(ci 5 | 110.0305290025524248 (10 rows) +select ss, avg(cint2) from aggfns where cfloat8 < 1000 group by ss order by avg(cint2), ss limit 10; + ss | avg +----+------------------------ + | -1368.1578947368421053 + 8 | -129.4959711726139833 + 3 | -94.5546037471195271 + 6 | -61.0756218407487113 + 7 | -55.8695260497472599 + 11 | -33.7550336409794652 + 4 | -27.5652740206392145 + 9 | -21.7994594865121866 + 0 | 17.5951654071367799 + 5 | 110.0305290025524248 +(10 rows) + select count(cint2) from aggfns where cfloat8 < 1000; count -------- @@ -3274,6 +5510,21 @@ select s, count(cint2) from aggfns where cfloat8 < 1000 group by s order by coun 9 | 19981 (10 rows) +select ss, count(cint2) from aggfns where cfloat8 < 1000 group by ss order by count(cint2), ss limit 10; + ss | count +----+------- + | 19 + 3 | 19962 + 4 | 19962 + 0 | 19981 + 5 | 19981 + 6 | 19981 + 7 | 19981 + 8 | 19981 + 9 | 19981 + 11 | 39981 +(10 rows) + select max(cint2) from aggfns where cfloat8 < 1000; max ------- @@ -3295,6 +5546,21 @@ select s, max(cint2) from aggfns where cfloat8 < 1000 group by s order by max(ci 9 | 16383 (10 rows) +select ss, max(cint2) from aggfns where cfloat8 < 1000 group by ss order by max(cint2), ss limit 10; + ss | max +----+------- + | 16362 + 3 | 16380 + 5 | 16381 + 7 | 16381 + 8 | 16382 + 0 | 16383 + 4 | 16383 + 6 | 16383 + 9 | 16383 + 11 | 16383 +(10 rows) + select min(cint2) from aggfns 
where cfloat8 < 1000; min -------- @@ -3316,6 +5582,21 @@ select s, min(cint2) from aggfns where cfloat8 < 1000 group by s order by min(ci 9 | -16375 (10 rows) +select ss, min(cint2) from aggfns where cfloat8 < 1000 group by ss order by min(cint2), ss limit 10; + ss | min +----+-------- + 0 | -16383 + 4 | -16383 + 5 | -16383 + 6 | -16383 + 7 | -16382 + 8 | -16382 + 11 | -16382 + 3 | -16381 + 9 | -16375 + | -16100 +(10 rows) + select stddev(cint2) from aggfns where cfloat8 < 1000; stddev ------------------- @@ -3337,6 +5618,21 @@ select s, stddev(cint2) from aggfns where cfloat8 < 1000 group by s order by std 1 | 9528.039076724276 (10 rows) +select ss, stddev(cint2) from aggfns where cfloat8 < 1000 group by ss order by stddev(cint2), ss limit 10; + ss | stddev +----+------------------- + | 8413.549166956554 + 9 | 9450.322790943425 + 7 | 9462.161209850735 + 6 | 9467.569674984571 + 5 | 9467.776835158782 + 3 | 9474.482349111595 + 8 | 9477.586839536066 + 4 | 9483.611454519949 + 0 | 9484.907423282680 + 11 | 9494.206429493352 +(10 rows) + select sum(cint2) from aggfns where cfloat8 < 1000; sum ---------- @@ -3358,6 +5654,21 @@ select s, sum(cint2) from aggfns where cfloat8 < 1000 group by s order by sum(ci 5 | 2198520 (10 rows) +select ss, sum(cint2) from aggfns where cfloat8 < 1000 group by ss order by sum(cint2), ss limit 10; + ss | sum +----+---------- + 8 | -2587459 + 3 | -1887499 + 11 | -1349560 + 6 | -1220352 + 7 | -1116329 + 4 | -550258 + 9 | -435575 + | -25995 + 0 | 351569 + 5 | 2198520 +(10 rows) + select avg(cint4) from aggfns where cfloat8 < 1000; avg --------------------- @@ -3379,6 +5690,21 @@ select s, avg(cint4) from aggfns where cfloat8 < 1000 group by s order by avg(ci 5 | 103.1069000000000000 (10 rows) +select ss, avg(cint4) from aggfns where cfloat8 < 1000 group by ss order by avg(cint4), ss limit 10; + ss | avg +----+----------------------- + 9 | -102.4283000000000000 + 6 | -53.1566500000000000 + 7 | -42.6121500000000000 + 8 | -29.2615500000000000 + 11 
| -16.4247732327144606 + 4 | 9.6930584054852110 + 0 | 27.7536500000000000 + 3 | 68.3874180471447875 + 5 | 103.1069000000000000 + | 2197.6842105263157895 +(10 rows) + select max(cint4) from aggfns where cfloat8 < 1000; max ------- @@ -3400,6 +5726,21 @@ select s, max(cint4) from aggfns where cfloat8 < 1000 group by s order by max(ci 9 | 16383 (10 rows) +select ss, max(cint4) from aggfns where cfloat8 < 1000 group by ss order by max(cint4), ss limit 10; + ss | max +----+------- + | 14812 + 3 | 16379 + 5 | 16379 + 7 | 16379 + 0 | 16383 + 4 | 16383 + 6 | 16383 + 8 | 16383 + 9 | 16383 + 11 | 16383 +(10 rows) + select min(cint4) from aggfns where cfloat8 < 1000; min -------- @@ -3421,6 +5762,21 @@ select s, min(cint4) from aggfns where cfloat8 < 1000 group by s order by min(ci 5 | -16380 (10 rows) +select ss, min(cint4) from aggfns where cfloat8 < 1000 group by ss order by min(cint4), ss limit 10; + ss | min +----+-------- + 0 | -16383 + 7 | -16383 + 11 | -16383 + 3 | -16382 + 4 | -16382 + 6 | -16382 + 8 | -16382 + 9 | -16382 + 5 | -16380 + | -15907 +(10 rows) + select stddev(cint4) from aggfns where cfloat8 < 1000; stddev ------------------- @@ -3442,6 +5798,21 @@ select s, stddev(cint4) from aggfns where cfloat8 < 1000 group by s order by std 5 | 9504.684751625578 (10 rows) +select ss, stddev(cint4) from aggfns where cfloat8 < 1000 group by ss order by stddev(cint4), ss limit 10; + ss | stddev +----+------------------- + | 9361.317298404296 + 0 | 9406.815855797801 + 6 | 9410.397911988306 + 9 | 9426.452583637956 + 4 | 9442.480718256247 + 8 | 9450.281544631633 + 11 | 9450.690059613938 + 3 | 9474.873657491443 + 7 | 9485.765898279180 + 5 | 9504.684751625578 +(10 rows) + select sum(cint4) from aggfns where cfloat8 < 1000; sum --------- @@ -3463,6 +5834,21 @@ select s, sum(cint4) from aggfns where cfloat8 < 1000 group by s order by sum(ci 5 | 2062138 (10 rows) +select ss, sum(cint4) from aggfns where cfloat8 < 1000 group by ss order by sum(cint4), ss limit 10; + ss | sum 
+----+---------- + 9 | -2048566 + 6 | -1063133 + 7 | -852243 + 11 | -657303 + 8 | -585231 + | 41756 + 4 | 193677 + 0 | 555073 + 3 | 1366449 + 5 | 2062138 +(10 rows) + select avg(cint8) from aggfns where cfloat8 < 1000; avg ---------------------- @@ -3484,6 +5870,21 @@ select s, avg(cint8) from aggfns where cfloat8 < 1000 group by s order by avg(ci 9 | 61.7467500000000000 (10 rows) +select ss, avg(cint8) from aggfns where cfloat8 < 1000 group by ss order by avg(cint8), ss limit 10; + ss | avg +----+----------------------- + 8 | -118.4870000000000000 + 5 | -81.6955500000000000 + 4 | -17.0811771182623492 + 11 | -15.1685449411529523 + 7 | -2.3563500000000000 + 6 | 11.9056500000000000 + 0 | 15.3018000000000000 + 3 | 37.6662329212752115 + 9 | 61.7467500000000000 + | 2467.2631578947368421 +(10 rows) + select max(cint8) from aggfns where cfloat8 < 1000; max ------- @@ -3505,6 +5906,21 @@ select s, max(cint8) from aggfns where cfloat8 < 1000 group by s order by max(ci 5 | 16383 (10 rows) +select ss, max(cint8) from aggfns where cfloat8 < 1000 group by ss order by max(cint8), ss limit 10; + ss | max +----+------- + | 13750 + 6 | 16380 + 7 | 16380 + 8 | 16380 + 3 | 16382 + 9 | 16382 + 0 | 16383 + 4 | 16383 + 5 | 16383 + 11 | 16383 +(10 rows) + select min(cint8) from aggfns where cfloat8 < 1000; min -------- @@ -3526,6 +5942,21 @@ select s, min(cint8) from aggfns where cfloat8 < 1000 group by s order by min(ci 3 | -16378 (10 rows) +select ss, min(cint8) from aggfns where cfloat8 < 1000 group by ss order by min(cint8), ss limit 10; + ss | min +----+-------- + 0 | -16383 + 6 | -16383 + 7 | -16383 + 8 | -16383 + 11 | -16383 + 5 | -16382 + 4 | -16381 + 9 | -16380 + 3 | -16378 + | -14174 +(10 rows) + select sum(cint8) from aggfns where cfloat8 < 1000; sum ---------- @@ -3547,6 +5978,21 @@ select s, sum(cint8) from aggfns where cfloat8 < 1000 group by s order by sum(ci 9 | 1234935 (10 rows) +select ss, sum(cint8) from aggfns where cfloat8 < 1000 group by ss order by sum(cint8), ss 
limit 10; + ss | sum +----+---------- + 8 | -2369740 + 5 | -1633911 + 11 | -607030 + 4 | -341299 + 7 | -47127 + | 46878 + 6 | 238113 + 0 | 306036 + 3 | 752609 + 9 | 1234935 +(10 rows) + select max(cts) from aggfns where cfloat8 < 1000; max -------------------------- @@ -3568,6 +6014,21 @@ select s, max(cts) from aggfns where cfloat8 < 1000 group by s order by max(cts) 9 | Sat Jan 02 02:01:01 2021 (10 rows) +select ss, max(cts) from aggfns where cfloat8 < 1000 group by ss order by max(cts), ss limit 10; + ss | max +----+-------------------------- + 0 | Fri Jan 01 01:01:01 2021 + 3 | Fri Jan 01 09:21:01 2021 + | Fri Jan 01 09:21:01 2021 + 4 | Fri Jan 01 12:07:41 2021 + 11 | Fri Jan 01 12:07:41 2021 + 5 | Fri Jan 01 14:54:21 2021 + 6 | Fri Jan 01 17:41:01 2021 + 7 | Fri Jan 01 20:27:41 2021 + 8 | Fri Jan 01 23:14:21 2021 + 9 | Sat Jan 02 02:01:01 2021 +(10 rows) + select min(cts) from aggfns where cfloat8 < 1000; min -------------------------- @@ -3589,6 +6050,21 @@ select s, min(cts) from aggfns where cfloat8 < 1000 group by s order by min(cts) 9 | Sat Jan 02 02:01:01 2021 (10 rows) +select ss, min(cts) from aggfns where cfloat8 < 1000 group by ss order by min(cts), ss limit 10; + ss | min +----+-------------------------- + 0 | Fri Jan 01 01:01:01 2021 + 11 | Fri Jan 01 03:47:41 2021 + 3 | Fri Jan 01 09:21:01 2021 + | Fri Jan 01 09:21:01 2021 + 4 | Fri Jan 01 12:07:41 2021 + 5 | Fri Jan 01 14:54:21 2021 + 6 | Fri Jan 01 17:41:01 2021 + 7 | Fri Jan 01 20:27:41 2021 + 8 | Fri Jan 01 23:14:21 2021 + 9 | Sat Jan 02 02:01:01 2021 +(10 rows) + select max(ctstz) from aggfns where cfloat8 < 1000; max ------------------------------ @@ -3610,6 +6086,21 @@ select s, max(ctstz) from aggfns where cfloat8 < 1000 group by s order by max(ct 9 | Sat Jan 02 02:01:01 2021 PST (10 rows) +select ss, max(ctstz) from aggfns where cfloat8 < 1000 group by ss order by max(ctstz), ss limit 10; + ss | max +----+------------------------------ + 0 | Fri Jan 01 01:01:01 2021 PST + 3 | Fri Jan 01 
09:21:01 2021 PST + | Fri Jan 01 09:21:01 2021 PST + 4 | Fri Jan 01 12:07:41 2021 PST + 11 | Fri Jan 01 12:07:41 2021 PST + 5 | Fri Jan 01 14:54:21 2021 PST + 6 | Fri Jan 01 17:41:01 2021 PST + 7 | Fri Jan 01 20:27:41 2021 PST + 8 | Fri Jan 01 23:14:21 2021 PST + 9 | Sat Jan 02 02:01:01 2021 PST +(10 rows) + select min(ctstz) from aggfns where cfloat8 < 1000; min ------------------------------ @@ -3631,6 +6122,21 @@ select s, min(ctstz) from aggfns where cfloat8 < 1000 group by s order by min(ct 9 | Sat Jan 02 02:01:01 2021 PST (10 rows) +select ss, min(ctstz) from aggfns where cfloat8 < 1000 group by ss order by min(ctstz), ss limit 10; + ss | min +----+------------------------------ + 0 | Fri Jan 01 01:01:01 2021 PST + 11 | Fri Jan 01 03:47:41 2021 PST + 3 | Fri Jan 01 09:21:01 2021 PST + | Fri Jan 01 09:21:01 2021 PST + 4 | Fri Jan 01 12:07:41 2021 PST + 5 | Fri Jan 01 14:54:21 2021 PST + 6 | Fri Jan 01 17:41:01 2021 PST + 7 | Fri Jan 01 20:27:41 2021 PST + 8 | Fri Jan 01 23:14:21 2021 PST + 9 | Sat Jan 02 02:01:01 2021 PST +(10 rows) + select avg(s) from aggfns where cfloat8 < 1000; avg -------------------- @@ -3652,6 +6158,21 @@ select s, avg(s) from aggfns where cfloat8 < 1000 group by s order by avg(s), s 9 | 9.0000000000000000 (10 rows) +select ss, avg(s) from aggfns where cfloat8 < 1000 group by ss order by avg(s), ss limit 10; + ss | avg +----+---------------------------- + 0 | 0.000000000000000000000000 + 11 | 1.5011869362053025 + 3 | 3.0000000000000000 + | 3.0000000000000000 + 4 | 4.0000000000000000 + 5 | 5.0000000000000000 + 6 | 6.0000000000000000 + 7 | 7.0000000000000000 + 8 | 8.0000000000000000 + 9 | 9.0000000000000000 +(10 rows) + select count(s) from aggfns where cfloat8 < 1000; count -------- @@ -3673,6 +6194,21 @@ select s, count(s) from aggfns where cfloat8 < 1000 group by s order by count(s) 9 | 20000 (10 rows) +select ss, count(s) from aggfns where cfloat8 < 1000 group by ss order by count(s), ss limit 10; + ss | count +----+------- + | 19 + 3 
| 19981 + 4 | 19981 + 0 | 20000 + 5 | 20000 + 6 | 20000 + 7 | 20000 + 8 | 20000 + 9 | 20000 + 11 | 40019 +(10 rows) + select max(s) from aggfns where cfloat8 < 1000; max ----- @@ -3694,6 +6230,21 @@ select s, max(s) from aggfns where cfloat8 < 1000 group by s order by max(s), s 9 | 9 (10 rows) +select ss, max(s) from aggfns where cfloat8 < 1000 group by ss order by max(s), ss limit 10; + ss | max +----+----- + 0 | 0 + 3 | 3 + | 3 + 4 | 4 + 11 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + select min(s) from aggfns where cfloat8 < 1000; min ----- @@ -3715,6 +6266,21 @@ select s, min(s) from aggfns where cfloat8 < 1000 group by s order by min(s), s 9 | 9 (10 rows) +select ss, min(s) from aggfns where cfloat8 < 1000 group by ss order by min(s), ss limit 10; + ss | min +----+----- + 0 | 0 + 11 | 1 + 3 | 3 + | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + select stddev(s) from aggfns where cfloat8 < 1000; stddev -------------------- @@ -3736,6 +6302,21 @@ select s, stddev(s) from aggfns where cfloat8 < 1000 group by s order by stddev( 9 | 0 (10 rows) +select ss, stddev(s) from aggfns where cfloat8 < 1000 group by ss order by stddev(s), ss limit 10; + ss | stddev +----+------------------------ + 0 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 + | 0 + 11 | 0.50284545977155885187 +(10 rows) + select sum(s) from aggfns where cfloat8 < 1000; sum -------- @@ -3757,6 +6338,21 @@ select s, sum(s) from aggfns where cfloat8 < 1000 group by s order by sum(s), s 9 | 180000 (10 rows) +select ss, sum(s) from aggfns where cfloat8 < 1000 group by ss order by sum(s), ss limit 10; + ss | sum +----+-------- + 0 | 0 + | 57 + 3 | 59943 + 11 | 60076 + 4 | 79924 + 5 | 100000 + 6 | 120000 + 7 | 140000 + 8 | 160000 + 9 | 180000 +(10 rows) + select avg(ss) from aggfns where cfloat8 < 1000; avg -------------------- @@ -3778,6 +6374,21 @@ select s, avg(ss) from aggfns where cfloat8 < 1000 group by s order by avg(ss), 2 | 11.0000000000000000 (10 rows) 
+select ss, avg(ss) from aggfns where cfloat8 < 1000 group by ss order by avg(ss), ss limit 10; + ss | avg +----+---------------------------- + 0 | 0.000000000000000000000000 + 3 | 3.0000000000000000 + 4 | 4.0000000000000000 + 5 | 5.0000000000000000 + 6 | 6.0000000000000000 + 7 | 7.0000000000000000 + 8 | 8.0000000000000000 + 9 | 9.0000000000000000 + 11 | 11.0000000000000000 + | +(10 rows) + select max(ss) from aggfns where cfloat8 < 1000; max ----- @@ -3799,6 +6410,21 @@ select s, max(ss) from aggfns where cfloat8 < 1000 group by s order by max(ss), 4 | 11 (10 rows) +select ss, max(ss) from aggfns where cfloat8 < 1000 group by ss order by max(ss), ss limit 10; + ss | max +----+----- + 0 | 0 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 11 | 11 + | +(10 rows) + select min(ss) from aggfns where cfloat8 < 1000; min ----- @@ -3820,6 +6446,21 @@ select s, min(ss) from aggfns where cfloat8 < 1000 group by s order by min(ss), 2 | 11 (10 rows) +select ss, min(ss) from aggfns where cfloat8 < 1000 group by ss order by min(ss), ss limit 10; + ss | min +----+----- + 0 | 0 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 11 | 11 + | +(10 rows) + select stddev(ss) from aggfns where cfloat8 < 1000; stddev -------------------- @@ -3841,6 +6482,21 @@ select s, stddev(ss) from aggfns where cfloat8 < 1000 group by s order by stddev 4 | 0.21565737387148452722 (10 rows) +select ss, stddev(ss) from aggfns where cfloat8 < 1000 group by ss order by stddev(ss), ss limit 10; + ss | stddev +----+-------- + 0 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 + 11 | 0 + | +(10 rows) + select sum(ss) from aggfns where cfloat8 < 1000; sum --------- @@ -3862,6 +6518,21 @@ select s, sum(ss) from aggfns where cfloat8 < 1000 group by s order by sum(ss), 2 | 220000 (10 rows) +select ss, sum(ss) from aggfns where cfloat8 < 1000 group by ss order by sum(ss), ss limit 10; + ss | sum +----+-------- + 0 | 0 + 3 | 59943 + 4 | 79924 + 5 | 100000 + 6 | 120000 + 7 | 140000 + 8 | 
160000 + 9 | 180000 + 11 | 440209 + | +(10 rows) + select max(t) from aggfns where cfloat8 < 1000; max -------- @@ -3883,6 +6554,21 @@ select s, max(t) from aggfns where cfloat8 < 1000 group by s order by max(t), s 9 | 110000 (10 rows) +select ss, max(t) from aggfns where cfloat8 < 1000 group by ss order by max(t), ss limit 10; + ss | max +----+-------- + 0 | 20000 + | 49491 + 3 | 50000 + 11 | 59192 + 4 | 60000 + 5 | 70000 + 6 | 80000 + 7 | 90000 + 8 | 100000 + 9 | 110000 +(10 rows) + select min(t) from aggfns where cfloat8 < 1000; min ----- @@ -3904,6 +6590,21 @@ select s, min(t) from aggfns where cfloat8 < 1000 group by s order by min(t), s 9 | 90001 (10 rows) +select ss, min(t) from aggfns where cfloat8 < 1000 group by ss order by min(t), ss limit 10; + ss | min +----+------- + 0 | 1 + 11 | 10001 + 3 | 30001 + | 30537 + 4 | 40001 + 5 | 50001 + 6 | 60001 + 7 | 70001 + 8 | 80001 + 9 | 90001 +(10 rows) + select count(*) from aggfns where cfloat8 > 1000; count ------- @@ -3915,6 +6616,11 @@ select s, count(*) from aggfns where cfloat8 > 1000 group by s order by count(*) ---+------- (0 rows) +select ss, count(*) from aggfns where cfloat8 > 1000 group by ss order by count(*), ss limit 10; + ss | count +----+------- +(0 rows) + select max(cdate) from aggfns where cfloat8 > 1000; max ----- @@ -3926,6 +6632,11 @@ select s, max(cdate) from aggfns where cfloat8 > 1000 group by s order by max(cd ---+----- (0 rows) +select ss, max(cdate) from aggfns where cfloat8 > 1000 group by ss order by max(cdate), ss limit 10; + ss | max +----+----- +(0 rows) + select min(cdate) from aggfns where cfloat8 > 1000; min ----- @@ -3937,6 +6648,11 @@ select s, min(cdate) from aggfns where cfloat8 > 1000 group by s order by min(cd ---+----- (0 rows) +select ss, min(cdate) from aggfns where cfloat8 > 1000 group by ss order by min(cdate), ss limit 10; + ss | min +----+----- +(0 rows) + select avg(cfloat4) from aggfns where cfloat8 > 1000; avg ----- @@ -3948,6 +6664,11 @@ select s, avg(cfloat4) 
from aggfns where cfloat8 > 1000 group by s order by avg( ---+----- (0 rows) +select ss, avg(cfloat4) from aggfns where cfloat8 > 1000 group by ss order by avg(cfloat4), ss limit 10; + ss | avg +----+----- +(0 rows) + select max(cfloat4) from aggfns where cfloat8 > 1000; max ----- @@ -3959,6 +6680,11 @@ select s, max(cfloat4) from aggfns where cfloat8 > 1000 group by s order by max( ---+----- (0 rows) +select ss, max(cfloat4) from aggfns where cfloat8 > 1000 group by ss order by max(cfloat4), ss limit 10; + ss | max +----+----- +(0 rows) + select min(cfloat4) from aggfns where cfloat8 > 1000; min ----- @@ -3970,6 +6696,11 @@ select s, min(cfloat4) from aggfns where cfloat8 > 1000 group by s order by min( ---+----- (0 rows) +select ss, min(cfloat4) from aggfns where cfloat8 > 1000 group by ss order by min(cfloat4), ss limit 10; + ss | min +----+----- +(0 rows) + select stddev(cfloat4) from aggfns where cfloat8 > 1000; stddev -------- @@ -3981,6 +6712,11 @@ select s, stddev(cfloat4) from aggfns where cfloat8 > 1000 group by s order by s ---+-------- (0 rows) +select ss, stddev(cfloat4) from aggfns where cfloat8 > 1000 group by ss order by stddev(cfloat4), ss limit 10; + ss | stddev +----+-------- +(0 rows) + select sum(cfloat4) from aggfns where cfloat8 > 1000; sum ----- @@ -3992,6 +6728,11 @@ select s, sum(cfloat4) from aggfns where cfloat8 > 1000 group by s order by sum( ---+----- (0 rows) +select ss, sum(cfloat4) from aggfns where cfloat8 > 1000 group by ss order by sum(cfloat4), ss limit 10; + ss | sum +----+----- +(0 rows) + select avg(cfloat8) from aggfns where cfloat8 > 1000; avg ----- @@ -4003,6 +6744,11 @@ select s, avg(cfloat8) from aggfns where cfloat8 > 1000 group by s order by avg( ---+----- (0 rows) +select ss, avg(cfloat8) from aggfns where cfloat8 > 1000 group by ss order by avg(cfloat8), ss limit 10; + ss | avg +----+----- +(0 rows) + select max(cfloat8) from aggfns where cfloat8 > 1000; max ----- @@ -4014,6 +6760,11 @@ select s, max(cfloat8) from 
aggfns where cfloat8 > 1000 group by s order by max( ---+----- (0 rows) +select ss, max(cfloat8) from aggfns where cfloat8 > 1000 group by ss order by max(cfloat8), ss limit 10; + ss | max +----+----- +(0 rows) + select min(cfloat8) from aggfns where cfloat8 > 1000; min ----- @@ -4025,6 +6776,11 @@ select s, min(cfloat8) from aggfns where cfloat8 > 1000 group by s order by min( ---+----- (0 rows) +select ss, min(cfloat8) from aggfns where cfloat8 > 1000 group by ss order by min(cfloat8), ss limit 10; + ss | min +----+----- +(0 rows) + select stddev(cfloat8) from aggfns where cfloat8 > 1000; stddev -------- @@ -4036,6 +6792,11 @@ select s, stddev(cfloat8) from aggfns where cfloat8 > 1000 group by s order by s ---+-------- (0 rows) +select ss, stddev(cfloat8) from aggfns where cfloat8 > 1000 group by ss order by stddev(cfloat8), ss limit 10; + ss | stddev +----+-------- +(0 rows) + select sum(cfloat8) from aggfns where cfloat8 > 1000; sum ----- @@ -4047,6 +6808,11 @@ select s, sum(cfloat8) from aggfns where cfloat8 > 1000 group by s order by sum( ---+----- (0 rows) +select ss, sum(cfloat8) from aggfns where cfloat8 > 1000 group by ss order by sum(cfloat8), ss limit 10; + ss | sum +----+----- +(0 rows) + select avg(cint2) from aggfns where cfloat8 > 1000; avg ----- @@ -4058,6 +6824,11 @@ select s, avg(cint2) from aggfns where cfloat8 > 1000 group by s order by avg(ci ---+----- (0 rows) +select ss, avg(cint2) from aggfns where cfloat8 > 1000 group by ss order by avg(cint2), ss limit 10; + ss | avg +----+----- +(0 rows) + select count(cint2) from aggfns where cfloat8 > 1000; count ------- @@ -4069,6 +6840,11 @@ select s, count(cint2) from aggfns where cfloat8 > 1000 group by s order by coun ---+------- (0 rows) +select ss, count(cint2) from aggfns where cfloat8 > 1000 group by ss order by count(cint2), ss limit 10; + ss | count +----+------- +(0 rows) + select max(cint2) from aggfns where cfloat8 > 1000; max ----- @@ -4080,6 +6856,11 @@ select s, max(cint2) from aggfns 
where cfloat8 > 1000 group by s order by max(ci ---+----- (0 rows) +select ss, max(cint2) from aggfns where cfloat8 > 1000 group by ss order by max(cint2), ss limit 10; + ss | max +----+----- +(0 rows) + select min(cint2) from aggfns where cfloat8 > 1000; min ----- @@ -4091,6 +6872,11 @@ select s, min(cint2) from aggfns where cfloat8 > 1000 group by s order by min(ci ---+----- (0 rows) +select ss, min(cint2) from aggfns where cfloat8 > 1000 group by ss order by min(cint2), ss limit 10; + ss | min +----+----- +(0 rows) + select stddev(cint2) from aggfns where cfloat8 > 1000; stddev -------- @@ -4101,6 +6887,11 @@ select s, stddev(cint2) from aggfns where cfloat8 > 1000 group by s order by std ---+-------- (0 rows) +select ss, stddev(cint2) from aggfns where cfloat8 > 1000 group by ss order by stddev(cint2), ss limit 10; + ss | stddev +----+-------- +(0 rows) + select sum(cint2) from aggfns where cfloat8 > 1000; sum ----- @@ -4112,6 +6903,11 @@ select s, sum(cint2) from aggfns where cfloat8 > 1000 group by s order by sum(ci ---+----- (0 rows) +select ss, sum(cint2) from aggfns where cfloat8 > 1000 group by ss order by sum(cint2), ss limit 10; + ss | sum +----+----- +(0 rows) + select avg(cint4) from aggfns where cfloat8 > 1000; avg ----- @@ -4123,6 +6919,11 @@ select s, avg(cint4) from aggfns where cfloat8 > 1000 group by s order by avg(ci ---+----- (0 rows) +select ss, avg(cint4) from aggfns where cfloat8 > 1000 group by ss order by avg(cint4), ss limit 10; + ss | avg +----+----- +(0 rows) + select max(cint4) from aggfns where cfloat8 > 1000; max ----- @@ -4134,6 +6935,11 @@ select s, max(cint4) from aggfns where cfloat8 > 1000 group by s order by max(ci ---+----- (0 rows) +select ss, max(cint4) from aggfns where cfloat8 > 1000 group by ss order by max(cint4), ss limit 10; + ss | max +----+----- +(0 rows) + select min(cint4) from aggfns where cfloat8 > 1000; min ----- @@ -4145,6 +6951,11 @@ select s, min(cint4) from aggfns where cfloat8 > 1000 group by s order by 
min(ci ---+----- (0 rows) +select ss, min(cint4) from aggfns where cfloat8 > 1000 group by ss order by min(cint4), ss limit 10; + ss | min +----+----- +(0 rows) + select stddev(cint4) from aggfns where cfloat8 > 1000; stddev -------- @@ -4155,6 +6966,11 @@ select s, stddev(cint4) from aggfns where cfloat8 > 1000 group by s order by std ---+-------- (0 rows) +select ss, stddev(cint4) from aggfns where cfloat8 > 1000 group by ss order by stddev(cint4), ss limit 10; + ss | stddev +----+-------- +(0 rows) + select sum(cint4) from aggfns where cfloat8 > 1000; sum ----- @@ -4166,6 +6982,11 @@ select s, sum(cint4) from aggfns where cfloat8 > 1000 group by s order by sum(ci ---+----- (0 rows) +select ss, sum(cint4) from aggfns where cfloat8 > 1000 group by ss order by sum(cint4), ss limit 10; + ss | sum +----+----- +(0 rows) + select avg(cint8) from aggfns where cfloat8 > 1000; avg ----- @@ -4176,6 +6997,11 @@ select s, avg(cint8) from aggfns where cfloat8 > 1000 group by s order by avg(ci ---+----- (0 rows) +select ss, avg(cint8) from aggfns where cfloat8 > 1000 group by ss order by avg(cint8), ss limit 10; + ss | avg +----+----- +(0 rows) + select max(cint8) from aggfns where cfloat8 > 1000; max ----- @@ -4187,6 +7013,11 @@ select s, max(cint8) from aggfns where cfloat8 > 1000 group by s order by max(ci ---+----- (0 rows) +select ss, max(cint8) from aggfns where cfloat8 > 1000 group by ss order by max(cint8), ss limit 10; + ss | max +----+----- +(0 rows) + select min(cint8) from aggfns where cfloat8 > 1000; min ----- @@ -4198,6 +7029,11 @@ select s, min(cint8) from aggfns where cfloat8 > 1000 group by s order by min(ci ---+----- (0 rows) +select ss, min(cint8) from aggfns where cfloat8 > 1000 group by ss order by min(cint8), ss limit 10; + ss | min +----+----- +(0 rows) + select sum(cint8) from aggfns where cfloat8 > 1000; sum ----- @@ -4208,6 +7044,11 @@ select s, sum(cint8) from aggfns where cfloat8 > 1000 group by s order by sum(ci ---+----- (0 rows) +select ss, 
sum(cint8) from aggfns where cfloat8 > 1000 group by ss order by sum(cint8), ss limit 10; + ss | sum +----+----- +(0 rows) + select max(cts) from aggfns where cfloat8 > 1000; max ----- @@ -4219,6 +7060,11 @@ select s, max(cts) from aggfns where cfloat8 > 1000 group by s order by max(cts) ---+----- (0 rows) +select ss, max(cts) from aggfns where cfloat8 > 1000 group by ss order by max(cts), ss limit 10; + ss | max +----+----- +(0 rows) + select min(cts) from aggfns where cfloat8 > 1000; min ----- @@ -4230,6 +7076,11 @@ select s, min(cts) from aggfns where cfloat8 > 1000 group by s order by min(cts) ---+----- (0 rows) +select ss, min(cts) from aggfns where cfloat8 > 1000 group by ss order by min(cts), ss limit 10; + ss | min +----+----- +(0 rows) + select max(ctstz) from aggfns where cfloat8 > 1000; max ----- @@ -4241,6 +7092,11 @@ select s, max(ctstz) from aggfns where cfloat8 > 1000 group by s order by max(ct ---+----- (0 rows) +select ss, max(ctstz) from aggfns where cfloat8 > 1000 group by ss order by max(ctstz), ss limit 10; + ss | max +----+----- +(0 rows) + select min(ctstz) from aggfns where cfloat8 > 1000; min ----- @@ -4252,6 +7108,11 @@ select s, min(ctstz) from aggfns where cfloat8 > 1000 group by s order by min(ct ---+----- (0 rows) +select ss, min(ctstz) from aggfns where cfloat8 > 1000 group by ss order by min(ctstz), ss limit 10; + ss | min +----+----- +(0 rows) + select avg(s) from aggfns where cfloat8 > 1000; avg ----- @@ -4263,6 +7124,11 @@ select s, avg(s) from aggfns where cfloat8 > 1000 group by s order by avg(s), s ---+----- (0 rows) +select ss, avg(s) from aggfns where cfloat8 > 1000 group by ss order by avg(s), ss limit 10; + ss | avg +----+----- +(0 rows) + select count(s) from aggfns where cfloat8 > 1000; count ------- @@ -4274,6 +7140,11 @@ select s, count(s) from aggfns where cfloat8 > 1000 group by s order by count(s) ---+------- (0 rows) +select ss, count(s) from aggfns where cfloat8 > 1000 group by ss order by count(s), ss limit 10; + 
ss | count +----+------- +(0 rows) + select max(s) from aggfns where cfloat8 > 1000; max ----- @@ -4285,6 +7156,11 @@ select s, max(s) from aggfns where cfloat8 > 1000 group by s order by max(s), s ---+----- (0 rows) +select ss, max(s) from aggfns where cfloat8 > 1000 group by ss order by max(s), ss limit 10; + ss | max +----+----- +(0 rows) + select min(s) from aggfns where cfloat8 > 1000; min ----- @@ -4296,6 +7172,11 @@ select s, min(s) from aggfns where cfloat8 > 1000 group by s order by min(s), s ---+----- (0 rows) +select ss, min(s) from aggfns where cfloat8 > 1000 group by ss order by min(s), ss limit 10; + ss | min +----+----- +(0 rows) + select stddev(s) from aggfns where cfloat8 > 1000; stddev -------- @@ -4306,6 +7187,11 @@ select s, stddev(s) from aggfns where cfloat8 > 1000 group by s order by stddev( ---+-------- (0 rows) +select ss, stddev(s) from aggfns where cfloat8 > 1000 group by ss order by stddev(s), ss limit 10; + ss | stddev +----+-------- +(0 rows) + select sum(s) from aggfns where cfloat8 > 1000; sum ----- @@ -4317,6 +7203,11 @@ select s, sum(s) from aggfns where cfloat8 > 1000 group by s order by sum(s), s ---+----- (0 rows) +select ss, sum(s) from aggfns where cfloat8 > 1000 group by ss order by sum(s), ss limit 10; + ss | sum +----+----- +(0 rows) + select avg(ss) from aggfns where cfloat8 > 1000; avg ----- @@ -4328,6 +7219,11 @@ select s, avg(ss) from aggfns where cfloat8 > 1000 group by s order by avg(ss), ---+----- (0 rows) +select ss, avg(ss) from aggfns where cfloat8 > 1000 group by ss order by avg(ss), ss limit 10; + ss | avg +----+----- +(0 rows) + select max(ss) from aggfns where cfloat8 > 1000; max ----- @@ -4339,6 +7235,11 @@ select s, max(ss) from aggfns where cfloat8 > 1000 group by s order by max(ss), ---+----- (0 rows) +select ss, max(ss) from aggfns where cfloat8 > 1000 group by ss order by max(ss), ss limit 10; + ss | max +----+----- +(0 rows) + select min(ss) from aggfns where cfloat8 > 1000; min ----- @@ -4350,6 
+7251,11 @@ select s, min(ss) from aggfns where cfloat8 > 1000 group by s order by min(ss), ---+----- (0 rows) +select ss, min(ss) from aggfns where cfloat8 > 1000 group by ss order by min(ss), ss limit 10; + ss | min +----+----- +(0 rows) + select stddev(ss) from aggfns where cfloat8 > 1000; stddev -------- @@ -4360,6 +7266,11 @@ select s, stddev(ss) from aggfns where cfloat8 > 1000 group by s order by stddev ---+-------- (0 rows) +select ss, stddev(ss) from aggfns where cfloat8 > 1000 group by ss order by stddev(ss), ss limit 10; + ss | stddev +----+-------- +(0 rows) + select sum(ss) from aggfns where cfloat8 > 1000; sum ----- @@ -4371,6 +7282,11 @@ select s, sum(ss) from aggfns where cfloat8 > 1000 group by s order by sum(ss), ---+----- (0 rows) +select ss, sum(ss) from aggfns where cfloat8 > 1000 group by ss order by sum(ss), ss limit 10; + ss | sum +----+----- +(0 rows) + select max(t) from aggfns where cfloat8 > 1000; max ----- @@ -4382,6 +7298,11 @@ select s, max(t) from aggfns where cfloat8 > 1000 group by s order by max(t), s ---+----- (0 rows) +select ss, max(t) from aggfns where cfloat8 > 1000 group by ss order by max(t), ss limit 10; + ss | max +----+----- +(0 rows) + select min(t) from aggfns where cfloat8 > 1000; min ----- @@ -4393,6 +7314,11 @@ select s, min(t) from aggfns where cfloat8 > 1000 group by s order by min(t), s ---+----- (0 rows) +select ss, min(t) from aggfns where cfloat8 > 1000 group by ss order by min(t), ss limit 10; + ss | min +----+----- +(0 rows) + select avg(cint2) from aggfns where cint2 is null; avg ----- @@ -4414,6 +7340,20 @@ select s, avg(cint2) from aggfns where cint2 is null group by s order by avg(cin 9 | (10 rows) +select ss, avg(cint2) from aggfns where cint2 is null group by ss order by avg(cint2), ss limit 10; + ss | avg +----+----- + 0 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | + 9 | + 11 | +(9 rows) + select count(cint2) from aggfns where cint2 is null; count ------- @@ -4435,6 +7375,20 @@ select s, count(cint2) from 
aggfns where cint2 is null group by s order by count 9 | 0 (10 rows) +select ss, count(cint2) from aggfns where cint2 is null group by ss order by count(cint2), ss limit 10; + ss | count +----+------- + 0 | 0 + 3 | 0 + 4 | 0 + 5 | 0 + 6 | 0 + 7 | 0 + 8 | 0 + 9 | 0 + 11 | 0 +(9 rows) + select max(cint2) from aggfns where cint2 is null; max ----- @@ -4456,6 +7410,20 @@ select s, max(cint2) from aggfns where cint2 is null group by s order by max(cin 9 | (10 rows) +select ss, max(cint2) from aggfns where cint2 is null group by ss order by max(cint2), ss limit 10; + ss | max +----+----- + 0 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | + 9 | + 11 | +(9 rows) + select min(cint2) from aggfns where cint2 is null; min ----- @@ -4477,6 +7445,20 @@ select s, min(cint2) from aggfns where cint2 is null group by s order by min(cin 9 | (10 rows) +select ss, min(cint2) from aggfns where cint2 is null group by ss order by min(cint2), ss limit 10; + ss | min +----+----- + 0 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | + 9 | + 11 | +(9 rows) + select stddev(cint2) from aggfns where cint2 is null; stddev -------- @@ -4498,6 +7480,20 @@ select s, stddev(cint2) from aggfns where cint2 is null group by s order by stdd 9 | (10 rows) +select ss, stddev(cint2) from aggfns where cint2 is null group by ss order by stddev(cint2), ss limit 10; + ss | stddev +----+-------- + 0 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | + 9 | + 11 | +(9 rows) + select sum(cint2) from aggfns where cint2 is null; sum ----- @@ -4519,6 +7515,38 @@ select s, sum(cint2) from aggfns where cint2 is null group by s order by sum(cin 9 | (10 rows) +select ss, sum(cint2) from aggfns where cint2 is null group by ss order by sum(cint2), ss limit 10; + ss | sum +----+----- + 0 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | + 9 | + 11 | +(9 rows) + +-- Test multiple aggregate functions as well. 
+select count(*), count(cint2), min(cfloat4), cint2 from aggfns group by cint2 +order by count(*) desc, cint2 limit 10 +; + count | count | min | cint2 +-------+-------+----------+-------- + 190 | 0 | -49.6644 | + 19 | 19 | -49.5299 | -8743 + 17 | 17 | -48.5497 | -12921 + 17 | 17 | -48.5697 | -701 + 17 | 17 | -47.2641 | 525 + 16 | 16 | -42.41 | -10234 + 16 | 16 | -47.2083 | -4609 + 16 | 16 | -49.9555 | -4261 + 16 | 16 | -35.1643 | -296 + 16 | 16 | -45.4426 | 1373 +(10 rows) + -- Test edge cases for various batch sizes and the filter matching around batch -- end. select count(*) from edges; @@ -4588,3 +7616,45 @@ select s, count(*), min(f1) from edges where f1 = 65 group by 1 order by 1; 12 | 1 | 65 (8 rows) +select ss, count(*), min(f1) from edges where f1 = 63 group by 1 order by 1; + ss | count | min +----+-------+----- + 3 | 1 | 63 + 4 | 1 | 63 + 5 | 1 | 63 + 6 | 1 | 63 + 7 | 1 | 63 + 8 | 1 | 63 + 9 | 1 | 63 + 10 | 1 | 63 + 11 | 1 | 63 + 12 | 1 | 63 +(10 rows) + +select ss, count(*), min(f1) from edges where f1 = 64 group by 1 order by 1; + ss | count | min +----+-------+----- + 4 | 1 | 64 + 5 | 1 | 64 + 6 | 1 | 64 + 7 | 1 | 64 + 8 | 1 | 64 + 9 | 1 | 64 + 10 | 1 | 64 + 11 | 1 | 64 + 12 | 1 | 64 +(9 rows) + +select ss, count(*), min(f1) from edges where f1 = 65 group by 1 order by 1; + ss | count | min +----+-------+----- + 5 | 1 | 65 + 6 | 1 | 65 + 7 | 1 | 65 + 8 | 1 | 65 + 9 | 1 | 65 + 10 | 1 | 65 + 11 | 1 | 65 + 12 | 1 | 65 +(8 rows) + diff --git a/tsl/test/sql/vector_agg_functions.sql b/tsl/test/sql/vector_agg_functions.sql index 0e6a0d0860f..2876a303fba 100644 --- a/tsl/test/sql/vector_agg_functions.sql +++ b/tsl/test/sql/vector_agg_functions.sql @@ -98,7 +98,7 @@ limit 1 set timescaledb.debug_require_vector_agg = :'guc_value'; ---- Uncomment to generate reference. Note that there are minor discrepancies ---- on float4 due to different numeric stability in our and PG implementations. 
--- set timescaledb.enable_vectorized_aggregation to off; set timescaledb.debug_require_vector_agg = 'allow'; +--set timescaledb.enable_vectorized_aggregation to off; set timescaledb.debug_require_vector_agg = 'forbid'; select format('%sselect %s%s(%s) from aggfns%s%s%s;', @@ -142,7 +142,8 @@ from 'cint2 is null']) with ordinality as condition(condition, n), unnest(array[ null, - 's']) with ordinality as grouping(grouping, n) + 's', + 'ss']) with ordinality as grouping(grouping, n) where true and (explain is null /* or condition is null and grouping = 's' */) @@ -156,6 +157,11 @@ order by explain, condition.n, variable, function, grouping.n \gexec +-- Test multiple aggregate functions as well. +select count(*), count(cint2), min(cfloat4), cint2 from aggfns group by cint2 +order by count(*) desc, cint2 limit 10 +; + -- Test edge cases for various batch sizes and the filter matching around batch -- end. select count(*) from edges; @@ -164,3 +170,7 @@ select s, count(*) from edges group by 1 order by 1; select s, count(*), min(f1) from edges where f1 = 63 group by 1 order by 1; select s, count(*), min(f1) from edges where f1 = 64 group by 1 order by 1; select s, count(*), min(f1) from edges where f1 = 65 group by 1 order by 1; + +select ss, count(*), min(f1) from edges where f1 = 63 group by 1 order by 1; +select ss, count(*), min(f1) from edges where f1 = 64 group by 1 order by 1; +select ss, count(*), min(f1) from edges where f1 = 65 group by 1 order by 1; From b6cee0294be55ba28fa78ecf7d3d52a17c5f4941 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 3 Dec 2024 15:23:45 +0100 Subject: [PATCH 42/58] remove extras --- .../nodes/vector_agg/grouping_policy_hash.c | 4 - .../vector_agg/hashing/hash_strategy_impl.c | 97 +------------------ .../vector_agg/hashing/hashing_strategy.h | 13 +-- 3 files changed, 6 insertions(+), 108 deletions(-) diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.c 
b/tsl/src/nodes/vector_agg/grouping_policy_hash.c index 12e0f09700e..a2b388b4b4c 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.c @@ -34,8 +34,6 @@ extern HashingStrategy single_fixed_2_strategy; extern HashingStrategy single_fixed_4_strategy; extern HashingStrategy single_fixed_8_strategy; -extern HashingStrategy single_text_strategy; -extern HashingStrategy serialized_strategy; static const GroupingPolicy grouping_policy_hash_functions; @@ -86,8 +84,6 @@ create_grouping_policy_hash(int num_agg_defs, VectorAggDef *agg_defs, int num_gr break; } - policy->hashing.key_body_mctx = policy->agg_extra_mctx; - policy->hashing.init(&policy->hashing, policy); return &policy->funcs; diff --git a/tsl/src/nodes/vector_agg/hashing/hash_strategy_impl.c b/tsl/src/nodes/vector_agg/hashing/hash_strategy_impl.c index 5656e8d69b4..fa5c1f7c7d1 100644 --- a/tsl/src/nodes/vector_agg/hashing/hash_strategy_impl.c +++ b/tsl/src/nodes/vector_agg/hashing/hash_strategy_impl.c @@ -157,92 +157,6 @@ FUNCTION_NAME(fill_offsets_impl)(BatchHashingParams params, int start_row, int e } } -/* - * For some configurations of hashing, we want to generate dedicated - * implementations that will be more efficient. For example, for 2-byte keys - * when all the batch and key rows are valid. 
- */ -#define APPLY_FOR_BATCH_FILTER(X, NAME, COND) \ - X(NAME##_nofilter, (COND) && (params.batch_filter == NULL)) \ - X(NAME##_filter, (COND) && (params.batch_filter != NULL)) - -#define APPLY_FOR_NULLABILITY(X, NAME, COND) \ - APPLY_FOR_BATCH_FILTER(X, NAME##_notnull, (COND) && params.single_key.buffers[0] == NULL) \ - APPLY_FOR_BATCH_FILTER(X, NAME##_nullable, (COND) && params.single_key.buffers[0] != NULL) - -#define APPLY_FOR_SCALARS(X, NAME, COND) \ - APPLY_FOR_BATCH_FILTER(X, \ - NAME##_noscalar_notnull, \ - (COND) && !params.have_scalar_or_nullable_columns) \ - APPLY_FOR_BATCH_FILTER(X, \ - NAME##_scalar_or_nullable, \ - (COND) && params.have_scalar_or_nullable_columns) - -#define APPLY_FOR_TYPE(X, NAME, COND) \ - APPLY_FOR_NULLABILITY(X, \ - NAME##_byval, \ - (COND) && \ - params.single_key.decompression_type == sizeof(OUTPUT_KEY_TYPE)) \ - APPLY_FOR_NULLABILITY(X, \ - NAME##_text, \ - (COND) && params.single_key.decompression_type == DT_ArrowText) \ - APPLY_FOR_NULLABILITY(X, \ - NAME##_dict, \ - (COND) && params.single_key.decompression_type == DT_ArrowTextDict) \ - APPLY_FOR_SCALARS(X, NAME##_multi, (COND) && params.single_key.decompression_type == DT_Invalid) - -#define APPLY_FOR_SPECIALIZATIONS(X) APPLY_FOR_TYPE(X, index, true) - -#define DEFINE(NAME, CONDITION) \ - static pg_noinline void FUNCTION_NAME( \ - NAME)(BatchHashingParams params, int start_row, int end_row) \ - { \ - if (!(CONDITION)) \ - { \ - pg_unreachable(); \ - } \ - \ - FUNCTION_NAME(fill_offsets_impl)(params, start_row, end_row); \ - } - -APPLY_FOR_SPECIALIZATIONS(DEFINE) - -#undef DEFINE - -static void -FUNCTION_NAME(dispatch_for_params)(BatchHashingParams params, int start_row, int end_row) -{ - if (params.num_grouping_columns == 0) - { - pg_unreachable(); - } - - if ((params.num_grouping_columns == 1) != (params.single_key.decompression_type != DT_Invalid)) - { - pg_unreachable(); - } - -#define DISPATCH(NAME, CONDITION) \ - if (CONDITION) \ - { \ - FUNCTION_NAME(NAME)(params, 
start_row, end_row); \ - } \ - else - - APPLY_FOR_SPECIALIZATIONS(DISPATCH) - { - /* Use a generic implementation if no specializations matched. */ - FUNCTION_NAME(fill_offsets_impl)(params, start_row, end_row); - } -#undef DISPATCH -} - -#undef APPLY_FOR_SPECIALIZATIONS - -/* - * In some special cases we call a more efficient specialization of the grouping - * function. - */ static void FUNCTION_NAME(fill_offsets)(GroupingPolicyHash *policy, DecompressBatchState *batch_state, int start_row, int end_row) @@ -251,16 +165,7 @@ FUNCTION_NAME(fill_offsets)(GroupingPolicyHash *policy, DecompressBatchState *ba BatchHashingParams params = build_batch_hashing_params(policy, batch_state); -#ifdef USE_DICT_HASHING - if (policy->use_key_index_for_dict) - { - Assert(params.single_key.decompression_type == DT_ArrowTextDict); - single_text_offsets_translate(params, start_row, end_row); - return; - } -#endif - - FUNCTION_NAME(dispatch_for_params)(params, start_row, end_row); + FUNCTION_NAME(fill_offsets_impl)(params, start_row, end_row); } HashingStrategy FUNCTION_NAME(strategy) = { diff --git a/tsl/src/nodes/vector_agg/hashing/hashing_strategy.h b/tsl/src/nodes/vector_agg/hashing/hashing_strategy.h index e2bfda3b180..73c6130ffca 100644 --- a/tsl/src/nodes/vector_agg/hashing/hashing_strategy.h +++ b/tsl/src/nodes/vector_agg/hashing/hashing_strategy.h @@ -15,6 +15,10 @@ typedef struct DecompressBatchState DecompressBatchState; typedef struct TupleTableSlot TupleTableSlot; +/* + * The hashing strategy manages the details of how the grouping keys are stored + * in a hash table. + */ typedef struct HashingStrategy { char *explain_name; @@ -38,12 +42,10 @@ typedef struct HashingStrategy * This is stored separately from hash table keys, because they might not * have the full column values, and also storing them contiguously here * leads to better memory access patterns when emitting the results. - * The details of the key storage are managed by the hashing strategy. 
The - * by-reference keys can use a separate memory context for dense storage. + * The details of the key storage are managed by the hashing strategy. */ Datum *restrict output_keys; uint64 num_allocated_output_keys; - MemoryContext key_body_mctx; /* * In single-column grouping, we store the null key outside of the hash @@ -52,11 +54,6 @@ typedef struct HashingStrategy * to reduce the hash table size. */ uint32 null_key_index; - - /* - * UMASH fingerprinting parameters. - */ - struct umash_params *umash_params; } HashingStrategy; void hash_strategy_output_key_alloc(GroupingPolicyHash *policy, DecompressBatchState *batch_state); From ecb1aec6721f87b92606604d4cedc07df7f52e9e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 3 Dec 2024 15:25:53 +0100 Subject: [PATCH 43/58] ref --- tsl/test/expected/vectorized_aggregation.out | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tsl/test/expected/vectorized_aggregation.out b/tsl/test/expected/vectorized_aggregation.out index 0b0c266a332..a5623050760 100644 --- a/tsl/test/expected/vectorized_aggregation.out +++ b/tsl/test/expected/vectorized_aggregation.out @@ -405,18 +405,21 @@ SELECT sum(segment_by_value) FROM testtable GROUP BY float_value; -> Parallel Append -> Custom Scan (VectorAgg) Output: _hyper_1_1_chunk.float_value, (PARTIAL sum(_hyper_1_1_chunk.segment_by_value)) + Grouping Policy: hashed with single 8-byte key -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_1_chunk Output: _hyper_1_1_chunk.float_value, _hyper_1_1_chunk.segment_by_value -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_11_chunk Output: compress_hyper_2_11_chunk._ts_meta_count, compress_hyper_2_11_chunk.segment_by_value, compress_hyper_2_11_chunk._ts_meta_min_1, compress_hyper_2_11_chunk._ts_meta_max_1, compress_hyper_2_11_chunk."time", compress_hyper_2_11_chunk.int_value, compress_hyper_2_11_chunk.float_value -> Custom Scan (VectorAgg) 
Output: _hyper_1_2_chunk.float_value, (PARTIAL sum(_hyper_1_2_chunk.segment_by_value)) + Grouping Policy: hashed with single 8-byte key -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_2_chunk Output: _hyper_1_2_chunk.float_value, _hyper_1_2_chunk.segment_by_value -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_12_chunk Output: compress_hyper_2_12_chunk._ts_meta_count, compress_hyper_2_12_chunk.segment_by_value, compress_hyper_2_12_chunk._ts_meta_min_1, compress_hyper_2_12_chunk._ts_meta_max_1, compress_hyper_2_12_chunk."time", compress_hyper_2_12_chunk.int_value, compress_hyper_2_12_chunk.float_value -> Custom Scan (VectorAgg) Output: _hyper_1_3_chunk.float_value, (PARTIAL sum(_hyper_1_3_chunk.segment_by_value)) + Grouping Policy: hashed with single 8-byte key -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_3_chunk Output: _hyper_1_3_chunk.float_value, _hyper_1_3_chunk.segment_by_value -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_13_chunk @@ -456,7 +459,7 @@ SELECT sum(segment_by_value) FROM testtable GROUP BY float_value; Group Key: _hyper_1_10_chunk.float_value -> Parallel Seq Scan on _timescaledb_internal._hyper_1_10_chunk Output: _hyper_1_10_chunk.float_value, _hyper_1_10_chunk.segment_by_value -(60 rows) +(63 rows) :EXPLAIN SELECT sum(segment_by_value) FROM testtable GROUP BY int_value; @@ -471,18 +474,21 @@ SELECT sum(segment_by_value) FROM testtable GROUP BY int_value; -> Parallel Append -> Custom Scan (VectorAgg) Output: _hyper_1_1_chunk.int_value, (PARTIAL sum(_hyper_1_1_chunk.segment_by_value)) + Grouping Policy: hashed with single 4-byte key -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_1_chunk Output: _hyper_1_1_chunk.int_value, _hyper_1_1_chunk.segment_by_value -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_11_chunk Output: compress_hyper_2_11_chunk._ts_meta_count, compress_hyper_2_11_chunk.segment_by_value, 
compress_hyper_2_11_chunk._ts_meta_min_1, compress_hyper_2_11_chunk._ts_meta_max_1, compress_hyper_2_11_chunk."time", compress_hyper_2_11_chunk.int_value, compress_hyper_2_11_chunk.float_value -> Custom Scan (VectorAgg) Output: _hyper_1_2_chunk.int_value, (PARTIAL sum(_hyper_1_2_chunk.segment_by_value)) + Grouping Policy: hashed with single 4-byte key -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_2_chunk Output: _hyper_1_2_chunk.int_value, _hyper_1_2_chunk.segment_by_value -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_12_chunk Output: compress_hyper_2_12_chunk._ts_meta_count, compress_hyper_2_12_chunk.segment_by_value, compress_hyper_2_12_chunk._ts_meta_min_1, compress_hyper_2_12_chunk._ts_meta_max_1, compress_hyper_2_12_chunk."time", compress_hyper_2_12_chunk.int_value, compress_hyper_2_12_chunk.float_value -> Custom Scan (VectorAgg) Output: _hyper_1_3_chunk.int_value, (PARTIAL sum(_hyper_1_3_chunk.segment_by_value)) + Grouping Policy: hashed with single 4-byte key -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_3_chunk Output: _hyper_1_3_chunk.int_value, _hyper_1_3_chunk.segment_by_value -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_13_chunk @@ -522,7 +528,7 @@ SELECT sum(segment_by_value) FROM testtable GROUP BY int_value; Group Key: _hyper_1_10_chunk.int_value -> Parallel Seq Scan on _timescaledb_internal._hyper_1_10_chunk Output: _hyper_1_10_chunk.int_value, _hyper_1_10_chunk.segment_by_value -(60 rows) +(63 rows) -- Vectorization possible with grouping by a segmentby column. 
:EXPLAIN From f64676fd328e9d99ca2d79ca2a0ee6d7bc22ff0e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 3 Dec 2024 15:27:11 +0100 Subject: [PATCH 44/58] fixes --- tsl/src/nodes/vector_agg/hashing/hash_strategy_impl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsl/src/nodes/vector_agg/hashing/hash_strategy_impl.c b/tsl/src/nodes/vector_agg/hashing/hash_strategy_impl.c index fa5c1f7c7d1..319ec06c6fb 100644 --- a/tsl/src/nodes/vector_agg/hashing/hash_strategy_impl.c +++ b/tsl/src/nodes/vector_agg/hashing/hash_strategy_impl.c @@ -80,7 +80,7 @@ FUNCTION_NAME(fill_offsets_impl)(BatchHashingParams params, int start_row, int e struct FUNCTION_NAME(hash) *restrict table = hashing->table; - HASH_TABLE_KEY_TYPE prev_hash_table_key; + HASH_TABLE_KEY_TYPE prev_hash_table_key = { 0 }; uint32 previous_key_index = 0; for (int row = start_row; row < end_row; row++) { From fab11fbb4fbe68480c191dd61f07e2675cba06b2 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 3 Dec 2024 15:30:30 +0100 Subject: [PATCH 45/58] benchmark single fixed-column hash grouping (2024-12-03 no. 
11) From dff6dff7caf1afa691c740db8e1bab4864fffa68 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 3 Dec 2024 15:34:34 +0100 Subject: [PATCH 46/58] cleanup --- .../nodes/vector_agg/grouping_policy_hash.c | 7 ------- .../nodes/vector_agg/grouping_policy_hash.h | 20 ------------------- 2 files changed, 27 deletions(-) diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.c b/tsl/src/nodes/vector_agg/grouping_policy_hash.c index a2b388b4b4c..85c81b813ff 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.c @@ -100,13 +100,6 @@ gp_hash_reset(GroupingPolicy *obj) policy->hashing.reset(&policy->hashing); - /* - * Have to reset this because it's in the key body context which is also - * reset here. - */ - policy->tmp_key_storage = NULL; - policy->num_tmp_key_storage_bytes = 0; - policy->last_used_key_index = 0; policy->stat_input_valid_rows = 0; diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.h b/tsl/src/nodes/vector_agg/grouping_policy_hash.h index d7cda9d77c4..5e2d5146338 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.h +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.h @@ -130,26 +130,6 @@ typedef struct GroupingPolicyHash uint64 stat_input_total_rows; uint64 stat_input_valid_rows; uint64 stat_consecutive_keys; - - /* - * FIXME all the stuff below should be moved out. - */ - - /* - * Temporary key storages. Some hashing strategies need to put the key in a - * separate memory area, we don't want to alloc/free it on each row. - */ - uint8 *tmp_key_storage; - uint64 num_tmp_key_storage_bytes; - - /* - * For single text key that uses dictionary encoding, in some cases we first - * calculate the key indexes for the dictionary entries, and then translate - * it to the actual rows. 
- */ - uint32 *restrict key_index_for_dict; - uint64 num_key_index_for_dict; - bool use_key_index_for_dict; } GroupingPolicyHash; //#define DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__) From 831cadd80717c531ca8da0aa2d3333e8e029eae2 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 4 Dec 2024 19:26:21 +0100 Subject: [PATCH 47/58] planning fixes for pg 17 --- tsl/src/nodes/vector_agg/plan.c | 103 ++++++++++++------- tsl/test/expected/vectorized_aggregation.out | 8 ++ tsl/test/sql/vectorized_aggregation.sql | 4 + 3 files changed, 80 insertions(+), 35 deletions(-) diff --git a/tsl/src/nodes/vector_agg/plan.c b/tsl/src/nodes/vector_agg/plan.c index 669a52ca203..5b5cc93bb73 100644 --- a/tsl/src/nodes/vector_agg/plan.c +++ b/tsl/src/nodes/vector_agg/plan.c @@ -355,10 +355,68 @@ can_vectorize_aggref(Aggref *aggref, CustomScan *custom) static bool can_vectorize_grouping(Agg *agg, CustomScan *custom, List *resolved_targetlist) { + /* + * The Agg->numCols value can be less than the number of the non-aggregated + * vars in the aggregated targetlist, if some of them are equated to a + * constant. This behavior started with PG 16. This case is not very + * important, so we treat all non-aggregated columns as grouping columns to + * keep the vectorized aggregation node simple. + */ + int num_grouping_columns = 0; + bool all_segmentby = true; + Var *single_grouping_var = NULL; + + ListCell *lc; + foreach (lc, resolved_targetlist) + { + TargetEntry *target_entry = lfirst_node(TargetEntry, lc); + if (IsA(target_entry->expr, Aggref)) + { + continue; + } + + if (!IsA(target_entry->expr, Var)) + { + /* + * We shouldn't see anything except Vars or Aggrefs in the + * aggregated targetlists. Just say it's not vectorizable, because + * here we are working with arbitrary plans that we don't control. 
+ */ + return false; + } + + num_grouping_columns++; + + Var *var = castNode(Var, target_entry->expr); + bool is_segmentby; + if (!is_vector_var(custom, (Expr *) var, &is_segmentby)) + { + return false; + } + + all_segmentby &= is_segmentby; + + /* + * If we have a single grouping column, record it for the additional + * checks later. + */ + if (num_grouping_columns == 1) + { + single_grouping_var = var; + } + else + { + single_grouping_var = NULL; + } + } + + Assert(num_grouping_columns == 1 || single_grouping_var == NULL); + Assert(num_grouping_columns >= agg->numCols); + /* * We support vectorized aggregation without grouping. */ - if (agg->numCols == 0) + if (num_grouping_columns == 0) { return true; } @@ -367,46 +425,21 @@ can_vectorize_grouping(Agg *agg, CustomScan *custom, List *resolved_targetlist) * We support hashed vectorized grouping by one fixed-size by-value * compressed column. */ - if (agg->numCols == 1) + if (num_grouping_columns == 1) { - int offset = AttrNumberGetAttrOffset(agg->grpColIdx[0]); - TargetEntry *entry = list_nth(resolved_targetlist, offset); - - bool is_segmentby = false; - if (is_vector_var(custom, entry->expr, &is_segmentby)) + int16 typlen; + bool typbyval; + get_typlenbyval(single_grouping_var->vartype, &typlen, &typbyval); + if (typbyval && typlen > 0 && (size_t) typlen <= sizeof(Datum)) { - Var *var = castNode(Var, entry->expr); - int16 typlen; - bool typbyval; - get_typlenbyval(var->vartype, &typlen, &typbyval); - if (typbyval && typlen > 0 && (size_t) typlen <= sizeof(Datum)) - { - return true; - } + return true; } } /* * We support grouping by any number of columns if all of them are segmentby. 
*/ - for (int i = 0; i < agg->numCols; i++) - { - int offset = AttrNumberGetAttrOffset(agg->grpColIdx[i]); - TargetEntry *entry = list_nth_node(TargetEntry, resolved_targetlist, offset); - - bool is_segmentby = false; - if (!is_vector_var(custom, entry->expr, &is_segmentby)) - { - return false; - } - - if (!is_segmentby) - { - return false; - } - } - - return true; + return all_segmentby; } /* @@ -593,7 +626,7 @@ try_insert_vector_agg_node(Plan *plan) if (!can_vectorize_grouping(agg, custom, resolved_targetlist)) { - /* No GROUP BY support for now. */ + /* The grouping is not vectorizable. */ return plan; } @@ -601,7 +634,7 @@ try_insert_vector_agg_node(Plan *plan) ListCell *lc; foreach (lc, resolved_targetlist) { - TargetEntry *target_entry = castNode(TargetEntry, lfirst(lc)); + TargetEntry *target_entry = lfirst_node(TargetEntry, lc); if (IsA(target_entry->expr, Aggref)) { Aggref *aggref = castNode(Aggref, target_entry->expr); diff --git a/tsl/test/expected/vectorized_aggregation.out b/tsl/test/expected/vectorized_aggregation.out index a5623050760..a218e385880 100644 --- a/tsl/test/expected/vectorized_aggregation.out +++ b/tsl/test/expected/vectorized_aggregation.out @@ -3503,3 +3503,11 @@ SELECT sum(float_value) FROM testtable2 GROUP BY tableoid ORDER BY 1 LIMIT 1; 82620 (1 row) +-- Postgres versions starting with 16 remove the grouping columns that are +-- equated to a constant. Check that our planning code handles this well. 
+SELECT sum(float_value), int_value FROM testtable2 WHERE int_value = 1 GROUP BY int_value; + sum | int_value +------+----------- + 3162 | 1 +(1 row) + diff --git a/tsl/test/sql/vectorized_aggregation.sql b/tsl/test/sql/vectorized_aggregation.sql index bafecd6b544..93866322c9b 100644 --- a/tsl/test/sql/vectorized_aggregation.sql +++ b/tsl/test/sql/vectorized_aggregation.sql @@ -407,3 +407,7 @@ RESET max_parallel_workers_per_gather; -- Can't group by a system column SELECT sum(float_value) FROM testtable2 GROUP BY tableoid ORDER BY 1 LIMIT 1; + +-- Postgres versions starting with 16 remove the grouping columns that are +-- equated to a constant. Check that our planning code handles this well. +SELECT sum(float_value), int_value FROM testtable2 WHERE int_value = 1 GROUP BY int_value; From 66403f2e2a2ff70767a39fab14bd80dd3caca441 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 4 Dec 2024 19:26:40 +0100 Subject: [PATCH 48/58] benchmark fixed-size hash grouping (2024-12-04 no. 152) From 99e5b041b4b643395af4246b9fdc8fea293df575 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 4 Dec 2024 19:46:51 +0100 Subject: [PATCH 49/58] remove some (yet) unused code --- .../vector_agg/hashing/batch_hashing_params.h | 33 ++++--------------- .../hash_strategy_impl_single_fixed_key.c | 12 +++---- 2 files changed, 13 insertions(+), 32 deletions(-) diff --git a/tsl/src/nodes/vector_agg/hashing/batch_hashing_params.h b/tsl/src/nodes/vector_agg/hashing/batch_hashing_params.h index a4db2a19b1c..0f18a0f5fe0 100644 --- a/tsl/src/nodes/vector_agg/hashing/batch_hashing_params.h +++ b/tsl/src/nodes/vector_agg/hashing/batch_hashing_params.h @@ -6,20 +6,14 @@ #pragma once +/* + * The data required to map the rows of the given compressed batch to the unique + * indexes of grouping keys, using a hash table. 
+ */ typedef struct BatchHashingParams { const uint64 *batch_filter; - CompressedColumnValues single_key; - - int num_grouping_columns; - const CompressedColumnValues *grouping_column_values; - - /* - * Whether we have any scalar or nullable grouping columns in the current - * batch. This is used to select the more efficient implementation when we - * have none. - */ - bool have_scalar_or_nullable_columns; + CompressedColumnValues single_grouping_column; GroupingPolicyHash *restrict policy; @@ -32,24 +26,11 @@ build_batch_hashing_params(GroupingPolicyHash *policy, DecompressBatchState *bat BatchHashingParams params = { .policy = policy, .batch_filter = batch_state->vector_qual_result, - .num_grouping_columns = policy->num_grouping_columns, - .grouping_column_values = policy->current_batch_grouping_column_values, .result_key_indexes = policy->key_index_for_row, }; - Assert(policy->num_grouping_columns > 0); - if (policy->num_grouping_columns == 1) - { - params.single_key = policy->current_batch_grouping_column_values[0]; - } - - for (int i = 0; i < policy->num_grouping_columns; i++) - { - params.have_scalar_or_nullable_columns = - params.have_scalar_or_nullable_columns || - (policy->current_batch_grouping_column_values[i].decompression_type == DT_Scalar || - policy->current_batch_grouping_column_values[i].buffers[0] != NULL); - } + Assert(policy->num_grouping_columns == 1); + params.single_grouping_column = policy->current_batch_grouping_column_values[0]; return params; } diff --git a/tsl/src/nodes/vector_agg/hashing/hash_strategy_impl_single_fixed_key.c b/tsl/src/nodes/vector_agg/hashing/hash_strategy_impl_single_fixed_key.c index c8a3c2da57f..972567dcc10 100644 --- a/tsl/src/nodes/vector_agg/hashing/hash_strategy_impl_single_fixed_key.c +++ b/tsl/src/nodes/vector_agg/hashing/hash_strategy_impl_single_fixed_key.c @@ -29,15 +29,15 @@ FUNCTION_NAME(key_hashing_get_key)(BatchHashingParams params, int row, OUTPUT_KEY_TYPE *restrict output_key = (OUTPUT_KEY_TYPE *) 
output_key_ptr; HASH_TABLE_KEY_TYPE *restrict hash_table_key = (HASH_TABLE_KEY_TYPE *) hash_table_key_ptr; - if (unlikely(params.single_key.decompression_type == DT_Scalar)) + if (unlikely(params.single_grouping_column.decompression_type == DT_Scalar)) { - *output_key = DATUM_TO_OUTPUT_KEY(*params.single_key.output_value); - *valid = !*params.single_key.output_isnull; + *output_key = DATUM_TO_OUTPUT_KEY(*params.single_grouping_column.output_value); + *valid = !*params.single_grouping_column.output_isnull; } - else if (params.single_key.decompression_type == sizeof(OUTPUT_KEY_TYPE)) + else if (params.single_grouping_column.decompression_type == sizeof(OUTPUT_KEY_TYPE)) { - const OUTPUT_KEY_TYPE *values = params.single_key.buffers[1]; - *valid = arrow_row_is_valid(params.single_key.buffers[0], row); + const OUTPUT_KEY_TYPE *values = params.single_grouping_column.buffers[1]; + *valid = arrow_row_is_valid(params.single_grouping_column.buffers[0], row); *output_key = values[row]; } else From 9fccab9896994e6fd9ded0b55560fa6f9336274a Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 16 Dec 2024 13:58:32 +0100 Subject: [PATCH 50/58] ref --- tsl/test/expected/vectorized_aggregation.out | 4 ++-- tsl/test/sql/vectorized_aggregation.sql | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tsl/test/expected/vectorized_aggregation.out b/tsl/test/expected/vectorized_aggregation.out index dfee268fb41..01f690b743a 100644 --- a/tsl/test/expected/vectorized_aggregation.out +++ b/tsl/test/expected/vectorized_aggregation.out @@ -180,7 +180,7 @@ SELECT sum(segment_by_value) FROM testtable WHERE segment_by_value > 0; Filter: (_hyper_1_10_chunk.segment_by_value > 0) (65 rows) --- Vectorization not possible due to a used filter +-- Vectorization with filter on compressed columns :EXPLAIN SELECT sum(segment_by_value) FROM testtable WHERE segment_by_value > 0 AND int_value > 0; QUERY PLAN @@ -2858,7 +2858,7 @@ SELECT 
sum(int_value) FROM testtable; 3538 (1 row) --- Aggregation filters are not supported at the moment +-- Vectorizable aggregation filters are supported :EXPLAIN SELECT sum(segment_by_value) FILTER (WHERE segment_by_value > 99999) FROM testtable; QUERY PLAN diff --git a/tsl/test/sql/vectorized_aggregation.sql b/tsl/test/sql/vectorized_aggregation.sql index bafecd6b544..324a96716f5 100644 --- a/tsl/test/sql/vectorized_aggregation.sql +++ b/tsl/test/sql/vectorized_aggregation.sql @@ -42,7 +42,7 @@ SELECT sum(segment_by_value) FROM testtable; :EXPLAIN SELECT sum(segment_by_value) FROM testtable WHERE segment_by_value > 0; --- Vectorization not possible due to a used filter +-- Vectorization with filter on compressed columns :EXPLAIN SELECT sum(segment_by_value) FROM testtable WHERE segment_by_value > 0 AND int_value > 0; @@ -330,7 +330,7 @@ SELECT sum(segment_by_value) FROM testtable; SELECT sum(int_value) FROM testtable; --- Aggregation filters are not supported at the moment +-- Vectorizable aggregation filters are supported :EXPLAIN SELECT sum(segment_by_value) FILTER (WHERE segment_by_value > 99999) FROM testtable; From f5b648ada9b26ffcc9a2d5c2cc8ff0de4349cee7 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 16 Dec 2024 14:39:27 +0100 Subject: [PATCH 51/58] add test --- .../nodes/vector_agg/grouping_policy_hash.c | 1 + tsl/test/expected/vector_agg_filter.out | 21 +++++++++++++++++++ tsl/test/sql/vector_agg_filter.sql | 9 ++++++++ 3 files changed, 31 insertions(+) diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.c b/tsl/src/nodes/vector_agg/grouping_policy_hash.c index 85c81b813ff..0853d9d47cc 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.c @@ -149,6 +149,7 @@ compute_single_aggregate(GroupingPolicyHash *policy, const DecompressBatchState const size_t num_words = (batch_state->total_batch_rows + 63) / 64; const uint64 *filter = 
arrow_combine_validity(num_words, policy->tmp_filter, + agg_def->filter_result, batch_state->vector_qual_result, arg_validity_bitmap); diff --git a/tsl/test/expected/vector_agg_filter.out b/tsl/test/expected/vector_agg_filter.out index 13629d3e4b3..81d33e3e7d3 100644 --- a/tsl/test/expected/vector_agg_filter.out +++ b/tsl/test/expected/vector_agg_filter.out @@ -2962,4 +2962,25 @@ select count(*) filter (where cint2 = stable_abs(0)) from aggfilter; 7 (1 row) +-- With hash grouping +select + ss, + count(*) filter (where s != 5), + count(*) filter (where cint2 < 0) +from aggfilter +group by ss; + ss | count | count +----+-------+------- + 3 | 19981 | 10076 + 11 | 40019 | 20008 + 8 | 20000 | 10082 + | 19 | 12 + 9 | 20000 | 9961 + 7 | 20000 | 10008 + 5 | 0 | 9871 + 4 | 19981 | 10064 + 0 | 20000 | 9968 + 6 | 20000 | 10089 +(10 rows) + reset timescaledb.debug_require_vector_agg; diff --git a/tsl/test/sql/vector_agg_filter.sql b/tsl/test/sql/vector_agg_filter.sql index d3423d84eac..ec861a8f70d 100644 --- a/tsl/test/sql/vector_agg_filter.sql +++ b/tsl/test/sql/vector_agg_filter.sql @@ -113,9 +113,18 @@ reset timescaledb.debug_require_vector_agg; -- FILTER that is not vectorizable set timescaledb.debug_require_vector_agg = 'forbid'; select count(*) filter (where cint2 === 0) from aggfilter; + -- FILTER with stable function set timescaledb.debug_require_vector_agg = 'require'; select count(*) filter (where cint2 = stable_abs(0)) from aggfilter; +-- With hash grouping +select + ss, + count(*) filter (where s != 5), + count(*) filter (where cint2 < 0) +from aggfilter +group by ss; + reset timescaledb.debug_require_vector_agg; From dc6001daef1dccc579fee8701652f3f8d96ed0cb Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 16 Dec 2024 14:45:59 +0100 Subject: [PATCH 52/58] typo --- tsl/src/nodes/vector_agg/hashing/hash_strategy_single_fixed_4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tsl/src/nodes/vector_agg/hashing/hash_strategy_single_fixed_4.c b/tsl/src/nodes/vector_agg/hashing/hash_strategy_single_fixed_4.c index 96679548d52..cdf7a0e84a4 100644 --- a/tsl/src/nodes/vector_agg/hashing/hash_strategy_single_fixed_4.c +++ b/tsl/src/nodes/vector_agg/hashing/hash_strategy_single_fixed_4.c @@ -5,7 +5,7 @@ */ /* - * Implementation of column hashing for a single fixed size 2-byte column. + * Implementation of column hashing for a single fixed size 4-byte column. */ #include From 0ea397a85e956a68e86f20db8c4a643123129a4c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 16 Dec 2024 16:21:35 +0100 Subject: [PATCH 53/58] disable parallel --- tsl/test/expected/vector_agg_functions.out | 2 ++ tsl/test/sql/vector_agg_functions.sql | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/tsl/test/expected/vector_agg_functions.out b/tsl/test/expected/vector_agg_functions.out index bffb8d7af7d..2bd56502fb5 100644 --- a/tsl/test/expected/vector_agg_functions.out +++ b/tsl/test/expected/vector_agg_functions.out @@ -115,6 +115,7 @@ set timescaledb.debug_require_vector_agg = :'guc_value'; ---- Uncomment to generate reference. Note that there are minor discrepancies ---- on float4 due to different numeric stability in our and PG implementations. 
--set timescaledb.enable_vectorized_aggregation to off; set timescaledb.debug_require_vector_agg = 'forbid'; +set max_parallel_workers_per_gather = 0; select format('%sselect %s%s(%s) from aggfns%s%s%s;', explain, @@ -7658,3 +7659,4 @@ select ss, count(*), min(f1) from edges where f1 = 65 group by 1 order by 1; 12 | 1 | 65 (8 rows) +reset max_parallel_workers_per_gather; diff --git a/tsl/test/sql/vector_agg_functions.sql b/tsl/test/sql/vector_agg_functions.sql index 2876a303fba..ef5d3c2b8dc 100644 --- a/tsl/test/sql/vector_agg_functions.sql +++ b/tsl/test/sql/vector_agg_functions.sql @@ -100,6 +100,8 @@ set timescaledb.debug_require_vector_agg = :'guc_value'; ---- on float4 due to different numeric stability in our and PG implementations. --set timescaledb.enable_vectorized_aggregation to off; set timescaledb.debug_require_vector_agg = 'forbid'; +set max_parallel_workers_per_gather = 0; + select format('%sselect %s%s(%s) from aggfns%s%s%s;', explain, @@ -174,3 +176,5 @@ select s, count(*), min(f1) from edges where f1 = 65 group by 1 order by 1; select ss, count(*), min(f1) from edges where f1 = 63 group by 1 order by 1; select ss, count(*), min(f1) from edges where f1 = 64 group by 1 order by 1; select ss, count(*), min(f1) from edges where f1 = 65 group by 1 order by 1; + +reset max_parallel_workers_per_gather; From ea4dab1a2018f3cb9fb319849d51a1d2fa5cec70 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 16 Dec 2024 17:59:50 +0100 Subject: [PATCH 54/58] add order --- tsl/test/expected/vector_agg_filter.out | 17 ++++++++++------- tsl/test/sql/vector_agg_filter.sql | 7 +++++-- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/tsl/test/expected/vector_agg_filter.out b/tsl/test/expected/vector_agg_filter.out index 81d33e3e7d3..8310a828be5 100644 --- a/tsl/test/expected/vector_agg_filter.out +++ b/tsl/test/expected/vector_agg_filter.out @@ -68,6 +68,7 @@ vacuum freeze analyze aggfilter; set 
timescaledb.debug_require_vector_agg = 'require'; ---- Uncomment to generate reference. --set timescaledb.enable_vectorized_aggregation to off; set timescaledb.debug_require_vector_agg = 'allow'; +set max_parallel_workers_per_gather = 0; select format('%sselect %s%s(%s)%s from aggfilter%s%s%s;', explain, @@ -2968,19 +2969,21 @@ select count(*) filter (where s != 5), count(*) filter (where cint2 < 0) from aggfilter -group by ss; +group by ss +order by 2, 3; ss | count | count ----+-------+------- - 3 | 19981 | 10076 - 11 | 40019 | 20008 - 8 | 20000 | 10082 - | 19 | 12 - 9 | 20000 | 9961 - 7 | 20000 | 10008 5 | 0 | 9871 + | 19 | 12 4 | 19981 | 10064 + 3 | 19981 | 10076 + 9 | 20000 | 9961 0 | 20000 | 9968 + 7 | 20000 | 10008 + 8 | 20000 | 10082 6 | 20000 | 10089 + 11 | 40019 | 20008 (10 rows) reset timescaledb.debug_require_vector_agg; +reset max_parallel_workers_per_gather; diff --git a/tsl/test/sql/vector_agg_filter.sql b/tsl/test/sql/vector_agg_filter.sql index ec861a8f70d..8a5aec27370 100644 --- a/tsl/test/sql/vector_agg_filter.sql +++ b/tsl/test/sql/vector_agg_filter.sql @@ -65,6 +65,8 @@ set timescaledb.debug_require_vector_agg = 'require'; ---- Uncomment to generate reference. 
--set timescaledb.enable_vectorized_aggregation to off; set timescaledb.debug_require_vector_agg = 'allow'; +set max_parallel_workers_per_gather = 0; + select format('%sselect %s%s(%s)%s from aggfilter%s%s%s;', explain, @@ -124,7 +126,8 @@ select count(*) filter (where s != 5), count(*) filter (where cint2 < 0) from aggfilter -group by ss; +group by ss +order by 2, 3; reset timescaledb.debug_require_vector_agg; - +reset max_parallel_workers_per_gather; From 4b98e4610e350d73ef6dabd0529676d2957bfda3 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 18 Dec 2024 17:51:08 +0100 Subject: [PATCH 55/58] Update tsl/src/nodes/vector_agg/grouping_policy_hash.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Erik Nordström <819732+erimatnor@users.noreply.github.com> Signed-off-by: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> --- tsl/src/nodes/vector_agg/grouping_policy_hash.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.h b/tsl/src/nodes/vector_agg/grouping_policy_hash.h index 5e2d5146338..27286bb6696 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.h +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.h @@ -39,7 +39,7 @@ typedef struct GroupingPolicyHash GroupingPolicyHash; * rows of the batch, and for each aggregate function separately, to generate * simpler and potentially vectorizable code, and improve memory locality. * - * 3) After the input have ended, or if the memory limit is reached, the partial + * 3) After the input has ended, or if the memory limit is reached, the partial * results are emitted into the output slot. This is done in the order of unique * grouping key indexes, thereby preserving the incoming key order. 
This * guarantees that this policy works correctly even in a Partial GroupAggregate From b615dbe34f200878b14f7b3ebd6a5140b864b9ca Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 18 Dec 2024 17:51:19 +0100 Subject: [PATCH 56/58] Update tsl/src/nodes/vector_agg/grouping_policy_hash.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Erik Nordström <819732+erimatnor@users.noreply.github.com> Signed-off-by: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> --- tsl/src/nodes/vector_agg/grouping_policy_hash.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.h b/tsl/src/nodes/vector_agg/grouping_policy_hash.h index 27286bb6696..3fc0016b444 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.h +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.h @@ -35,7 +35,7 @@ typedef struct GroupingPolicyHash GroupingPolicyHash; * strategy. * * 2) The key indexes are used to locate the aggregate function states - * corresponding to a given row, and update it. This is done in bulk for all + * corresponding to a given row's key, and update it. This is done in bulk for all * rows of the batch, and for each aggregate function separately, to generate * simpler and potentially vectorizable code, and improve memory locality. 
* From 045f59affc08b76b0f6e197e384c4e1c07435345 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 18 Dec 2024 18:45:33 +0100 Subject: [PATCH 57/58] determine the grouping type at plan time --- tsl/src/nodes/vector_agg/exec.c | 23 +++---- tsl/src/nodes/vector_agg/grouping_policy.h | 16 ++++- .../nodes/vector_agg/grouping_policy_hash.c | 14 ++--- tsl/src/nodes/vector_agg/plan.c | 60 ++++++++++++------- tsl/src/nodes/vector_agg/plan.h | 9 +++ 5 files changed, 75 insertions(+), 47 deletions(-) diff --git a/tsl/src/nodes/vector_agg/exec.c b/tsl/src/nodes/vector_agg/exec.c index 9ada1a62b43..6d9b08fee65 100644 --- a/tsl/src/nodes/vector_agg/exec.c +++ b/tsl/src/nodes/vector_agg/exec.c @@ -21,6 +21,7 @@ #include "nodes/decompress_chunk/exec.h" #include "nodes/decompress_chunk/vector_quals.h" #include "nodes/vector_agg.h" +#include "nodes/vector_agg/plan.h" static int get_input_offset(DecompressChunkState *decompress_state, Var *var) @@ -187,22 +188,11 @@ vector_agg_begin(CustomScanState *node, EState *estate, int eflags) } /* - * Determine which grouping policy we are going to use. + * Create the grouping policy chosen at plan time. */ - bool all_segmentby = true; - for (int i = 0; i < vector_agg_state->num_grouping_columns; i++) - { - GroupingColumn *col = &vector_agg_state->grouping_columns[i]; - DecompressContext *dcontext = &decompress_state->decompress_context; - CompressionColumnDescription *desc = &dcontext->compressed_chunk_columns[col->input_offset]; - if (desc->type != SEGMENTBY_COLUMN) - { - all_segmentby = false; - break; - } - } - - if (all_segmentby) + const VectorAggGroupingType grouping_type = + intVal(list_nth(cscan->custom_private, VASI_GroupingType)); + if (grouping_type == VAGT_Batch) { /* * Per-batch grouping. 
@@ -222,7 +212,8 @@ vector_agg_begin(CustomScanState *node, EState *estate, int eflags) create_grouping_policy_hash(vector_agg_state->num_agg_defs, vector_agg_state->agg_defs, vector_agg_state->num_grouping_columns, - vector_agg_state->grouping_columns); + vector_agg_state->grouping_columns, + grouping_type); } } diff --git a/tsl/src/nodes/vector_agg/grouping_policy.h b/tsl/src/nodes/vector_agg/grouping_policy.h index e7c5af909d2..30a6dfd4690 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy.h +++ b/tsl/src/nodes/vector_agg/grouping_policy.h @@ -54,10 +54,24 @@ typedef struct GroupingPolicy char *(*gp_explain)(GroupingPolicy *gp); } GroupingPolicy; +/* + * The various types of grouping we might use, as determined at planning time. + * The hashed subtypes are all implemented by hash grouping policy. + */ +typedef enum +{ + VAGT_Invalid, + VAGT_Batch, + VAGT_HashSingleFixed2, + VAGT_HashSingleFixed4, + VAGT_HashSingleFixed8 +} VectorAggGroupingType; + extern GroupingPolicy *create_grouping_policy_batch(int num_agg_defs, VectorAggDef *agg_defs, int num_grouping_columns, GroupingColumn *grouping_columns); extern GroupingPolicy *create_grouping_policy_hash(int num_agg_defs, VectorAggDef *agg_defs, int num_grouping_columns, - GroupingColumn *grouping_columns); + GroupingColumn *grouping_columns, + VectorAggGroupingType grouping_type); diff --git a/tsl/src/nodes/vector_agg/grouping_policy_hash.c b/tsl/src/nodes/vector_agg/grouping_policy_hash.c index 0853d9d47cc..05a8e13d474 100644 --- a/tsl/src/nodes/vector_agg/grouping_policy_hash.c +++ b/tsl/src/nodes/vector_agg/grouping_policy_hash.c @@ -39,7 +39,7 @@ static const GroupingPolicy grouping_policy_hash_functions; GroupingPolicy * create_grouping_policy_hash(int num_agg_defs, VectorAggDef *agg_defs, int num_grouping_columns, - GroupingColumn *grouping_columns) + GroupingColumn *grouping_columns, VectorAggGroupingType grouping_type) { GroupingPolicyHash *policy = palloc0(sizeof(GroupingPolicyHash)); policy->funcs = 
grouping_policy_hash_functions; @@ -66,21 +66,19 @@ create_grouping_policy_hash(int num_agg_defs, VectorAggDef *agg_defs, int num_gr policy->current_batch_grouping_column_values = palloc(sizeof(CompressedColumnValues) * num_grouping_columns); - Assert(num_grouping_columns == 1); - const GroupingColumn *g = &policy->grouping_columns[0]; - switch (g->value_bytes) + switch (grouping_type) { - case 8: + case VAGT_HashSingleFixed8: policy->hashing = single_fixed_8_strategy; break; - case 4: + case VAGT_HashSingleFixed4: policy->hashing = single_fixed_4_strategy; break; - case 2: + case VAGT_HashSingleFixed2: policy->hashing = single_fixed_2_strategy; break; default: - Assert(false); + Ensure(false, "failed to determine the hashing strategy"); break; } diff --git a/tsl/src/nodes/vector_agg/plan.c b/tsl/src/nodes/vector_agg/plan.c index 0c9f1cac201..055300e6990 100644 --- a/tsl/src/nodes/vector_agg/plan.c +++ b/tsl/src/nodes/vector_agg/plan.c @@ -16,6 +16,7 @@ #include "plan.h" #include "exec.h" +#include "import/list.h" #include "nodes/decompress_chunk/planner.h" #include "nodes/decompress_chunk/vector_quals.h" #include "nodes/vector_agg.h" @@ -131,7 +132,8 @@ resolve_outer_special_vars(List *agg_tlist, CustomScan *custom) * node. 
*/ static Plan * -vector_agg_plan_create(Agg *agg, CustomScan *decompress_chunk, List *resolved_targetlist) +vector_agg_plan_create(Agg *agg, CustomScan *decompress_chunk, List *resolved_targetlist, + VectorAggGroupingType grouping_type) { CustomScan *vector_agg = (CustomScan *) makeNode(CustomScan); vector_agg->custom_plans = list_make1(decompress_chunk); @@ -171,13 +173,9 @@ vector_agg_plan_create(Agg *agg, CustomScan *decompress_chunk, List *resolved_ta vector_agg->scan.plan.extParam = bms_copy(agg->plan.extParam); vector_agg->scan.plan.allParam = bms_copy(agg->plan.allParam); - List *grouping_child_output_offsets = NIL; - for (int i = 0; i < agg->numCols; i++) - { - grouping_child_output_offsets = - lappend_int(grouping_child_output_offsets, AttrNumberGetAttrOffset(agg->grpColIdx[i])); - } - vector_agg->custom_private = list_make1(grouping_child_output_offsets); + vector_agg->custom_private = ts_new_list(T_List, VASI_Count); + lfirst(list_nth_cell(vector_agg->custom_private, VASI_GroupingType)) = + makeInteger(grouping_type); return (Plan *) vector_agg; } @@ -427,10 +425,10 @@ can_vectorize_aggref(Aggref *aggref, CustomScan *custom, VectorQualInfo *vqi) } /* - * Whether we can perform vectorized aggregation with a given grouping. + * What vectorized grouping strategy we can use for the given grouping columns. */ -static bool -can_vectorize_grouping(Agg *agg, CustomScan *custom, List *resolved_targetlist) +static VectorAggGroupingType +get_vectorized_grouping_type(Agg *agg, CustomScan *custom, List *resolved_targetlist) { /* * The Agg->numCols value can be less than the number of the non-aggregated @@ -459,7 +457,7 @@ can_vectorize_grouping(Agg *agg, CustomScan *custom, List *resolved_targetlist) * aggregated targetlists. Just say it's not vectorizable, because * here we are working with arbitrary plans that we don't control. 
*/ - return false; + return VAGT_Invalid; } num_grouping_columns++; @@ -468,7 +466,7 @@ can_vectorize_grouping(Agg *agg, CustomScan *custom, List *resolved_targetlist) bool is_segmentby; if (!is_vector_var(custom, (Expr *) var, &is_segmentby)) { - return false; + return VAGT_Invalid; } all_segmentby &= is_segmentby; @@ -495,7 +493,15 @@ can_vectorize_grouping(Agg *agg, CustomScan *custom, List *resolved_targetlist) */ if (num_grouping_columns == 0) { - return true; + return VAGT_Batch; + } + + /* + * We support grouping by any number of columns if all of them are segmentby. + */ + if (all_segmentby) + { + return VAGT_Batch; } /* @@ -507,16 +513,24 @@ can_vectorize_grouping(Agg *agg, CustomScan *custom, List *resolved_targetlist) int16 typlen; bool typbyval; get_typlenbyval(single_grouping_var->vartype, &typlen, &typbyval); - if (typbyval && typlen > 0 && (size_t) typlen <= sizeof(Datum)) + if (typbyval) { - return true; + switch (typlen) + { + case 2: + return VAGT_HashSingleFixed2; + case 4: + return VAGT_HashSingleFixed4; + case 8: + return VAGT_HashSingleFixed8; + default: + Ensure(false, "invalid fixed size %d of a vector type", typlen); + break; + } } } - /* - * We support grouping by any number of columns if all of them are segmentby. - */ - return all_segmentby; + return VAGT_Invalid; } /* @@ -709,7 +723,9 @@ try_insert_vector_agg_node(Plan *plan) */ List *resolved_targetlist = resolve_outer_special_vars(agg->plan.targetlist, custom); - if (!can_vectorize_grouping(agg, custom, resolved_targetlist)) + const VectorAggGroupingType grouping_type = + get_vectorized_grouping_type(agg, custom, resolved_targetlist); + if (grouping_type == VAGT_Invalid) { /* The grouping is not vectorizable. */ return plan; @@ -758,5 +774,5 @@ try_insert_vector_agg_node(Plan *plan) * Finally, all requirements are satisfied and we can vectorize this partial * aggregation node. 
*/ - return vector_agg_plan_create(agg, custom, resolved_targetlist); + return vector_agg_plan_create(agg, custom, resolved_targetlist, grouping_type); } diff --git a/tsl/src/nodes/vector_agg/plan.h b/tsl/src/nodes/vector_agg/plan.h index 67f0e2957c9..94dabb35d6d 100644 --- a/tsl/src/nodes/vector_agg/plan.h +++ b/tsl/src/nodes/vector_agg/plan.h @@ -13,6 +13,15 @@ typedef struct VectorAggPlan CustomScan custom; } VectorAggPlan; +/* + * The indexes of settings that we have to pass through the custom_private list. + */ +typedef enum +{ + VASI_GroupingType = 0, + VASI_Count +} VectorAggSettingsIndex; + extern void _vector_agg_init(void); Plan *try_insert_vector_agg_node(Plan *plan); From df100f2b042dadf3d6091007b91aa2d707556b06 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 2 Jan 2025 15:28:08 +0100 Subject: [PATCH 58/58] cleanup --- tsl/src/nodes/vector_agg/plan.c | 14 ++-- tsl/test/expected/vectorized_aggregation.out | 72 +++++++++++++++++++- tsl/test/sql/vectorized_aggregation.sql | 6 +- 3 files changed, 82 insertions(+), 10 deletions(-) diff --git a/tsl/src/nodes/vector_agg/plan.c b/tsl/src/nodes/vector_agg/plan.c index 055300e6990..e705d23f37a 100644 --- a/tsl/src/nodes/vector_agg/plan.c +++ b/tsl/src/nodes/vector_agg/plan.c @@ -475,14 +475,12 @@ get_vectorized_grouping_type(Agg *agg, CustomScan *custom, List *resolved_target * If we have a single grouping column, record it for the additional * checks later. 
*/ - if (num_grouping_columns == 1) - { - single_grouping_var = var; - } - else - { - single_grouping_var = NULL; - } + single_grouping_var = var; + } + + if (num_grouping_columns != 1) + { + single_grouping_var = NULL; } Assert(num_grouping_columns == 1 || single_grouping_var == NULL); diff --git a/tsl/test/expected/vectorized_aggregation.out b/tsl/test/expected/vectorized_aggregation.out index 79b6b608d6f..a65b8dd1376 100644 --- a/tsl/test/expected/vectorized_aggregation.out +++ b/tsl/test/expected/vectorized_aggregation.out @@ -397,7 +397,7 @@ SELECT sum(segment_by_value) FROM testtable WHERE float_value > 0; Filter: (_hyper_1_10_chunk.float_value > '0'::double precision) (65 rows) --- Vectorization not possible due grouping +-- Vectorization possible with grouping by one fixed-size column :EXPLAIN SELECT sum(segment_by_value) FROM testtable GROUP BY float_value; QUERY PLAN @@ -536,6 +536,76 @@ SELECT sum(segment_by_value) FROM testtable GROUP BY int_value; Output: _hyper_1_10_chunk.int_value, _hyper_1_10_chunk.segment_by_value (63 rows) +-- Vectorization not possible with grouping by multiple columns +:EXPLAIN +SELECT sum(segment_by_value) FROM testtable GROUP BY int_value, float_value; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize HashAggregate + Output: sum(_hyper_1_1_chunk.segment_by_value), _hyper_1_1_chunk.int_value, _hyper_1_1_chunk.float_value + Group Key: _hyper_1_1_chunk.int_value, _hyper_1_1_chunk.float_value + -> Gather + Output: _hyper_1_1_chunk.int_value, _hyper_1_1_chunk.float_value, (PARTIAL sum(_hyper_1_1_chunk.segment_by_value)) + Workers Planned: 2 + -> Parallel Append + -> Partial HashAggregate + Output: _hyper_1_1_chunk.int_value, 
_hyper_1_1_chunk.float_value, PARTIAL sum(_hyper_1_1_chunk.segment_by_value) + Group Key: _hyper_1_1_chunk.int_value, _hyper_1_1_chunk.float_value + -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk.int_value, _hyper_1_1_chunk.float_value, _hyper_1_1_chunk.segment_by_value + -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_11_chunk + Output: compress_hyper_2_11_chunk._ts_meta_count, compress_hyper_2_11_chunk.segment_by_value, compress_hyper_2_11_chunk._ts_meta_min_1, compress_hyper_2_11_chunk._ts_meta_max_1, compress_hyper_2_11_chunk."time", compress_hyper_2_11_chunk.int_value, compress_hyper_2_11_chunk.float_value + -> Partial HashAggregate + Output: _hyper_1_2_chunk.int_value, _hyper_1_2_chunk.float_value, PARTIAL sum(_hyper_1_2_chunk.segment_by_value) + Group Key: _hyper_1_2_chunk.int_value, _hyper_1_2_chunk.float_value + -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk.int_value, _hyper_1_2_chunk.float_value, _hyper_1_2_chunk.segment_by_value + -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_12_chunk + Output: compress_hyper_2_12_chunk._ts_meta_count, compress_hyper_2_12_chunk.segment_by_value, compress_hyper_2_12_chunk._ts_meta_min_1, compress_hyper_2_12_chunk._ts_meta_max_1, compress_hyper_2_12_chunk."time", compress_hyper_2_12_chunk.int_value, compress_hyper_2_12_chunk.float_value + -> Partial HashAggregate + Output: _hyper_1_3_chunk.int_value, _hyper_1_3_chunk.float_value, PARTIAL sum(_hyper_1_3_chunk.segment_by_value) + Group Key: _hyper_1_3_chunk.int_value, _hyper_1_3_chunk.float_value + -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_3_chunk + Output: _hyper_1_3_chunk.int_value, _hyper_1_3_chunk.float_value, _hyper_1_3_chunk.segment_by_value + -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_13_chunk + Output: compress_hyper_2_13_chunk._ts_meta_count, 
compress_hyper_2_13_chunk.segment_by_value, compress_hyper_2_13_chunk._ts_meta_min_1, compress_hyper_2_13_chunk._ts_meta_max_1, compress_hyper_2_13_chunk."time", compress_hyper_2_13_chunk.int_value, compress_hyper_2_13_chunk.float_value + -> Partial HashAggregate + Output: _hyper_1_4_chunk.int_value, _hyper_1_4_chunk.float_value, PARTIAL sum(_hyper_1_4_chunk.segment_by_value) + Group Key: _hyper_1_4_chunk.int_value, _hyper_1_4_chunk.float_value + -> Parallel Seq Scan on _timescaledb_internal._hyper_1_4_chunk + Output: _hyper_1_4_chunk.int_value, _hyper_1_4_chunk.float_value, _hyper_1_4_chunk.segment_by_value + -> Partial HashAggregate + Output: _hyper_1_5_chunk.int_value, _hyper_1_5_chunk.float_value, PARTIAL sum(_hyper_1_5_chunk.segment_by_value) + Group Key: _hyper_1_5_chunk.int_value, _hyper_1_5_chunk.float_value + -> Parallel Seq Scan on _timescaledb_internal._hyper_1_5_chunk + Output: _hyper_1_5_chunk.int_value, _hyper_1_5_chunk.float_value, _hyper_1_5_chunk.segment_by_value + -> Partial HashAggregate + Output: _hyper_1_6_chunk.int_value, _hyper_1_6_chunk.float_value, PARTIAL sum(_hyper_1_6_chunk.segment_by_value) + Group Key: _hyper_1_6_chunk.int_value, _hyper_1_6_chunk.float_value + -> Parallel Seq Scan on _timescaledb_internal._hyper_1_6_chunk + Output: _hyper_1_6_chunk.int_value, _hyper_1_6_chunk.float_value, _hyper_1_6_chunk.segment_by_value + -> Partial HashAggregate + Output: _hyper_1_7_chunk.int_value, _hyper_1_7_chunk.float_value, PARTIAL sum(_hyper_1_7_chunk.segment_by_value) + Group Key: _hyper_1_7_chunk.int_value, _hyper_1_7_chunk.float_value + -> Parallel Seq Scan on _timescaledb_internal._hyper_1_7_chunk + Output: _hyper_1_7_chunk.int_value, _hyper_1_7_chunk.float_value, _hyper_1_7_chunk.segment_by_value + -> Partial HashAggregate + Output: _hyper_1_8_chunk.int_value, _hyper_1_8_chunk.float_value, PARTIAL sum(_hyper_1_8_chunk.segment_by_value) + Group Key: _hyper_1_8_chunk.int_value, _hyper_1_8_chunk.float_value + -> Parallel Seq Scan on 
_timescaledb_internal._hyper_1_8_chunk + Output: _hyper_1_8_chunk.int_value, _hyper_1_8_chunk.float_value, _hyper_1_8_chunk.segment_by_value + -> Partial HashAggregate + Output: _hyper_1_9_chunk.int_value, _hyper_1_9_chunk.float_value, PARTIAL sum(_hyper_1_9_chunk.segment_by_value) + Group Key: _hyper_1_9_chunk.int_value, _hyper_1_9_chunk.float_value + -> Parallel Seq Scan on _timescaledb_internal._hyper_1_9_chunk + Output: _hyper_1_9_chunk.int_value, _hyper_1_9_chunk.float_value, _hyper_1_9_chunk.segment_by_value + -> Partial HashAggregate + Output: _hyper_1_10_chunk.int_value, _hyper_1_10_chunk.float_value, PARTIAL sum(_hyper_1_10_chunk.segment_by_value) + Group Key: _hyper_1_10_chunk.int_value, _hyper_1_10_chunk.float_value + -> Parallel Seq Scan on _timescaledb_internal._hyper_1_10_chunk + Output: _hyper_1_10_chunk.int_value, _hyper_1_10_chunk.float_value, _hyper_1_10_chunk.segment_by_value +(63 rows) + -- Vectorization possible with grouping by a segmentby column. :EXPLAIN SELECT sum(int_value) FROM testtable GROUP BY segment_by_value; diff --git a/tsl/test/sql/vectorized_aggregation.sql b/tsl/test/sql/vectorized_aggregation.sql index e3deb9bbb16..86a9f5d801c 100644 --- a/tsl/test/sql/vectorized_aggregation.sql +++ b/tsl/test/sql/vectorized_aggregation.sql @@ -52,13 +52,17 @@ SELECT sum(segment_by_value) FROM testtable WHERE int_value > 0; :EXPLAIN SELECT sum(segment_by_value) FROM testtable WHERE float_value > 0; --- Vectorization not possible due grouping +-- Vectorization possible with grouping by one fixed-size column :EXPLAIN SELECT sum(segment_by_value) FROM testtable GROUP BY float_value; :EXPLAIN SELECT sum(segment_by_value) FROM testtable GROUP BY int_value; +-- Vectorization not possible with grouping by multiple columns +:EXPLAIN +SELECT sum(segment_by_value) FROM testtable GROUP BY int_value, float_value; + -- Vectorization possible with grouping by a segmentby column. :EXPLAIN SELECT sum(int_value) FROM testtable GROUP BY segment_by_value;