diff --git a/src/backend/cdb/cdbvars.c b/src/backend/cdb/cdbvars.c index 9db3389e0bf..bf6b77c44ee 100644 --- a/src/backend/cdb/cdbvars.c +++ b/src/backend/cdb/cdbvars.c @@ -278,6 +278,7 @@ int gp_hashjoin_tuples_per_bucket = 5; int gp_motion_slice_noop = 0; /* Apache Cloudberry Experimental Feature GUCs */ +bool gp_enable_explain_rows_out = false; bool gp_enable_explain_allstat = false; bool gp_enable_motion_deadlock_sanity = false; /* planning time sanity * check */ diff --git a/src/backend/commands/explain_gp.c b/src/backend/commands/explain_gp.c index 27580fbd5fa..15a954f7cbc 100644 --- a/src/backend/commands/explain_gp.c +++ b/src/backend/commands/explain_gp.c @@ -944,7 +944,7 @@ cdbexplain_collectStatsFromNode(PlanState *planstate, CdbExplain_SendStatCtx *ct */ typedef struct CdbExplain_DepStatAcc { - /* vmax, vsum, vcnt, segmax */ + /* vmax, vmin, vsum, vcnt, segmax, segmin */ CdbExplain_Agg agg; /* max's received StatHdr */ CdbExplain_StatHdr *rshmax; @@ -1801,6 +1801,56 @@ cdbexplain_showExecStats(struct PlanState *planstate, ExplainState *es) } pfree(extraData.data); + /* + * Print "Rows out" + */ + + if (gp_enable_explain_rows_out && es->analyze && ns->ninst > 0) { + double ntuples_max = ns->ntuples.vmax; + int ntuples_imax = ns->ntuples.imax; + int ntuples_wmax = ns->ntuples.wmax; + double ntuples_min = ns->ntuples.vmin; + int ntuples_imin = ns->ntuples.imin; + int ntuples_wmin = ns->ntuples.wmin; + double ntuples_avg = cdbexplain_agg_avg(&ns->ntuples); + + int segments = ns->ninst; + int workers = ns->ntuples.vcnt; + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + /* + * Report this node's "Rows out": average rows per worker, plus the + * max and min row counts with the segment/worker that produced them. + */ + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfoString(es->str, "Rows out: "); + + appendStringInfo(es->str, + "%.2f rows avg x %d workers from %d segments, %.0f rows max (seg%d worker%d), %.0f rows min (seg%d worker%d).\n", 
ntuples_avg, + workers, + segments, + ntuples_max, + ntuples_imax, + ntuples_wmax, + ntuples_min, + ntuples_imin, + ntuples_wmin); + } + else { + ExplainPropertyInteger("Workers", NULL, workers, es); + ExplainPropertyInteger("Segments", NULL, segments, es); + ExplainPropertyFloat("Average Rows", NULL, ntuples_avg, 1, es); + ExplainPropertyFloat("Max Rows", NULL, ntuples_max, 0, es); + ExplainPropertyInteger("Max Rows Segment", NULL, ntuples_imax, es); + ExplainPropertyInteger("Max Rows Worker", NULL, ntuples_wmax, es); + ExplainPropertyFloat("Min Rows", NULL, ntuples_min, 0, es); + ExplainPropertyInteger("Min Rows Segment", NULL, ntuples_imin, es); + ExplainPropertyInteger("Min Rows Worker", NULL, ntuples_wmin, es); + } + } + /* * Dump stats for all workers. */ diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index e342d762705..f0e74526b6c 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -815,6 +815,17 @@ struct config_bool ConfigureNamesBool_gp[] = NULL, NULL, NULL }, + { + {"gp_enable_explain_rows_out", PGC_USERSET, CLIENT_CONN_OTHER, + gettext_noop("Print avg, min and max rows out and which segments reach them in EXPLAIN ANALYZE."), + NULL, + GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE + }, + &gp_enable_explain_rows_out, + false, + NULL, NULL, NULL + }, + { {"gp_enable_explain_allstat", PGC_USERSET, CLIENT_CONN_OTHER, gettext_noop("Experimental feature: dump stats for all segments in EXPLAIN ANALYZE."), diff --git a/src/include/cdb/cdbexplain.h b/src/include/cdb/cdbexplain.h index 959cb1faec0..09fa90e893d 100644 --- a/src/include/cdb/cdbexplain.h +++ b/src/include/cdb/cdbexplain.h @@ -26,18 +26,26 @@ struct CdbExplain_ShowStatCtx; /* private, in "cdb/cdbexplain.c" */ typedef struct { double vmax; /* maximum value of statistic */ + double vmin; /* minimum value of statistic */ double vsum; /* sum of values */ int vcnt; /* count of values > 0 */ int imax; /* id of 1st observation having maximum value */ + 
int imin; /* id of 1st observation having minimum value */ + int wmax; /* worker id of 1st observation having maximum value */ + int wmin; /* worker id of 1st observation having minimum value */ } CdbExplain_Agg; static inline void cdbexplain_agg_init0(CdbExplain_Agg *agg) { agg->vmax = 0; + agg->vmin = 0; agg->vsum = 0; agg->vcnt = 0; agg->imax = 0; + agg->imin = 0; + agg->wmax = 0; + agg->wmin = 0; } static inline bool @@ -48,13 +56,23 @@ cdbexplain_agg_upd(CdbExplain_Agg *agg, double v, int id) agg->vsum += v; agg->vcnt++; + if (v < agg->vmin || + agg->vcnt == 1) + { + agg->vmin = v; + agg->imin = id; + agg->wmin = agg->vcnt - 1; + } + if (v > agg->vmax || agg->vcnt == 1) { agg->vmax = v; agg->imax = id; - return true; + agg->wmax = agg->vcnt - 1; } + + return agg->imin == id || agg->imax == id; } return false; } diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h index 90af5177ce0..76607179873 100644 --- a/src/include/cdb/cdbvars.h +++ b/src/include/cdb/cdbvars.h @@ -613,6 +613,12 @@ extern bool gp_enable_agg_pushdown; */ extern bool gp_enable_preunique; +/* May Cloudberry print statistics as average, minimum and maximum rows out + * and on which segments reach them for each node during EXPLAIN ANALYZE? + * + */ +extern bool gp_enable_explain_rows_out; + /* May Cloudberry dump statistics for all segments as a huge ugly string * during EXPLAIN ANALYZE? 
* diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h index 6cbf4b3179b..7dedb1b3e3c 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -182,6 +182,7 @@ "gp_enable_agg_pushdown", "gp_enable_ao_indexscan", "gp_enable_direct_dispatch", "gp_enable_explain_allstat", + "gp_enable_explain_rows_out", "gp_enable_fast_sri", "gp_enable_global_deadlock_detector", diff --git a/src/test/regress/expected/cbdb_parallel.out b/src/test/regress/expected/cbdb_parallel.out index b8639f9fb9c..9eb29e41a31 100644 --- a/src/test/regress/expected/cbdb_parallel.out +++ b/src/test/regress/expected/cbdb_parallel.out @@ -3068,6 +3068,34 @@ select t1_anti.a, t1_anti.b from t1_anti left join t2_anti on t1_anti.a = t2_ant 2 | (4 rows) +abort; +-- test rows out +-- start_matchsubs +-- m/\(actual rows=\d+ loops=\d+\)/ +-- s/\(actual rows=\d+ loops=\d+\)/(actual rows=# loops=#)/ +-- m/Rows Removed by Filter: \d+/ +-- s/Rows Removed by Filter: \d+/Rows Removed by Filter: ###/ +-- m/\d+ rows max \(seg\d+ worker\d+\), \d+ rows min \(seg\d+ worker\d+\)/ +-- s/\d+ rows max \(seg\d+ worker\d+\), \d+ rows min \(seg\d+ worker\d+\)/##### rows max (seg# worker#), ##### rows min (seg# worker#)/ +-- end_matchsubs +begin; +create table tt (a int, b int) with(parallel_workers=2) distributed by(a, b); +insert into tt select * from generate_series(1,1000)a,generate_series(1,1000)b; +set local enable_parallel = on; +set local max_parallel_workers_per_gather = 2; +set local gp_enable_explain_rows_out = on; +explain(costs off, summary off, timing off, analyze) select * from tt where a > b; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 6:1 (slice1; segments: 6) (actual rows=499500 loops=1) + Rows out: 499500.00 rows avg x 1 workers from 1 segments, 499500 rows max (seg-1 worker0), 499500 rows min (seg-1 worker0). 
+ -> Parallel Seq Scan on tt (actual rows=86074 loops=1) + Filter: (a > b) + Rows Removed by Filter: 84852 + Rows out: 83250.00 rows avg x 6 workers from 3 segments, 86144 rows max (seg2 worker4), 80391 rows min (seg1 worker2). + Optimizer: Postgres query optimizer +(7 rows) + abort; -- start_ignore drop schema test_parallel cascade; diff --git a/src/test/regress/expected/gp_explain.out b/src/test/regress/expected/gp_explain.out index d71833f820b..861064b5a72 100644 --- a/src/test/regress/expected/gp_explain.out +++ b/src/test/regress/expected/gp_explain.out @@ -436,6 +436,42 @@ explain analyze SELECT * FROM explaintest; (8 rows) set gp_enable_explain_allstat=DEFAULT; +-- Test explain rows out. +begin; +set local gp_enable_explain_rows_out=on; +create table tt (a int, b int) distributed by(a, b); +explain(costs off, summary off, timing off, analyze) +insert into tt select * from generate_series(1,1000)a,generate_series(1,1000)b; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------- + Insert on tt (actual rows=0 loops=1) + Rows out: 0.00 rows avg x 0 workers from 3 segments, 0 rows max (seg0 worker0), 0 rows min (seg0 worker0). + -> Redistribute Motion 1:3 (slice1; segments: 1) (actual rows=333518 loops=1) + Hash Key: a.a, b.b + Rows out: 333333.33 rows avg x 3 workers from 3 segments, 333518 rows max (seg2 worker2), 333150 rows min (seg1 worker1). + -> Nested Loop (actual rows=1000000 loops=1) + Rows out: 1000000.00 rows avg x 1 workers from 1 segments, 1000000 rows max (seg0 worker0), 1000000 rows min (seg2 worker0). + -> Function Scan on generate_series a (actual rows=1000 loops=1) + Rows out: 1000.00 rows avg x 1 workers from 1 segments, 1000 rows max (seg0 worker0), 1000 rows min (seg2 worker0). 
+ -> Function Scan on generate_series b (actual rows=1000 loops=1000) + Rows out: 1000000.00 rows avg x 1 workers from 1 segments, 1000000 rows max (seg0 worker0), 1000000 rows min (seg2 worker0). + Optimizer: Postgres query optimizer +(12 rows) + +explain(costs off, summary off, timing off, analyze) +select * from tt where a > b; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (actual rows=499500 loops=1) + Rows out: 499500.00 rows avg x 1 workers from 1 segments, 499500 rows max (seg-1 worker0), 499500 rows min (seg-1 worker0). + -> Seq Scan on tt (actual rows=166461 loops=1) + Filter: (a > b) + Rows Removed by Filter: 167057 + Rows out: 166500.00 rows avg x 3 workers from 3 segments, 166557 rows max (seg0 worker0), 166461 rows min (seg2 worker2). + Optimizer: Postgres query optimizer +(7 rows) + +abort; -- -- Test GPDB-specific EXPLAIN (SLICETABLE) option. -- diff --git a/src/test/regress/expected/gp_explain_optimizer.out b/src/test/regress/expected/gp_explain_optimizer.out index 0ad9ffea637..67b6f4732b1 100644 --- a/src/test/regress/expected/gp_explain_optimizer.out +++ b/src/test/regress/expected/gp_explain_optimizer.out @@ -458,6 +458,42 @@ explain analyze SELECT * FROM explaintest; (8 rows) set gp_enable_explain_allstat=DEFAULT; +-- Test explain rows out. +begin; +set local gp_enable_explain_rows_out=on; +create table tt (a int, b int) distributed by(a, b); +explain(costs off, summary off, timing off, analyze) +insert into tt select * from generate_series(1,1000)a,generate_series(1,1000)b; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------- + Insert on tt (actual rows=0 loops=1) + Rows out: 0.00 rows avg x 0 workers from 3 segments, 0 rows max (seg0 worker0), 0 rows min (seg0 worker0). 
+ -> Redistribute Motion 1:3 (slice1; segments: 1) (actual rows=333518 loops=1) + Hash Key: a.a, b.b + Rows out: 333333.33 rows avg x 3 workers from 3 segments, 333518 rows max (seg2 worker2), 333150 rows min (seg1 worker1). + -> Nested Loop (actual rows=1000000 loops=1) + Rows out: 1000000.00 rows avg x 1 workers from 1 segments, 1000000 rows max (seg0 worker0), 1000000 rows min (seg2 worker0). + -> Function Scan on generate_series a (actual rows=1000 loops=1) + Rows out: 1000.00 rows avg x 1 workers from 1 segments, 1000 rows max (seg0 worker0), 1000 rows min (seg2 worker0). + -> Function Scan on generate_series b (actual rows=1000 loops=1000) + Rows out: 1000000.00 rows avg x 1 workers from 1 segments, 1000000 rows max (seg0 worker0), 1000000 rows min (seg2 worker0). + Optimizer: Postgres query optimizer +(12 rows) + +explain(costs off, summary off, timing off, analyze) +select * from tt where a > b; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (actual rows=499500 loops=1) + Rows out: 499500.00 rows avg x 1 workers from 1 segments, 499500 rows max (seg-1 worker0), 499500 rows min (seg-1 worker0). + -> Seq Scan on tt (actual rows=166461 loops=1) + Filter: (a > b) + Rows Removed by Filter: 167057 + Rows out: 166500.00 rows avg x 3 workers from 3 segments, 166557 rows max (seg0 worker0), 166461 rows min (seg2 worker2). + Optimizer: Postgres query optimizer +(7 rows) + +abort; -- -- Test GPDB-specific EXPLAIN (SLICETABLE) option. 
-- diff --git a/src/test/regress/sql/cbdb_parallel.sql b/src/test/regress/sql/cbdb_parallel.sql index b3fab79dd09..3fa0f137783 100644 --- a/src/test/regress/sql/cbdb_parallel.sql +++ b/src/test/regress/sql/cbdb_parallel.sql @@ -986,6 +986,24 @@ select t1_anti.a, t1_anti.b from t1_anti left join t2_anti on t1_anti.a = t2_ant select t1_anti.a, t1_anti.b from t1_anti left join t2_anti on t1_anti.a = t2_anti.a where t2_anti.a is null; abort; +-- test rows out +-- start_matchsubs +-- m/\(actual rows=\d+ loops=\d+\)/ +-- s/\(actual rows=\d+ loops=\d+\)/(actual rows=# loops=#)/ +-- m/Rows Removed by Filter: \d+/ +-- s/Rows Removed by Filter: \d+/Rows Removed by Filter: ###/ +-- m/\d+ rows max \(seg\d+ worker\d+\), \d+ rows min \(seg\d+ worker\d+\)/ +-- s/\d+ rows max \(seg\d+ worker\d+\), \d+ rows min \(seg\d+ worker\d+\)/##### rows max (seg# worker#), ##### rows min (seg# worker#)/ +-- end_matchsubs +begin; +create table tt (a int, b int) with(parallel_workers=2) distributed by(a, b); +insert into tt select * from generate_series(1,1000)a,generate_series(1,1000)b; +set local enable_parallel = on; +set local max_parallel_workers_per_gather = 2; +set local gp_enable_explain_rows_out = on; +explain(costs off, summary off, timing off, analyze) select * from tt where a > b; +abort; + -- start_ignore drop schema test_parallel cascade; -- end_ignore diff --git a/src/test/regress/sql/gp_explain.sql b/src/test/regress/sql/gp_explain.sql index 3cc60ed3d24..868117ef585 100644 --- a/src/test/regress/sql/gp_explain.sql +++ b/src/test/regress/sql/gp_explain.sql @@ -228,6 +228,16 @@ set gp_enable_explain_allstat=on; explain analyze SELECT * FROM explaintest; set gp_enable_explain_allstat=DEFAULT; +-- Test explain rows out. 
+begin; +set local gp_enable_explain_rows_out=on; +create table tt (a int, b int) distributed by(a, b); +explain(costs off, summary off, timing off, analyze) +insert into tt select * from generate_series(1,1000)a,generate_series(1,1000)b; +explain(costs off, summary off, timing off, analyze) +select * from tt where a > b; +abort; + -- -- Test GPDB-specific EXPLAIN (SLICETABLE) option.