diff --git a/src/backend/cdb/cdbvars.c b/src/backend/cdb/cdbvars.c
index 770da72fccc..5eba3af8da7 100644
--- a/src/backend/cdb/cdbvars.c
+++ b/src/backend/cdb/cdbvars.c
@@ -283,6 +283,7 @@ int gp_hashagg_groups_per_bucket = 5;
 int			gp_motion_slice_noop = 0;
 
 /* Cloudberry Database Experimental Feature GUCs */
+bool		gp_enable_explain_rows_out = false;
 bool		gp_enable_explain_allstat = false;
 bool		gp_enable_motion_deadlock_sanity = false;	/* planning time sanity
 														 * check */
diff --git a/src/backend/commands/explain_gp.c b/src/backend/commands/explain_gp.c
index 86985882483..50e443b42d5 100644
--- a/src/backend/commands/explain_gp.c
+++ b/src/backend/commands/explain_gp.c
@@ -941,7 +941,7 @@ cdbexplain_collectStatsFromNode(PlanState *planstate, CdbExplain_SendStatCtx *ct
  */
 typedef struct CdbExplain_DepStatAcc
 {
-	/* vmax, vsum, vcnt, segmax */
+	/* vmax, vmin, vsum, vcnt, imax, imin */
 	CdbExplain_Agg agg;
 	/* max's received StatHdr */
 	CdbExplain_StatHdr *rshmax;
@@ -1716,6 +1716,49 @@ cdbexplain_showExecStats(struct PlanState *planstate, ExplainState *es)
 	}
 	pfree(extraData.data);
 
+	/*
+	 * Print "Rows out": average, maximum and minimum row counts for this
+	 * node, with the ids recorded for the max and min observations.
+	 */
+
+	if (gp_enable_explain_rows_out && es->analyze && ns->ninst > 0)
+	{
+		double		ntuples_max = ns->ntuples.vmax;
+		int			ntuples_imax = ns->ntuples.imax;
+		double		ntuples_min = ns->ntuples.vmin;
+		int			ntuples_imin = ns->ntuples.imin;
+		double		ntuples_avg = cdbexplain_agg_avg(&ns->ntuples);
+		int			ntuples_cnt = ns->ntuples.vcnt;
+
+		if (es->format == EXPLAIN_FORMAT_TEXT)
+		{
+			/*
+			 * Emit one indented summary line: the average and the number of
+			 * counted values, then the max and min with their ids.
+			 */
+			appendStringInfoSpaces(es->str, es->indent * 2);
+			appendStringInfoString(es->str, "Rows out: ");
+
+			appendStringInfo(es->str,
+							 "%.2f rows avg x %d workers, %.0f rows max (seg%d), %.0f rows min (seg%d).\n",
+							 ntuples_avg,
+							 ntuples_cnt,
+							 ntuples_max,
+							 ntuples_imax,
+							 ntuples_min,
+							 ntuples_imin);
+		}
+		else
+		{
+			ExplainPropertyInteger("Workers", NULL, ntuples_cnt, es);
+			ExplainPropertyFloat("Average Rows", NULL, ntuples_avg, 2, es);
+			ExplainPropertyFloat("Max Rows", NULL, ntuples_max, 0, es);
+			ExplainPropertyInteger("Max Rows Segment", NULL, ntuples_imax, es);
+			ExplainPropertyFloat("Min Rows", NULL, ntuples_min, 0, es);
+			ExplainPropertyInteger("Min Rows Segment", NULL, ntuples_imin, es);
+		}
+	}
+
 	/*
 	 * Dump stats for all workers.
 	 */
diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c
index ff992db927e..d90901aec07 100644
--- a/src/backend/utils/misc/guc_gp.c
+++ b/src/backend/utils/misc/guc_gp.c
@@ -775,6 +775,17 @@ struct config_bool ConfigureNamesBool_gp[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"gp_enable_explain_rows_out", PGC_USERSET, CLIENT_CONN_OTHER,
+			gettext_noop("Print avg, min and max rows out and which segments reach them in EXPLAIN ANALYZE."),
+			NULL,
+			GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE
+		},
+		&gp_enable_explain_rows_out,
+		false,
+		NULL, NULL, NULL
+	},
+
 	{
 		{"gp_enable_explain_allstat", PGC_USERSET, CLIENT_CONN_OTHER,
 			gettext_noop("Experimental feature: dump stats for all segments in EXPLAIN ANALYZE."),
diff --git a/src/include/cdb/cdbexplain.h b/src/include/cdb/cdbexplain.h
index 67fd98de3bf..a63436f063d 100644
--- a/src/include/cdb/cdbexplain.h
+++ b/src/include/cdb/cdbexplain.h
@@ -26,18 +26,22 @@ struct CdbExplain_ShowStatCtx; /* private, in "cdb/cdbexplain.c" */
 typedef struct
 {
 	double		vmax;			/* maximum value of statistic */
+	double		vmin;			/* minimum value of statistic */
 	double		vsum;			/* sum of values */
 	int			vcnt;			/* count of values > 0 */
 	int			imax;			/* id of 1st observation having maximum value */
+	int			imin;			/* id of 1st observation having minimum value */
 } CdbExplain_Agg;
 
 static inline void
 cdbexplain_agg_init0(CdbExplain_Agg *agg)
 {
 	agg->vmax = 0;
+	agg->vmin = 0;
 	agg->vsum = 0;
 	agg->vcnt = 0;
 	agg->imax = 0;
+	agg->imin = 0;
 }
 
 static inline bool
@@ -48,6 +52,17 @@ cdbexplain_agg_upd(CdbExplain_Agg *agg, double v, int id)
 		agg->vsum += v;
 		agg->vcnt++;
 
+		/*
+		 * vmin starts at 0, so the first counted value has to seed it;
+		 * after that keep the smallest value and the id that reported it.
+		 */
+		if (v < agg->vmin ||
+			agg->vcnt == 1)
+		{
+			agg->vmin = v;
+			agg->imin = id;
+		}
+
 		if (v > agg->vmax ||
 			agg->vcnt == 1)
 		{
diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h
index 292786e37cd..73ee06bead2 100644
--- a/src/include/cdb/cdbvars.h
+++ b/src/include/cdb/cdbvars.h
@@ -630,6 +630,12 @@ extern bool gp_enable_agg_pushdown;
  */
 extern bool gp_enable_preunique;
 
+/* May Cloudberry print the average, minimum and maximum rows out of each
+ * node, and the segments that reached the minimum and maximum, during
+ * EXPLAIN ANALYZE?
+ */
+extern bool gp_enable_explain_rows_out;
+
 /* May Cloudberry dump statistics for all segments as a huge ugly string
  * during EXPLAIN ANALYZE?
  *
diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h
index a064c3dd444..20e0418de83 100644
--- a/src/include/utils/unsync_guc_name.h
+++ b/src/include/utils/unsync_guc_name.h
@@ -182,6 +182,7 @@
 		"gp_enable_agg_pushdown",
 		"gp_enable_ao_indexscan",
 		"gp_enable_direct_dispatch",
 		"gp_enable_explain_allstat",
+		"gp_enable_explain_rows_out",
 		"gp_enable_fast_sri",
 		"gp_enable_global_deadlock_detector",
diff --git a/src/test/regress/expected/gp_explain.out b/src/test/regress/expected/gp_explain.out
index 70a1cdf8339..caa72c15e9b 100644
--- a/src/test/regress/expected/gp_explain.out
+++ b/src/test/regress/expected/gp_explain.out
@@ -436,6 +436,40 @@ explain analyze SELECT * FROM explaintest;
 (8 rows)
 
 set gp_enable_explain_allstat=DEFAULT;
+-- Test explain rows out.
+begin;
+set local gp_enable_explain_rows_out=on;
+create table tt (a int, b int);
+explain(costs off, summary off, timing off, analyze)
+insert into tt select * from generate_series(1,1000)a,generate_series(1,1000)b;
+QUERY PLAN
+___________
+ Insert on tt (actual rows=0 loops=1)
+   Rows out: 0.00 rows avg x 0 workers, 0 rows max (seg0), 0 rows min (seg0).
+   ->  Result (actual rows=340000 loops=1)
+         Rows out: 333333.33 rows avg x 3 workers, 340000 rows max (seg2), 322000 rows min (seg1).
+         ->  Result (actual rows=340000 loops=1)
+               Rows out: 333333.33 rows avg x 3 workers, 340000 rows max (seg2), 322000 rows min (seg1).
+               ->  Nested Loop (actual rows=1000000 loops=1)
+                     Join Filter: true
+                     Rows out: 1000000.00 rows avg x 3 workers, 1000000 rows max (seg0), 1000000 rows min (seg0).
+                     ->  Function Scan on generate_series generate_series_1 (actual rows=1000 loops=1)
+                           Rows out: 1000.00 rows avg x 3 workers, 1000 rows max (seg0), 1000 rows min (seg0).
+                     ->  Function Scan on generate_series (actual rows=999 loops=1001)
+                           Rows out: 1000001.00 rows avg x 3 workers, 1000001 rows max (seg0), 1000001 rows min (seg0).
+
+explain(costs off, summary off, timing off, analyze)
+select * from tt where a > b;
+QUERY PLAN
+___________
+ Gather Motion 3:1  (slice1; segments: 3) (actual rows=499500 loops=1)
+   Rows out: 499500.00 rows avg x 1 workers, 499500 rows max (seg-1), 499500 rows min (seg-1).
+   ->  Seq Scan on tt (actual rows=172218 loops=1)
+         Filter: (a > b)
+         Rows Removed by Filter: 167782
+         Rows out: 166500.00 rows avg x 3 workers, 172218 rows max (seg2), 160958 rows min (seg1).
+
+abort;
 --
 -- Test GPDB-specific EXPLAIN (SLICETABLE) option.
 --
diff --git a/src/test/regress/expected/gp_explain_optimizer.out b/src/test/regress/expected/gp_explain_optimizer.out
index ddf5dd580e9..0e8e4aef6aa 100644
--- a/src/test/regress/expected/gp_explain_optimizer.out
+++ b/src/test/regress/expected/gp_explain_optimizer.out
@@ -458,6 +458,40 @@ explain analyze SELECT * FROM explaintest;
 (8 rows)
 
 set gp_enable_explain_allstat=DEFAULT;
+-- Test explain rows out.
+begin;
+set local gp_enable_explain_rows_out=on;
+create table tt (a int, b int);
+explain(costs off, summary off, timing off, analyze)
+insert into tt select * from generate_series(1,1000)a,generate_series(1,1000)b;
+QUERY PLAN
+___________
+ Insert on tt (actual rows=0 loops=1)
+   Rows out: 0.00 rows avg x 0 workers, 0 rows max (seg0), 0 rows min (seg0).
+   ->  Result (actual rows=340000 loops=1)
+         Rows out: 333333.33 rows avg x 3 workers, 340000 rows max (seg2), 322000 rows min (seg1).
+         ->  Result (actual rows=340000 loops=1)
+               Rows out: 333333.33 rows avg x 3 workers, 340000 rows max (seg2), 322000 rows min (seg1).
+               ->  Nested Loop (actual rows=1000000 loops=1)
+                     Join Filter: true
+                     Rows out: 1000000.00 rows avg x 3 workers, 1000000 rows max (seg0), 1000000 rows min (seg0).
+                     ->  Function Scan on generate_series generate_series_1 (actual rows=1000 loops=1)
+                           Rows out: 1000.00 rows avg x 3 workers, 1000 rows max (seg0), 1000 rows min (seg0).
+                     ->  Function Scan on generate_series (actual rows=999 loops=1001)
+                           Rows out: 1000001.00 rows avg x 3 workers, 1000001 rows max (seg0), 1000001 rows min (seg0).
+
+explain(costs off, summary off, timing off, analyze)
+select * from tt where a > b;
+QUERY PLAN
+___________
+ Gather Motion 3:1  (slice1; segments: 3) (actual rows=499500 loops=1)
+   Rows out: 499500.00 rows avg x 1 workers, 499500 rows max (seg-1), 499500 rows min (seg-1).
+   ->  Seq Scan on tt (actual rows=172218 loops=1)
+         Filter: (a > b)
+         Rows Removed by Filter: 167782
+         Rows out: 166500.00 rows avg x 3 workers, 172218 rows max (seg2), 160958 rows min (seg1).
+
+abort;
 --
 -- Test GPDB-specific EXPLAIN (SLICETABLE) option.
 --
diff --git a/src/test/regress/sql/gp_explain.sql b/src/test/regress/sql/gp_explain.sql
index ca9bbdb7d69..302123cae06 100644
--- a/src/test/regress/sql/gp_explain.sql
+++ b/src/test/regress/sql/gp_explain.sql
@@ -228,6 +228,16 @@ set gp_enable_explain_allstat=on;
 explain analyze SELECT * FROM explaintest;
 set gp_enable_explain_allstat=DEFAULT;
 
+-- Test explain rows out.
+begin;
+set local gp_enable_explain_rows_out=on;
+create table tt (a int, b int);
+explain(costs off, summary off, timing off, analyze)
+insert into tt select * from generate_series(1,1000)a,generate_series(1,1000)b;
+explain(costs off, summary off, timing off, analyze)
+select * from tt where a > b;
+abort;
+
 --
 -- Test GPDB-specific EXPLAIN (SLICETABLE) option.