From 8e09afa4ef1e911fcd3eeedb840164a43a694713 Mon Sep 17 00:00:00 2001 From: mkaruza Date: Fri, 22 Sep 2023 13:50:16 +0200 Subject: [PATCH] [columnar] Custom index scan * Custom index scan which pushes needed attributes to storage for faster index based scan. * Store complete stripe metadata list in memory --- columnar/src/backend/columnar/columnar.c | 12 + .../backend/columnar/columnar_customscan.c | 13 +- .../src/backend/columnar/columnar_indexscan.c | 926 ++++++++++++++++++ .../backend/columnar/columnar_planner_hook.c | 79 +- .../src/backend/columnar/columnar_tableam.c | 100 +- columnar/src/include/columnar/columnar.h | 1 + .../include/columnar/columnar_customscan.h | 1 + .../src/include/columnar/columnar_indexscan.h | 29 + .../src/include/columnar/columnar_tableam.h | 2 + columnar/src/test/regress/columnar_schedule | 1 + .../regress/expected/columnar_customindex.out | 43 + .../expected/columnar_customindex_1.out | 42 + .../test/regress/sql/columnar_customindex.sql | 33 + 13 files changed, 1264 insertions(+), 18 deletions(-) create mode 100644 columnar/src/backend/columnar/columnar_indexscan.c create mode 100644 columnar/src/include/columnar/columnar_indexscan.h create mode 100644 columnar/src/test/regress/expected/columnar_customindex.out create mode 100644 columnar/src/test/regress/expected/columnar_customindex_1.out create mode 100644 columnar/src/test/regress/sql/columnar_customindex.sql diff --git a/columnar/src/backend/columnar/columnar.c b/columnar/src/backend/columnar/columnar.c index 7e42b7aa..8c40d08b 100644 --- a/columnar/src/backend/columnar/columnar.c +++ b/columnar/src/backend/columnar/columnar.c @@ -49,6 +49,7 @@ bool columnar_enable_vectorization = true; bool columnar_enable_dml = true; bool columnar_enable_page_cache = true; int columnar_page_cache_size = 200U; +bool columnar_index_scan = false; static const struct config_enum_entry columnar_compression_options[] = { @@ -195,6 +196,17 @@ columnar_guc_init() NULL, NULL, NULL); + + 
DefineCustomBoolVariable("columnar.enable_columnar_index_scan", + gettext_noop("Enables custom columnar index scan"), + NULL, + &columnar_index_scan, + false, + PGC_USERSET, + GUC_NO_SHOW_ALL, + NULL, + NULL, + NULL); } diff --git a/columnar/src/backend/columnar/columnar_customscan.c b/columnar/src/backend/columnar/columnar_customscan.c index eb09a825..081bbcaf 100644 --- a/columnar/src/backend/columnar/columnar_customscan.c +++ b/columnar/src/backend/columnar/columnar_customscan.c @@ -178,7 +178,6 @@ static List * set_deparse_context_planstate(List *dpcontext, Node *node, /* other helpers */ static List * ColumnarVarNeeded(ColumnarScanState *columnarScanState); -static Bitmapset * ColumnarAttrNeeded(ScanState *ss, List *customList); static bool IsCreateTableAs(const char *query); /* saved hook value in case of unload */ @@ -608,9 +607,13 @@ CostColumnarIndexPath(PlannerInfo *root, RelOptInfo *rel, Oid relationId, * instead of overwriting total cost, we "add" ours to the cost estimated * by indexAM since we should consider index traversal related costs too. */ - Cost columnarIndexScanCost = ColumnarIndexScanAdditionalCost(root, rel, relationId, - indexPath); - indexPath->path.total_cost += columnarIndexScanCost; + + if (!columnar_index_scan) + { + Cost columnarIndexScanCost = ColumnarIndexScanAdditionalCost(root, rel, relationId, + indexPath); + indexPath->path.total_cost += columnarIndexScanCost; + } ereport(DEBUG4, (errmsg("columnar table index scan costs re-estimated " "by columnarAM (including indexAM costs): " @@ -1986,7 +1989,7 @@ ColumnarScan_BeginCustomScan(CustomScanState *cscanstate, EState *estate, int ef * Throws an error if finds a Var referencing to an attribute not supported * by ColumnarScan. 
*/ -static Bitmapset * +Bitmapset * ColumnarAttrNeeded(ScanState *ss, List *customList) { TupleTableSlot *slot = ss->ss_ScanTupleSlot; diff --git a/columnar/src/backend/columnar/columnar_indexscan.c b/columnar/src/backend/columnar/columnar_indexscan.c new file mode 100644 index 00000000..f2d01fb0 --- /dev/null +++ b/columnar/src/backend/columnar/columnar_indexscan.c @@ -0,0 +1,926 @@ +#include "postgres.h" + +#include "access/nbtree.h" +#include "access/relscan.h" +#include "access/tableam.h" +#include "catalog/pg_am.h" +#include "catalog/index.h" +#include "executor/execdebug.h" +#include "executor/nodeIndexscan.h" +#include "lib/pairingheap.h" +#include "miscadmin.h" +#include "nodes/extensible.h" +#include "nodes/nodeFuncs.h" +#include "storage/predicate.h" +#include "utils/array.h" +#include "utils/datum.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" + +#include "columnar/columnar_tableam.h" +#include "columnar/columnar_indexscan.h" +#include "columnar/columnar_customscan.h" + +/* + * When an ordering operator is used, tuples fetched from the index that + * need to be reordered are queued in a pairing heap, as ReorderTuples. 
+ */ +typedef struct +{ + pairingheap_node ph_node; + HeapTuple htup; + Datum *orderbyvals; + bool *orderbynulls; +} ReorderTuple; + +static TupleTableSlot *IndexNext(IndexScanState *node); +static TupleTableSlot *IndexNextWithReorder(IndexScanState *node); +static void EvalOrderByExpressions(IndexScanState *node, ExprContext *econtext); +static bool IndexRecheck(IndexScanState *node, TupleTableSlot *slot); +static int cmp_orderbyvals(const Datum *adist, const bool *anulls, + const Datum *bdist, const bool *bnulls, + IndexScanState *node); +static void reorderqueue_push(IndexScanState *node, TupleTableSlot *slot, + Datum *orderbyvals, bool *orderbynulls); +static HeapTuple reorderqueue_pop(IndexScanState *node); + + +#define RELATION_CHECKS \ +( \ + AssertMacro(RelationIsValid(indexRelation)), \ + AssertMacro(PointerIsValid(indexRelation->rd_indam)), \ + AssertMacro(!ReindexIsProcessingIndex(RelationGetRelid(indexRelation))) \ +) + +#define CHECK_REL_PROCEDURE(pname) \ +do { \ + if (indexRelation->rd_indam->pname == NULL) \ + elog(ERROR, "function \"%s\" is not defined for index \"%s\"", \ + CppAsString(pname), RelationGetRelationName(indexRelation)); \ +} while(0) + + +static IndexScanDesc +index_beginscan_internal(Relation indexRelation, + int nkeys, int norderbys, Snapshot snapshot, + ParallelIndexScanDesc pscan, bool temp_snap) +{ + IndexScanDesc scan; + + RELATION_CHECKS; + CHECK_REL_PROCEDURE(ambeginscan); + + if (!(indexRelation->rd_indam->ampredlocks)) + PredicateLockRelation(indexRelation, snapshot); + + /* + * We hold a reference count to the relcache entry throughout the scan. + */ + RelationIncrementReferenceCount(indexRelation); + + /* + * Tell the AM to open a scan. + */ + scan = indexRelation->rd_indam->ambeginscan(indexRelation, nkeys, + norderbys); + /* Initialize information for parallel scan. 
*/ + scan->parallel_scan = pscan; + scan->xs_temp_snap = temp_snap; + + return scan; +} + +/* + * index_beginscan - start a scan of an index with amgettuple + * + * Caller must be holding suitable locks on the heap and the index. + */ +static IndexScanDesc +columnar_index_beginscan(Relation heapRelation, + Relation indexRelation, + Snapshot snapshot, + int nkeys, int norderbys, + Bitmapset *attr_needed) +{ + IndexScanDesc scan; + + scan = index_beginscan_internal(indexRelation, nkeys, norderbys, snapshot, NULL, false); + + /* + * Save additional parameters into the scandesc. Everything else was set + * up by RelationGetIndexScan. + */ + scan->heapRelation = heapRelation; + scan->xs_snapshot = snapshot; + + /* prepare to fetch index matches from table */ + scan->xs_heapfetch = columnar_index_fetch_begin_extended(heapRelation, attr_needed); + + return scan; +} + +#include "optimizer/optimizer.h" + +/* ---------------------------------------------------------------- + * IndexNext + * + * Retrieve a tuple from the IndexScan node's currentRelation + * using the index specified in the IndexScanState information. 
+ * ---------------------------------------------------------------- + */ +static TupleTableSlot * +IndexNext(IndexScanState *node) +{ + EState *estate; + ExprContext *econtext; + ScanDirection direction; + IndexScanDesc scandesc; + TupleTableSlot *slot; + + /* + * extract necessary information from index scan node + */ + estate = node->ss.ps.state; + direction = estate->es_direction; + /* flip direction if this is an overall backward scan */ + if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indexorderdir)) + { + if (ScanDirectionIsForward(direction)) + direction = BackwardScanDirection; + else if (ScanDirectionIsBackward(direction)) + direction = ForwardScanDirection; + } + scandesc = node->iss_ScanDesc; + econtext = node->ss.ps.ps_ExprContext; + slot = node->ss.ss_ScanTupleSlot; + + if (scandesc == NULL) + { + /* + * We reach here if the index scan is not parallel, or if we're + * serially executing an index scan that was planned to be parallel. + */ + + Bitmapset *attr_needed = + ColumnarAttrNeeded(&node->ss, ((IndexScan *) node->ss.ps.plan)->indexqualorig); + + scandesc = + columnar_index_beginscan(node->ss.ss_currentRelation, + node->iss_RelationDesc, + estate->es_snapshot, + node->iss_NumScanKeys, + node->iss_NumOrderByKeys, + attr_needed); + + bms_free(attr_needed); + + node->iss_ScanDesc = scandesc; + + /* + * If no run-time keys to calculate or they are ready, go ahead and + * pass the scankeys to the index AM. + */ + if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady) + index_rescan(scandesc, + node->iss_ScanKeys, node->iss_NumScanKeys, + node->iss_OrderByKeys, node->iss_NumOrderByKeys); + } + + /* + * ok, now that we have what we need, fetch the next tuple. + */ + while (index_getnext_slot(scandesc, direction, slot)) + { + CHECK_FOR_INTERRUPTS(); + + /* + * If the index was lossy, we have to recheck the index quals using + * the fetched tuple. 
+ */ + if (scandesc->xs_recheck) + { + econtext->ecxt_scantuple = slot; + if (!ExecQualAndReset(node->indexqualorig, econtext)) + { + /* Fails recheck, so drop it and loop back for another */ + InstrCountFiltered2(node, 1); + continue; + } + } + + return slot; + } + + /* + * if we get here it means the index scan failed so we are at the end of + * the scan.. + */ + node->iss_ReachedEnd = true; + return ExecClearTuple(slot); +} + +/* ---------------------------------------------------------------- + * IndexNextWithReorder + * + * Like IndexNext, but this version can also re-check ORDER BY + * expressions, and reorder the tuples as necessary. + * ---------------------------------------------------------------- + */ +static TupleTableSlot * +IndexNextWithReorder(IndexScanState *node) +{ + EState *estate; + ExprContext *econtext; + IndexScanDesc scandesc; + TupleTableSlot *slot; + ReorderTuple *topmost = NULL; + bool was_exact; + Datum *lastfetched_vals; + bool *lastfetched_nulls; + int cmp; + + estate = node->ss.ps.state; + + /* + * Only forward scan is supported with reordering. Note: we can get away + * with just Asserting here because the system will not try to run the + * plan backwards if ExecSupportsBackwardScan() says it won't work. + * Currently, that is guaranteed because no index AMs support both + * amcanorderbyop and amcanbackward; if any ever do, + * ExecSupportsBackwardScan() will need to consider indexorderbys + * explicitly. + */ + Assert(!ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indexorderdir)); + Assert(ScanDirectionIsForward(estate->es_direction)); + + scandesc = node->iss_ScanDesc; + econtext = node->ss.ps.ps_ExprContext; + slot = node->ss.ss_ScanTupleSlot; + + if (scandesc == NULL) + { + /* + * We reach here if the index scan is not parallel, or if we're + * serially executing an index scan that was planned to be parallel. 
+ */ + + Bitmapset *attr_needed = + ColumnarAttrNeeded(&node->ss, ((IndexScan *) node->ss.ps.plan)->indexqualorig); + + scandesc = + columnar_index_beginscan(node->ss.ss_currentRelation, + node->iss_RelationDesc, + estate->es_snapshot, + node->iss_NumScanKeys, + node->iss_NumOrderByKeys, + attr_needed); + + bms_free(attr_needed); + + node->iss_ScanDesc = scandesc; + + /* + * If no run-time keys to calculate or they are ready, go ahead and + * pass the scankeys to the index AM. + */ + if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady) + index_rescan(scandesc, + node->iss_ScanKeys, node->iss_NumScanKeys, + node->iss_OrderByKeys, node->iss_NumOrderByKeys); + } + + for (;;) + { + CHECK_FOR_INTERRUPTS(); + + /* + * Check the reorder queue first. If the topmost tuple in the queue + * has an ORDER BY value smaller than (or equal to) the value last + * returned by the index, we can return it now. + */ + if (!pairingheap_is_empty(node->iss_ReorderQueue)) + { + topmost = (ReorderTuple *) pairingheap_first(node->iss_ReorderQueue); + + if (node->iss_ReachedEnd || + cmp_orderbyvals(topmost->orderbyvals, + topmost->orderbynulls, + scandesc->xs_orderbyvals, + scandesc->xs_orderbynulls, + node) <= 0) + { + HeapTuple tuple; + + tuple = reorderqueue_pop(node); + + /* Pass 'true', as the tuple in the queue is a palloc'd copy */ + ExecForceStoreHeapTuple(tuple, slot, true); + return slot; + } + } + else if (node->iss_ReachedEnd) + { + /* Queue is empty, and no more tuples from index. We're done. */ + return ExecClearTuple(slot); + } + + /* + * Fetch next tuple from the index. + */ +next_indextuple: + if (!index_getnext_slot(scandesc, ForwardScanDirection, slot)) + { + /* + * No more tuples from the index. But we still need to drain any + * remaining tuples from the queue before we're done. + */ + node->iss_ReachedEnd = true; + continue; + } + + /* + * If the index was lossy, we have to recheck the index quals and + * ORDER BY expressions using the fetched tuple. 
+ */ + if (scandesc->xs_recheck) + { + econtext->ecxt_scantuple = slot; + if (!ExecQualAndReset(node->indexqualorig, econtext)) + { + /* Fails recheck, so drop it and loop back for another */ + InstrCountFiltered2(node, 1); + /* allow this loop to be cancellable */ + CHECK_FOR_INTERRUPTS(); + goto next_indextuple; + } + } + + if (scandesc->xs_recheckorderby) + { + econtext->ecxt_scantuple = slot; + ResetExprContext(econtext); + EvalOrderByExpressions(node, econtext); + + /* + * Was the ORDER BY value returned by the index accurate? The + * recheck flag means that the index can return inaccurate values, + * but then again, the value returned for any particular tuple + * could also be exactly correct. Compare the value returned by + * the index with the recalculated value. (If the value returned + * by the index happened to be exact right, we can often avoid + * pushing the tuple to the queue, just to pop it back out again.) + */ + cmp = cmp_orderbyvals(node->iss_OrderByValues, + node->iss_OrderByNulls, + scandesc->xs_orderbyvals, + scandesc->xs_orderbynulls, + node); + if (cmp < 0) + elog(ERROR, "index returned tuples in wrong order"); + else if (cmp == 0) + was_exact = true; + else + was_exact = false; + lastfetched_vals = node->iss_OrderByValues; + lastfetched_nulls = node->iss_OrderByNulls; + } + else + { + was_exact = true; + lastfetched_vals = scandesc->xs_orderbyvals; + lastfetched_nulls = scandesc->xs_orderbynulls; + } + + /* + * Can we return this tuple immediately, or does it need to be pushed + * to the reorder queue? If the ORDER BY expression values returned + * by the index were inaccurate, we can't return it yet, because the + * next tuple from the index might need to come before this one. Also, + * we can't return it yet if there are any smaller tuples in the queue + * already. 
+ */ + if (!was_exact || (topmost && cmp_orderbyvals(lastfetched_vals, + lastfetched_nulls, + topmost->orderbyvals, + topmost->orderbynulls, + node) > 0)) + { + /* Put this tuple to the queue */ + reorderqueue_push(node, slot, lastfetched_vals, lastfetched_nulls); + continue; + } + else + { + /* Can return this tuple immediately. */ + return slot; + } + } + + /* + * if we get here it means the index scan failed so we are at the end of + * the scan.. + */ + return ExecClearTuple(slot); +} + +/* + * Calculate the expressions in the ORDER BY clause, based on the heap tuple. + */ +static void +EvalOrderByExpressions(IndexScanState *node, ExprContext *econtext) +{ + int i; + ListCell *l; + MemoryContext oldContext; + + oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); + + i = 0; + foreach(l, node->indexorderbyorig) + { + ExprState *orderby = (ExprState *) lfirst(l); + + node->iss_OrderByValues[i] = ExecEvalExpr(orderby, + econtext, + &node->iss_OrderByNulls[i]); + i++; + } + + MemoryContextSwitchTo(oldContext); +} + +/* + * IndexRecheck -- access method routine to recheck a tuple in EvalPlanQual + */ +static bool +IndexRecheck(IndexScanState *node, TupleTableSlot *slot) +{ + ExprContext *econtext; + + /* + * extract necessary information from index scan node + */ + econtext = node->ss.ps.ps_ExprContext; + + /* Does the tuple meet the indexqual condition? */ + econtext->ecxt_scantuple = slot; + return ExecQualAndReset(node->indexqualorig, econtext); +} + + +/* + * Compare ORDER BY expression values. + */ +static int +cmp_orderbyvals(const Datum *adist, const bool *anulls, + const Datum *bdist, const bool *bnulls, + IndexScanState *node) +{ + int i; + int result; + + for (i = 0; i < node->iss_NumOrderByKeys; i++) + { + SortSupport ssup = &node->iss_SortSupport[i]; + + /* + * Handle nulls. We only need to support NULLS LAST ordering, because + * match_pathkeys_to_index() doesn't consider indexorderby + * implementation otherwise. 
+ */ + if (anulls[i] && !bnulls[i]) + return 1; + else if (!anulls[i] && bnulls[i]) + return -1; + else if (anulls[i] && bnulls[i]) + return 0; + + result = ssup->comparator(adist[i], bdist[i], ssup); + if (result != 0) + return result; + } + + return 0; +} + + +/* + * Helper function to push a tuple to the reorder queue. + */ +static void +reorderqueue_push(IndexScanState *node, TupleTableSlot *slot, + Datum *orderbyvals, bool *orderbynulls) +{ + IndexScanDesc scandesc = node->iss_ScanDesc; + EState *estate = node->ss.ps.state; + MemoryContext oldContext = MemoryContextSwitchTo(estate->es_query_cxt); + ReorderTuple *rt; + int i; + + rt = (ReorderTuple *) palloc(sizeof(ReorderTuple)); + rt->htup = ExecCopySlotHeapTuple(slot); + rt->orderbyvals = + (Datum *) palloc(sizeof(Datum) * scandesc->numberOfOrderBys); + rt->orderbynulls = + (bool *) palloc(sizeof(bool) * scandesc->numberOfOrderBys); + for (i = 0; i < node->iss_NumOrderByKeys; i++) + { + if (!orderbynulls[i]) + rt->orderbyvals[i] = datumCopy(orderbyvals[i], + node->iss_OrderByTypByVals[i], + node->iss_OrderByTypLens[i]); + else + rt->orderbyvals[i] = (Datum) 0; + rt->orderbynulls[i] = orderbynulls[i]; + } + pairingheap_add(node->iss_ReorderQueue, &rt->ph_node); + + MemoryContextSwitchTo(oldContext); +} + +/* + * Helper function to pop the next tuple from the reorder queue. 
+ */ +static HeapTuple +reorderqueue_pop(IndexScanState *node) +{ + HeapTuple result; + ReorderTuple *topmost; + int i; + + topmost = (ReorderTuple *) pairingheap_remove_first(node->iss_ReorderQueue); + + result = topmost->htup; + for (i = 0; i < node->iss_NumOrderByKeys; i++) + { + if (!node->iss_OrderByTypByVals[i] && !topmost->orderbynulls[i]) + pfree(DatumGetPointer(topmost->orderbyvals[i])); + } + pfree(topmost->orderbyvals); + pfree(topmost->orderbynulls); + pfree(topmost); + + return result; +} + + +/* ---------------------------------------------------------------- + * ExecIndexScan(node) + * ---------------------------------------------------------------- + */ +static TupleTableSlot * +ExecIndexScan(PlanState *pstate) +{ + ColumnarIndexScanState *ciis = (ColumnarIndexScanState *) pstate; + IndexScanState *node = castNode(IndexScanState, ciis->indexscan_state); + + /* + * If we have runtime keys and they've not already been set up, do it now. + */ + if (node->iss_NumRuntimeKeys != 0 && !node->iss_RuntimeKeysReady) + ExecReScan((PlanState *) node); + + if (node->iss_NumOrderByKeys > 0) + return ExecScan(&node->ss, + (ExecScanAccessMtd) IndexNextWithReorder, + (ExecScanRecheckMtd) IndexRecheck); + else + return ExecScan(&node->ss, + (ExecScanAccessMtd) IndexNext, + (ExecScanRecheckMtd) IndexRecheck); +} + +/* ---------------------------------------------------------------- + * ExecIndexScanInitializeDSM + * + * Set up a parallel index scan descriptor. 
+ * ---------------------------------------------------------------- + */ +static void +ColumnarIndexScan_ExecIndexScanInitializeDSM(IndexScanState *node, + ParallelContext *pcxt, + void *coordinate) +{ + EState *estate = node->ss.ps.state; + ParallelIndexScanDesc piscan = (ParallelIndexScanDesc) coordinate; + + index_parallelscan_initialize(node->ss.ss_currentRelation, + node->iss_RelationDesc, + estate->es_snapshot, + piscan); + + node->iss_ScanDesc = + index_beginscan_parallel(node->ss.ss_currentRelation, + node->iss_RelationDesc, + node->iss_NumScanKeys, + node->iss_NumOrderByKeys, + piscan); + + /* + * If no run-time keys to calculate or they are ready, go ahead and pass + * the scankeys to the index AM. + */ + if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady) + index_rescan(node->iss_ScanDesc, + node->iss_ScanKeys, node->iss_NumScanKeys, + node->iss_OrderByKeys, node->iss_NumOrderByKeys); +} + +/* ---------------------------------------------------------------- + * ExecIndexScanInitializeWorker + * + * Copy relevant information from TOC into planstate. + * ---------------------------------------------------------------- + */ + +static void +ColumnarIndexScan_ExecIndexScanInitializeWorker(IndexScanState *node, + void *coordinate) +{ + ParallelIndexScanDesc piscan = (ParallelIndexScanDesc) coordinate; + + node->iss_ScanDesc = + index_beginscan_parallel(node->ss.ss_currentRelation, + node->iss_RelationDesc, + node->iss_NumScanKeys, + node->iss_NumOrderByKeys, + piscan); + + /* + * If no run-time keys to calculate or they are ready, go ahead and pass + * the scankeys to the index AM. 
+ */ + if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady) + index_rescan(node->iss_ScanDesc, + node->iss_ScanKeys, node->iss_NumScanKeys, + node->iss_OrderByKeys, node->iss_NumOrderByKeys); +} + + + +/* CustomScanMethods */ +static Node * CreateColumnarIndexScanState(CustomScan *custom_plan); + +/* CustomScanExecMethods */ +static void ColumnarIndexScan_BeginCustomScan(CustomScanState *node, + EState *estate, int eflags); +static TupleTableSlot * ColumnarIndexScan_ExecCustomScan(CustomScanState *node); +static void ColumnarIndexScan_EndCustomScan(CustomScanState *node); +static void ColumnarIndexScan_ExplainCustomScan(CustomScanState *node, + List *ancestors, + ExplainState *es); +static Size ColumnarIndexScan_EstimateDSMCustomScan(CustomScanState *node, + ParallelContext *pcxt); +static void ColumnarIndexScan_InitializeDSMCustomScan(CustomScanState *node, + ParallelContext *pcxt, + void *coordinate); +static void ColumnarIndexScan_ReinitializeDSMCustomScan(CustomScanState *node, + ParallelContext *pcxt, + void *coordinate); +static void ColumnarIndexScan_InitializeWorkerCustomScan(CustomScanState *node, + shm_toc *toc, + void *coordinate); +static CustomScanMethods ColumnarIndexCustomScanMethods = { + "ColumnarIndexScan", /* CustomName */ + CreateColumnarIndexScanState, /* CreateCustomScanState */ +}; + +static CustomExecMethods ColumnarIndexScanExecMethods = { + .CustomName = "ColumnarIndexScan", + + .BeginCustomScan = ColumnarIndexScan_BeginCustomScan, + .ExecCustomScan = ColumnarIndexScan_ExecCustomScan, + .EndCustomScan = ColumnarIndexScan_EndCustomScan, + + .ExplainCustomScan = ColumnarIndexScan_ExplainCustomScan, + + .EstimateDSMCustomScan = ColumnarIndexScan_EstimateDSMCustomScan, + .InitializeDSMCustomScan = ColumnarIndexScan_InitializeDSMCustomScan, + .ReInitializeDSMCustomScan = ColumnarIndexScan_ReinitializeDSMCustomScan, + .InitializeWorkerCustomScan = ColumnarIndexScan_InitializeWorkerCustomScan +}; + + +static Node * 
+CreateColumnarIndexScanState(CustomScan *custom_plan) +{ + ColumnarIndexScanState *ciss = (ColumnarIndexScanState *) newNode( + sizeof(ColumnarIndexScanState), T_CustomScanState); + + CustomScanState *cscanstate = &ciss->css; + cscanstate->methods = &ColumnarIndexScanExecMethods; + + return (Node *) cscanstate; +} + + +static void +ColumnarIndexScan_BeginCustomScan(CustomScanState *css, EState *estate, int eflags) +{ + ColumnarIndexScanState *ciis = (ColumnarIndexScanState*) css; + CustomScan *cscan = (CustomScan *)css->ss.ps.plan; + IndexScan *isNode = (IndexScan *) linitial(cscan->custom_plans); + + /* Free the exprcontext */ + ExecFreeExprContext(&css->ss.ps); + + /* Clean out the tuple table */ + ExecClearTuple(css->ss.ps.ps_ResultTupleSlot); + ExecClearTuple(css->ss.ss_ScanTupleSlot); + + ciis->indexscan_state = ExecInitIndexScan(isNode, estate, eflags); + + /*Initialize result type and projection. */ + ExecInitResultTypeTL(&ciis->css.ss.ps); +} + + +static TupleTableSlot * +ColumnarIndexScan_ExecCustomScan(CustomScanState *node) +{ + return ExecIndexScan((PlanState *) node); +} + + +static void +ColumnarIndexScan_EndCustomScan(CustomScanState *node) +{ + ExecEndIndexScan(((ColumnarIndexScanState *)node)->indexscan_state); +} + +#include "utils/ruleutils.h" +#include "nodes/makefuncs.h" + +/* + * Show a generic expression + */ +static void +show_expression(Node *node, const char *qlabel, + PlanState *planstate, List *ancestors, + bool useprefix, ExplainState *es) +{ + List *context; + char *exprstr; + + /* Set up deparsing context */ + context = set_deparse_context_plan(es->deparse_cxt, + planstate->plan, + ancestors); + + /* Deparse the expression */ + exprstr = deparse_expression(node, context, useprefix, false); + + /* And add to es->str */ + ExplainPropertyText(qlabel, exprstr, es); +} + +/* + * Show a qualifier expression (which is a List with implicit AND semantics) + */ +static void +show_qual(List *qual, const char *qlabel, + PlanState *planstate, 
List *ancestors, + bool useprefix, ExplainState *es) +{ + Node *node; + + /* No work if empty qual */ + if (qual == NIL) + return; + + /* Convert AND list to explicit AND */ + node = (Node *) make_ands_explicit(qual); + + /* And show it */ + show_expression(node, qlabel, planstate, ancestors, useprefix, es); +} + +/* + * Show a qualifier expression for a scan plan node + */ +static void +show_scan_qual(List *qual, const char *qlabel, + PlanState *planstate, List *ancestors, + ExplainState *es) +{ + bool useprefix; + + useprefix = (IsA(planstate->plan, SubqueryScan) || es->verbose); + show_qual(qual, qlabel, planstate, ancestors, useprefix, es); +} + + +static void +show_instrumentation_count(const char *qlabel, int which, + PlanState *planstate, ExplainState *es) +{ + double nfiltered; + double nloops; + + if (!es->analyze || !planstate->instrument) + return; + + if (which == 2) + nfiltered = planstate->instrument->nfiltered2; + else + nfiltered = planstate->instrument->nfiltered1; + nloops = planstate->instrument->nloops; + + /* In text mode, suppress zero counts; they're not interesting enough */ + if (nfiltered > 0 || es->format != EXPLAIN_FORMAT_TEXT) + { + if (nloops > 0) + ExplainPropertyFloat(qlabel, NULL, nfiltered / nloops, 0, es); + else + ExplainPropertyFloat(qlabel, NULL, 0.0, 0, es); + } +} + +static void +ColumnarIndexScan_ExplainCustomScan(CustomScanState *node, List *ancestors, ExplainState *es) +{ + + ColumnarIndexScanState *ciis = (ColumnarIndexScanState*) node; + CustomScan *cscan = (CustomScan *) node->ss.ps.plan; + IndexScan *isNode = (IndexScan *) linitial(cscan->custom_plans); + + const char *indexname = get_rel_name(isNode->indexid); + + ExplainPropertyText("ColumnarIndexScan using ", indexname, es); + + show_scan_qual(((IndexScan *) isNode)->indexqualorig, + "Index Cond", &ciis->css.ss.ps, ancestors, es); + + if (isNode->indexqualorig) + show_instrumentation_count("Rows Removed by Index Recheck", 2, + &ciis->css.ss.ps, es); + + 
show_scan_qual(isNode->indexorderbyorig, + "Order By", &ciis->css.ss.ps, ancestors, es); + + show_scan_qual(node->ss.ps.plan->qual, "Filter", &ciis->css.ss.ps, ancestors, es); + + if (node->ss.ps.plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + &ciis->css.ss.ps, es); +} + +/* Parallel Execution */ + +static Size +ColumnarIndexScan_EstimateDSMCustomScan(CustomScanState *node, + ParallelContext *pcxt) +{ + ColumnarIndexScanState *ciis = (ColumnarIndexScanState*) node; + + ExecIndexScanEstimate(ciis->indexscan_state, pcxt); + + return ciis->indexscan_state->iss_PscanLen; +} + + +static void +ColumnarIndexScan_InitializeDSMCustomScan(CustomScanState *node, + ParallelContext *pcxt, + void *coordinate) +{ + ColumnarIndexScanState *ciis = (ColumnarIndexScanState*) node; + ColumnarIndexScan_ExecIndexScanInitializeDSM(ciis->indexscan_state, pcxt, coordinate); +} + + +static void +ColumnarIndexScan_ReinitializeDSMCustomScan(CustomScanState *node, + ParallelContext *pcxt, + void *coordinate) +{ + ColumnarIndexScanState *ciis = (ColumnarIndexScanState*) node; + ExecIndexScanReInitializeDSM(ciis->indexscan_state, pcxt); +} + + +static void +ColumnarIndexScan_InitializeWorkerCustomScan(CustomScanState *node, + shm_toc *toc, + void *coordinate) +{ + ColumnarIndexScanState *ciis = (ColumnarIndexScanState*) node; + ColumnarIndexScan_ExecIndexScanInitializeWorker(ciis->indexscan_state, coordinate); +} + + +CustomScan * +columnar_create_indexscan_node(void) +{ + CustomScan *cscan = (CustomScan *) makeNode(CustomScan); + cscan->methods = &ColumnarIndexCustomScanMethods; + return cscan; +} + + +void +columnar_register_indexscan_node(void) +{ + RegisterCustomScanMethods(&ColumnarIndexCustomScanMethods); +} diff --git a/columnar/src/backend/columnar/columnar_planner_hook.c b/columnar/src/backend/columnar/columnar_planner_hook.c index 5a949f36..37f7ef3c 100644 --- a/columnar/src/backend/columnar/columnar_planner_hook.c +++ 
b/columnar/src/backend/columnar/columnar_planner_hook.c @@ -17,6 +17,8 @@ #include "access/amapi.h" #include "catalog/pg_aggregate.h" #include "catalog/pg_am.h" +#include "catalog/pg_class.h" +#include "catalog/pg_index.h" #include "catalog/pg_statistic.h" #include "catalog/pg_operator.h" #include "catalog/pg_proc.h" @@ -39,12 +41,14 @@ #include "columnar/columnar.h" #include "columnar/columnar_customscan.h" +#include "columnar/columnar_indexscan.h" #include "columnar/vectorization/columnar_vector_execution.h" #include "columnar/vectorization/nodes/columnar_aggregator_node.h" #include "columnar/utils/listutils.h" static planner_hook_type PreviousPlannerHook = NULL; +static Oid columnar_tableam_oid = InvalidOid; static PlannedStmt * ColumnarPlannerHook(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams); @@ -57,11 +61,37 @@ typedef struct PlanTreeMutatorContext bool vectorizedAggregation; } PlanTreeMutatorContext; - #define FLATCOPY(newnode, node, nodetype) \ ( (newnode) = (nodetype *) palloc(sizeof(nodetype)), \ memcpy((newnode), (node), sizeof(nodetype)) ) + +static bool +columnar_index_table(Oid indexOid, Oid columnarTableAmOid) +{ + HeapTuple ht_idx; + Form_pg_index idxrec; + HeapTuple ht_table; + Form_pg_class tablerec; + bool index_on_columnar = false; + + /* + * Fetch the pg_index tuple by the Oid of the index + */ + ht_idx = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexOid)); + idxrec = (Form_pg_index) GETSTRUCT(ht_idx); + + ht_table = SearchSysCache1(RELOID, ObjectIdGetDatum(idxrec->indrelid)); + tablerec = (Form_pg_class) GETSTRUCT(ht_table); + + index_on_columnar = tablerec->relam == columnarTableAmOid; + + ReleaseSysCache(ht_idx); + ReleaseSysCache(ht_table); + + return index_on_columnar; +} + static Node * AggRefArgsExpressionMutator(Node *node, void *context) { @@ -192,6 +222,9 @@ PlanTreeMutator(Plan *node, void *context) Agg *newAgg; CustomScan *vectorizedAggNode; + if (!columnar_enable_vectorization) + return 
node; + if (aggNode->plan.lefttree->type == T_CustomScan) { if (aggNode->aggstrategy == AGG_PLAIN) @@ -236,9 +269,41 @@ PlanTreeMutator(Plan *node, void *context) break; } + case T_IndexScan: + { + if (!columnar_index_scan) + return node; + + IndexScan *indexScanNode = (IndexScan *) node; + IndexScan *newIndexScan; + CustomScan *columnarIndexScan; + + /* Check if index is build on columnar table */ + if (!columnar_index_table(indexScanNode->indexid, columnar_tableam_oid)) + return node; + + columnarIndexScan = columnar_create_indexscan_node(); + FLATCOPY(newIndexScan, indexScanNode, IndexScan); + + columnarIndexScan->custom_plans = + lappend(columnarIndexScan->custom_plans, newIndexScan); + + columnarIndexScan->scan.plan.targetlist = + CustomBuildTargetList(indexScanNode->scan.plan.targetlist, INDEX_VAR); + + columnarIndexScan->custom_scan_tlist = newIndexScan->scan.plan.targetlist; + + Plan *columnarIndexScanPlan = (Plan *) columnarIndexScan; + columnarIndexScanPlan->parallel_aware = indexScanNode->scan.plan.parallel_aware; + columnarIndexScanPlan->startup_cost = indexScanNode->scan.plan.startup_cost; + columnarIndexScanPlan->total_cost = indexScanNode->scan.plan.total_cost; + columnarIndexScanPlan->plan_rows = indexScanNode->scan.plan.plan_rows; + columnarIndexScanPlan->plan_width = indexScanNode->scan.plan.plan_width; + + return (Plan *) columnarIndexScan; + } default: { - break; } } @@ -269,11 +334,14 @@ ColumnarPlannerHook(Query *parse, stmt = standard_planner(parse, query_string, cursorOptions, boundParams); #if PG_VERSION_NUM >= PG_VERSION_14 - if (!columnar_enable_vectorization /* Vectorization should be enabled */ - || stmt->commandType != CMD_SELECT /* only SELECTS are supported */ - || list_length(stmt->rtable) != 1) /* JOINs are not yet supported */ + if (!(columnar_enable_vectorization /* Vectorization should be enabled */ + || columnar_index_scan) /* or Columnar Index Scan */ + || stmt->commandType != CMD_SELECT /* only SELECTS are supported */ + || 
list_length(stmt->rtable) != 1) /* JOINs are not yet supported */ return stmt; + if (columnar_tableam_oid == InvalidOid) + columnar_tableam_oid = get_table_am_oid("columnar", true); savedPlanTree = stmt->planTree; savedSubplan = stmt->subplans; @@ -329,4 +397,5 @@ void columnar_planner_init(void) #if PG_VERSION_NUM >= PG_VERSION_14 columnar_register_aggregator_node(); #endif + columnar_register_indexscan_node(); } \ No newline at end of file diff --git a/columnar/src/backend/columnar/columnar_tableam.c b/columnar/src/backend/columnar/columnar_tableam.c index 396aaca7..c40970c2 100644 --- a/columnar/src/backend/columnar/columnar_tableam.c +++ b/columnar/src/backend/columnar/columnar_tableam.c @@ -111,6 +111,9 @@ typedef struct IndexFetchColumnarData { IndexFetchTableData cs_base; ColumnarReadState *cs_readState; + Bitmapset *attr_needed; + List *stripeMetadataList; + bool is_select_query; /* CustomIndexScan only gets planned with SELECT query */ /* * We initialize cs_readState lazily in the first columnar_index_fetch_tuple @@ -502,7 +505,37 @@ columnar_index_fetch_begin(Relation rel) IndexFetchColumnarData *scan = palloc0(sizeof(IndexFetchColumnarData)); scan->cs_base.rel = rel; scan->cs_readState = NULL; + scan->stripeMetadataList = NIL; scan->scanContext = scanContext; + scan->is_select_query = false; + + MemoryContextSwitchTo(oldContext); + + return &scan->cs_base; +} + +IndexFetchTableData * +columnar_index_fetch_begin_extended(Relation rel, Bitmapset *attr_needed) +{ + Oid relfilenode = rel->rd_node.relNode; + if (PendingWritesInUpperTransactions(relfilenode, GetCurrentSubTransactionId())) + { + /* XXX: maybe we can just flush the data and continue */ + elog(ERROR, "cannot read from index when there is unflushed data in " + "upper transactions"); + } + + MemoryContext scanContext = CreateColumnarScanMemoryContext(); + MemoryContext oldContext = MemoryContextSwitchTo(scanContext); + + IndexFetchColumnarData *scan = palloc0(sizeof(IndexFetchColumnarData)); + 
scan->cs_base.rel = rel; + scan->cs_readState = NULL; + scan->stripeMetadataList = NIL; + scan->scanContext = scanContext; + + scan->attr_needed = bms_copy(attr_needed); + scan->is_select_query = true; MemoryContextSwitchTo(oldContext); @@ -529,6 +562,8 @@ columnar_index_fetch_end(IndexFetchTableData *sscan) scan->cs_readState = NULL; } + bms_free(scan->attr_needed); + /* clean up any caches. */ if (columnar_enable_page_cache == true) { @@ -536,6 +571,40 @@ columnar_index_fetch_end(IndexFetchTableData *sscan) } } +static StripeMetadata * +FindStripeMetadataFromListBinarySearch(IndexFetchColumnarData *scan, uint64 rowNumber) +{ + ListCell *lc = NULL; + + int high = scan->stripeMetadataList->length - 1; + int low = 0; + + while(low <= high) + { + int mid = low + (high - low) / 2; + + lc = list_nth_cell(scan->stripeMetadataList, mid); + + StripeMetadata *stripeMetadata = lc->ptr_value; + + if (rowNumber >= stripeMetadata->firstRowNumber && + rowNumber < stripeMetadata->firstRowNumber + stripeMetadata->rowCount) + { + return stripeMetadata; + } + + if (stripeMetadata->firstRowNumber > rowNumber) + { + high = mid - 1; + } + else + { + low = mid + 1; + } + } + return NULL; +} + static bool columnar_index_fetch_tuple(struct IndexFetchTableData *sscan, @@ -567,25 +636,38 @@ columnar_index_fetch_tuple(struct IndexFetchTableData *sscan, /* initialize read state for the first row */ if (scan->cs_readState == NULL) { - /* we need all columns */ - int natts = columnarRelation->rd_att->natts; - Bitmapset *attr_needed = bms_add_range(NULL, 0, natts - 1); - /* no quals for index scan */ List *scanQual = NIL; + if (bms_is_empty(scan->attr_needed)) + { + /* we need all columns */ + int natts = columnarRelation->rd_att->natts; + bms_free(scan->attr_needed); + scan->attr_needed = bms_add_range(NULL, 0, natts - 1); + } + bool randomAccess = true; scan->cs_readState = init_columnar_read_state(columnarRelation, slot->tts_tupleDescriptor, - attr_needed, scanQual, + scan->attr_needed, 
scanQual, scan->scanContext, snapshot, randomAccess, NULL); + if (scan->is_select_query) + scan->stripeMetadataList = + StripesForRelfilenode(columnarRelation->rd_node, ForwardScanDirection); } uint64 rowNumber = tid_to_row_number(*tid); - StripeMetadata *stripeMetadata = - FindStripeWithMatchingFirstRowNumber(columnarRelation, rowNumber, snapshot); + + StripeMetadata *stripeMetadata = NULL; + + if (scan->is_select_query) + stripeMetadata = FindStripeMetadataFromListBinarySearch(scan, rowNumber); + else + stripeMetadata = FindStripeWithMatchingFirstRowNumber(columnarRelation, rowNumber, snapshot); + if (!stripeMetadata) { /* it is certain that tuple with rowNumber doesn't exist */ @@ -593,6 +675,7 @@ columnar_index_fetch_tuple(struct IndexFetchTableData *sscan, } StripeWriteStateEnum stripeWriteState = StripeWriteState(stripeMetadata); + if (stripeWriteState == STRIPE_WRITE_FLUSHED && !ColumnarReadRowByRowNumber(scan->cs_readState, rowNumber, slot->tts_values, slot->tts_isnull)) @@ -682,7 +765,8 @@ columnar_index_fetch_tuple(struct IndexFetchTableData *sscan, Assert(stripeWriteState == STRIPE_WRITE_FLUSHED); } - pfree(stripeMetadata); + if (!scan->is_select_query) + pfree(stripeMetadata); slot->tts_tableOid = RelationGetRelid(columnarRelation); slot->tts_tid = *tid; ExecStoreVirtualTuple(slot); diff --git a/columnar/src/include/columnar/columnar.h b/columnar/src/include/columnar/columnar.h index f912715f..67751437 100644 --- a/columnar/src/include/columnar/columnar.h +++ b/columnar/src/include/columnar/columnar.h @@ -255,6 +255,7 @@ extern bool columnar_enable_vectorization; extern bool columnar_enable_dml; extern bool columnar_enable_page_cache; extern int columnar_page_cache_size; +extern bool columnar_index_scan; /* called when the user changes options on the given relation */ diff --git a/columnar/src/include/columnar/columnar_customscan.h b/columnar/src/include/columnar/columnar_customscan.h index e8c7fe3d..9a2b463b 100644 --- 
a/columnar/src/include/columnar/columnar_customscan.h +++ b/columnar/src/include/columnar/columnar_customscan.h @@ -20,5 +20,6 @@ extern void columnar_customscan_init(void); extern const CustomScanMethods * columnar_customscan_methods(void); +extern Bitmapset * ColumnarAttrNeeded(ScanState *ss, List *customList); #endif /* COLUMNAR_CUSTOMSCAN_H */ diff --git a/columnar/src/include/columnar/columnar_indexscan.h b/columnar/src/include/columnar/columnar_indexscan.h new file mode 100644 index 00000000..78c840f5 --- /dev/null +++ b/columnar/src/include/columnar/columnar_indexscan.h @@ -0,0 +1,29 @@ +/*------------------------------------------------------------------------- + * + * columnar_indexscan.h + * Custom scan method for index + * + * IDENTIFICATION + * src/backend/columnar/columnar_indexscan.c + * + *------------------------------------------------------------------------- + */ + + +#ifndef COLUMNAR_INDEXSCAN_H +#define COLUMNAR_INDEXSCAN_H + +#include "postgres.h" + +#include "nodes/execnodes.h" + +typedef struct ColumnarIndexScanState +{ + CustomScanState css; + IndexScanState *indexscan_state; +} ColumnarIndexScanState; + +extern CustomScan * columnar_create_indexscan_node(void); +extern void columnar_register_indexscan_node(void); + +#endif \ No newline at end of file diff --git a/columnar/src/include/columnar/columnar_tableam.h b/columnar/src/include/columnar/columnar_tableam.h index 5bce529f..2bba9262 100644 --- a/columnar/src/include/columnar/columnar_tableam.h +++ b/columnar/src/include/columnar/columnar_tableam.h @@ -59,6 +59,8 @@ extern TableScanDesc columnar_beginscan_extended(Relation relation, Snapshot sna List *scanQual, ParallelColumnarScan parallelColumnarScan, bool returnVectorResult); +extern IndexFetchTableData * columnar_index_fetch_begin_extended(Relation rel, + Bitmapset *attr_neededs); extern int64 ColumnarScanChunkGroupsFiltered(ColumnarScanDesc columnarScanDesc); extern bool ColumnarSupportsIndexAM(char *indexAMName); extern bool 
IsColumnarTableAmTable(Oid relationId); diff --git a/columnar/src/test/regress/columnar_schedule b/columnar/src/test/regress/columnar_schedule index 7b2bf5d7..277d4db7 100644 --- a/columnar/src/test/regress/columnar_schedule +++ b/columnar/src/test/regress/columnar_schedule @@ -36,3 +36,4 @@ test: columnar_alter_table_set_access_method test: columnar_cache test: columnar_aggregates test: columnar_upsert +test: columnar_customindex diff --git a/columnar/src/test/regress/expected/columnar_customindex.out b/columnar/src/test/regress/expected/columnar_customindex.out new file mode 100644 index 00000000..0ca7f9c2 --- /dev/null +++ b/columnar/src/test/regress/expected/columnar_customindex.out @@ -0,0 +1,43 @@ +-- +-- Test custom index: Test shows only that we are exchanging Index scan with CustomIndexScan while +-- all other information is still the same. The feature is not visible through EXPLAIN because output +-- will be same in both cases, but difference is noticeable how the columnar storage engine is read. +-- With index scan ALL columns are requested from storage (this can be significant overhead in performance) +-- while with CustomIndexScan we will request only columns that are needed - that are going to be used as output. +-- Test also shows that we are not using CustomIndexScan with heap tables and only on columnar tables. 
+-- +SET columnar.enable_columnar_index_scan TO TRUE; +CREATE TABLE t(a INT PRIMARY KEY, b INT, c TEXT) USING columnar; +CREATE INDEX t_idx ON t USING btree(b); +CREATE TABLE t_heap(a INT PRIMARY KEY, b INT, c TEXT); +CREATE INDEX t_idx_heap ON t_heap USING btree(b); +INSERT INTO t SELECT g, g % 20, 'abcde_' || g FROM generate_series(1, 300000) g; +INSERT INTO t_heap SELECT g, g % 20, 'abcde_' || g FROM generate_series(1, 300000) g; +-- make sure that we test index scan +set columnar.enable_custom_scan TO 'off'; +set enable_seqscan TO off; +set seq_page_cost TO 10000000; +EXPLAIN (VERBOSE) SELECT a FROM t WHERE b > 18 ORDER BY b LIMIT 10; + QUERY PLAN +--------------------------------------------------------------------------------- + Limit (cost=0.30..0.56 rows=10 width=8) + Output: a, b + -> Custom Scan (ColumnarIndexScan) (cost=0.30..2586.30 rows=100000 width=8) + Output: a, b + ColumnarIndexScan using : t_idx + Index Cond: (t.b > 18) +(6 rows) + +EXPLAIN (VERBOSE) SELECT a FROM t_heap WHERE b > 18 ORDER BY b LIMIT 10; + QUERY PLAN +--------------------------------------------------------------------------------------------- + Limit (cost=0.29..1.51 rows=10 width=8) + Output: a, b + -> Index Scan using t_idx_heap on public.t_heap (cost=0.29..9305.99 rows=76440 width=8) + Output: a, b + Index Cond: (t_heap.b > 18) +(5 rows) + +DROP TABLE t; +DROP TABLE t_heap; +SET columnar.enable_custom_index_scan TO default; diff --git a/columnar/src/test/regress/expected/columnar_customindex_1.out b/columnar/src/test/regress/expected/columnar_customindex_1.out new file mode 100644 index 00000000..a8a27185 --- /dev/null +++ b/columnar/src/test/regress/expected/columnar_customindex_1.out @@ -0,0 +1,42 @@ +-- +-- Test custom index: Test shows only that we are exchanging Index scan with CustomIndexScan while +-- all other information is still the same. 
The feature is not visible through EXPLAIN because output +-- will be same in both cases, but difference is noticeable how the columnar storage engine is read. +-- With index scan ALL columns are requested from storage (this can be significant overhead in performance) +-- while with CustomIndexScan we will request only columns that are needed - that are going to be used as output. +-- Test also shows that we are not using CustomIndexScan with heap tables and only on columnar tables. +-- +SET columnar.enable_columnar_index_scan TO TRUE; +CREATE TABLE t(a INT PRIMARY KEY, b INT, c TEXT) USING columnar; +CREATE INDEX t_idx ON t USING btree(b); +CREATE TABLE t_heap(a INT PRIMARY KEY, b INT, c TEXT); +CREATE INDEX t_idx_heap ON t_heap USING btree(b); +INSERT INTO t SELECT g, g % 20, 'abcde_' || g FROM generate_series(1, 300000) g; +INSERT INTO t_heap SELECT g, g % 20, 'abcde_' || g FROM generate_series(1, 300000) g; +-- make sure that we test index scan +set columnar.enable_custom_scan TO 'off'; +set enable_seqscan TO off; +set seq_page_cost TO 10000000; +EXPLAIN (VERBOSE) SELECT a FROM t WHERE b > 18 ORDER BY b LIMIT 10; + QUERY PLAN +------------------------------------------------------------------------------------ + Limit (cost=0.30..0.56 rows=10 width=8) + Output: a, b + -> Index Scan using t_idx on public.t (cost=0.30..2586.30 rows=100000 width=8) + Output: a, b + Index Cond: (t.b > 18) +(5 rows) + +EXPLAIN (VERBOSE) SELECT a FROM t_heap WHERE b > 18 ORDER BY b LIMIT 10; + QUERY PLAN +--------------------------------------------------------------------------------------------- + Limit (cost=0.29..1.51 rows=10 width=8) + Output: a, b + -> Index Scan using t_idx_heap on public.t_heap (cost=0.29..9305.99 rows=76440 width=8) + Output: a, b + Index Cond: (t_heap.b > 18) +(5 rows) + +DROP TABLE t; +DROP TABLE t_heap; +SET columnar.enable_custom_index_scan TO default; diff --git a/columnar/src/test/regress/sql/columnar_customindex.sql 
b/columnar/src/test/regress/sql/columnar_customindex.sql
new file mode 100644
index 00000000..3cc08844
--- /dev/null
+++ b/columnar/src/test/regress/sql/columnar_customindex.sql
@@ -0,0 +1,33 @@
+--
+-- Test custom index: Test shows only that we are exchanging Index scan with CustomIndexScan while
+-- all other information is still the same. The feature is not visible through EXPLAIN because output
+-- will be same in both cases, but difference is noticeable how the columnar storage engine is read.
+-- With index scan ALL columns are requested from storage (this can be significant overhead in performance)
+-- while with CustomIndexScan we will request only columns that are needed - that are going to be used as output.
+-- Test also shows that we are not using CustomIndexScan with heap tables and only on columnar tables.
+--
+
+SET columnar.enable_columnar_index_scan TO TRUE;
+
+CREATE TABLE t(a INT PRIMARY KEY, b INT, c TEXT) USING columnar;
+CREATE INDEX t_idx ON t USING btree(b);
+
+CREATE TABLE t_heap(a INT PRIMARY KEY, b INT, c TEXT);
+CREATE INDEX t_idx_heap ON t_heap USING btree(b);
+
+INSERT INTO t SELECT g, g % 20, 'abcde_' || g FROM generate_series(1, 300000) g;
+INSERT INTO t_heap SELECT g, g % 20, 'abcde_' || g FROM generate_series(1, 300000) g;
+
+-- make sure that we test index scan
+set columnar.enable_custom_scan TO 'off';
+set enable_seqscan TO off;
+set seq_page_cost TO 10000000;
+
+EXPLAIN (VERBOSE) SELECT a FROM t WHERE b > 18 ORDER BY b LIMIT 10;
+
+EXPLAIN (VERBOSE) SELECT a FROM t_heap WHERE b > 18 ORDER BY b LIMIT 10;
+
+DROP TABLE t;
+DROP TABLE t_heap;
+
+SET columnar.enable_columnar_index_scan TO default;