-
Notifications
You must be signed in to change notification settings - Fork 59
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add support for <-> operator rewriting #189
Changes from all commits
cd9b8c1
54eba82
0831fa5
fc3a63a
9718bb7
a802f48
9194842
58e0b96
352f065
442aa50
c63a49a
cf4a683
7d49cdb
69e5250
5d49fa5
64ca0e8
fcddebd
1d0c513
5a9949d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,285 @@ | ||
#include <postgres.h> | ||
|
||
#include "op_rewrite.h" | ||
|
||
#include <access/genam.h> | ||
#include <assert.h> | ||
#include <catalog/pg_amproc.h> | ||
#include <catalog/pg_opclass.h> | ||
#include <commands/defrem.h> | ||
#include <miscadmin.h> | ||
#include <nodes/nodeFuncs.h> | ||
#include <parser/parsetree.h> | ||
#include <stdbool.h> | ||
#include <stdint.h> | ||
#include <utils/catcache.h> | ||
#include <utils/memutils.h> | ||
#include <utils/rel.h> | ||
#include <utils/syscache.h> | ||
|
||
#include "plan_tree_walker.h" | ||
#include "utils.h" | ||
|
||
#if PG_VERSION_NUM < 120000 | ||
#include <access/heapam.h> | ||
#include <access/htup_details.h> | ||
#else | ||
#include <access/relation.h> | ||
#endif | ||
|
||
static Node *operator_rewriting_mutator(Node *node, void *ctx); | ||
|
||
void base_plan_mutator(Plan *plan, void *context) | ||
{ | ||
plan->lefttree = (Plan *)operator_rewriting_mutator((Node *)plan->lefttree, context); | ||
plan->righttree = (Plan *)operator_rewriting_mutator((Node *)plan->righttree, context); | ||
plan->initPlan = (List *)operator_rewriting_mutator((Node *)plan->initPlan, context); | ||
// checking qual and target list at the end covers some edge cases, if you modify this leave them here | ||
plan->qual = (List *)operator_rewriting_mutator((Node *)plan->qual, context); | ||
plan->targetlist = (List *)operator_rewriting_mutator((Node *)plan->targetlist, context); | ||
} | ||
|
||
// recursively descend the plan tree searching for expressions with the <-> operator that are part of a non-index scan | ||
// src/include/nodes/plannodes.h and src/include/nodes/nodes.h contain relevant definitions | ||
Node *plan_tree_mutator(Plan *plan, void *context) | ||
{ | ||
check_stack_depth(); | ||
|
||
switch(nodeTag(plan)) { | ||
case T_SubqueryScan: | ||
{ | ||
SubqueryScan *subqueryscan = (SubqueryScan *)plan; | ||
base_plan_mutator(&(subqueryscan->scan.plan), context); | ||
subqueryscan->subplan = (Plan *)operator_rewriting_mutator((Node *)subqueryscan->subplan, context); | ||
return (Node *)subqueryscan; | ||
} | ||
case T_CteScan: | ||
{ | ||
CteScan *ctescan = (CteScan *)plan; | ||
base_plan_mutator(&(ctescan->scan.plan), context); | ||
return (Node *)ctescan; | ||
} | ||
#if PG_VERSION_NUM < 160000 | ||
case T_Join: | ||
{ | ||
Join *join = (Join *)plan; | ||
base_plan_mutator(&(join->plan), context); | ||
join->joinqual = (List *)operator_rewriting_mutator((Node *)join->joinqual, context); | ||
return (Node *)join; | ||
} | ||
#endif | ||
case T_NestLoop: | ||
{ | ||
NestLoop *nestloop = (NestLoop *)plan; | ||
base_plan_mutator((Plan *)&(nestloop->join), context); | ||
return (Node *)nestloop; | ||
} | ||
case T_Result: | ||
{ | ||
Result *result = (Result *)plan; | ||
base_plan_mutator(&(result->plan), context); | ||
result->resconstantqual = operator_rewriting_mutator((Node *)result->resconstantqual, context); | ||
return (Node *)result; | ||
} | ||
case T_Limit: | ||
{ | ||
Limit *limit = (Limit *)plan; | ||
base_plan_mutator(&(limit->plan), context); | ||
limit->limitOffset = operator_rewriting_mutator((Node *)limit->limitOffset, context); | ||
limit->limitCount = operator_rewriting_mutator((Node *)limit->limitCount, context); | ||
return (Node *)limit; | ||
} | ||
case T_Append: | ||
{ | ||
Append *append = (Append *)plan; | ||
base_plan_mutator(&(append->plan), context); | ||
append->appendplans = (List *)operator_rewriting_mutator((Node *)append->appendplans, context); | ||
return (Node *)append; | ||
} | ||
// case T_IncrementalSort: // We will eventually support this | ||
case T_Agg: | ||
case T_Group: | ||
case T_Sort: | ||
case T_Unique: | ||
case T_SetOp: | ||
case T_Hash: | ||
case T_HashJoin: | ||
case T_WindowAgg: | ||
case T_LockRows: | ||
{ | ||
base_plan_mutator(plan, context); | ||
return (Node *)plan; | ||
} | ||
case T_ModifyTable: // No order by when modifying a table (update/delete etc) | ||
case T_BitmapAnd: // We do not provide a bitmap index | ||
case T_BitmapOr: | ||
case T_BitmapHeapScan: | ||
case T_BitmapIndexScan: | ||
case T_FunctionScan: // SELECT * FROM fn(x, y, z) | ||
case T_ValuesScan: // VALUES (1), (2) | ||
case T_Material: // https://stackoverflow.com/questions/31410030/ | ||
#if PG_VERSION_NUM >= 140000 | ||
case T_Memoize: // memoized inner loop must have an index to be memoized | ||
#endif | ||
case T_WorkTableScan: // temporary table, shouldn't have index | ||
case T_ProjectSet: // "execute set returning functions" feels safe to exclude | ||
case T_TableFuncScan: // scan of a function that returns a table, shouldn't have an index | ||
case T_ForeignScan: // if the relation is foreign we can't determine if it has an index | ||
default: | ||
break; | ||
} | ||
return (Node *)plan; | ||
} | ||
|
||
// To write syscache calls look for the 'static const struct cachedesc cacheinfo[]' in utils/cache/syscache.c | ||
// These describe the different caches that will be initialized into SysCache and the keys they support in searches | ||
// The anums tell you the table and the column that the key will be compared to this is afaict the only way to match | ||
// them to SQL for example pg_am.oid -> Anum_pg_am_oid the keys must be in order but they need not all be included the | ||
// comment next to the top label is the name of the #defined cacheid that you should use as your first argument you can | ||
// destructure the tuple int a From_(table_name) with GETSTRUCT to pull individual rows out | ||
static Oid get_func_id_from_index(Relation index) | ||
{ | ||
Oid hnswamoid = get_index_am_oid("hnsw", false); | ||
if(index->rd_rel->relam != hnswamoid) return InvalidOid; | ||
|
||
// indclass is inaccessible on the form data | ||
// https://www.postgresql.org/docs/current/system-catalog-declarations.html | ||
bool isNull; | ||
Oid idxopclassoid; | ||
Datum classDatum = SysCacheGetAttr(INDEXRELID, index->rd_indextuple, Anum_pg_index_indclass, &isNull); | ||
if(!isNull) { | ||
oidvector *indclass = (oidvector *)DatumGetPointer(classDatum); | ||
assert(indclass->dim1 == 1); | ||
idxopclassoid = indclass->values[ 0 ]; | ||
} else { | ||
index_close(index, AccessShareLock); | ||
elog(ERROR, "Failed to retrieve indclass oid from index class"); | ||
} | ||
|
||
// SELECT * FROM pg_opclass WHERE opcmethod=hnswamoid AND opcname=dist_cos_ops | ||
HeapTuple opclassTuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(idxopclassoid)); | ||
if(!HeapTupleIsValid(opclassTuple)) { | ||
index_close(index, AccessShareLock); | ||
elog(ERROR, "Failed to find operator class for key column"); | ||
} | ||
|
||
Oid opclassOid = ((Form_pg_opclass)GETSTRUCT(opclassTuple))->opcfamily; | ||
ReleaseSysCache(opclassTuple); | ||
|
||
// SELECT * FROM pg_amproc WHERE amprocfamily=opclassOid | ||
// SearchSysCache1 is what we want and in fact it runs fine against release builds. However debug builds assert that | ||
// AMPROCNUM takes only 1 arg which isn't true and so they fail. We therefore have to use SearchSysCacheList1 since | ||
// it doesn't enforce this invariant. Ideally we would call SearchCatCache1 directly but postgres doesn't expose | ||
// necessary constants | ||
CatCList *opList = SearchSysCacheList1(AMPROCNUM, ObjectIdGetDatum(opclassOid)); | ||
assert(opList->n_members == 1); | ||
HeapTuple opTuple = &opList->members[ 0 ]->tuple; | ||
if(!HeapTupleIsValid(opTuple)) { | ||
index_close(index, AccessShareLock); | ||
elog(ERROR, "Failed to find the function for operator class"); | ||
} | ||
Oid functionId = ((Form_pg_amproc)GETSTRUCT(opTuple))->amproc; | ||
ReleaseCatCacheList(opList); | ||
|
||
return functionId; | ||
} | ||
|
||
static Node *operator_rewriting_mutator(Node *node, void *ctx) | ||
{ | ||
OpRewriterContext *context = (OpRewriterContext *)ctx; | ||
|
||
if(node == NULL) return node; | ||
|
||
if(IsA(node, OpExpr)) { | ||
OpExpr *opExpr = (OpExpr *)node; | ||
if(list_member_oid(context->ldb_ops, opExpr->opno)) { | ||
if(context->indices == NULL) { | ||
return node; | ||
} else { | ||
ListCell *lc; | ||
foreach(lc, context->indices) { | ||
uintptr_t intermediate = (uintptr_t)lfirst(lc); | ||
Oid indexid = (Oid)intermediate; | ||
Relation index = index_open(indexid, AccessShareLock); | ||
Oid indexfunc = get_func_id_from_index(index); | ||
if(OidIsValid(indexfunc)) { | ||
MemoryContext old = MemoryContextSwitchTo(MessageContext); | ||
FuncExpr *fnExpr = makeNode(FuncExpr); | ||
fnExpr->funcresulttype = opExpr->opresulttype; | ||
fnExpr->funcretset = opExpr->opretset; | ||
fnExpr->funccollid = opExpr->opcollid; | ||
fnExpr->inputcollid = opExpr->inputcollid; | ||
fnExpr->args = opExpr->args; | ||
fnExpr->location = opExpr->location; | ||
// operators can't take variadic arguments | ||
fnExpr->funcvariadic = false; | ||
// print it as a function | ||
fnExpr->funcformat = COERCE_EXPLICIT_CALL; | ||
fnExpr->funcid = indexfunc; | ||
MemoryContextSwitchTo(old); | ||
|
||
index_close(index, AccessShareLock); | ||
|
||
return (Node *)fnExpr; | ||
} | ||
index_close(index, AccessShareLock); | ||
} | ||
return node; | ||
} | ||
} | ||
} | ||
|
||
if(IsA(node, IndexScan) || IsA(node, IndexOnlyScan)) { | ||
return node; | ||
} | ||
if(IsA(node, SeqScan) || IsA(node, SampleScan)) { | ||
Scan *scan = (Scan *)node; | ||
Plan *scanPlan = &scan->plan; | ||
Oid rtrelid = scan->scanrelid; | ||
RangeTblEntry *rte = rt_fetch(rtrelid, context->rtable); | ||
Oid relid = rte->relid; | ||
Relation rel = relation_open(relid, AccessShareLock); | ||
if(rel->rd_indexvalid) { | ||
context->indices = RelationGetIndexList(rel); | ||
} | ||
relation_close(rel, AccessShareLock); | ||
|
||
base_plan_mutator(scanPlan, context); | ||
return (Node *)scan; | ||
} | ||
|
||
if(IsA(node, List)) { | ||
MemoryContext old = MemoryContextSwitchTo(MessageContext); | ||
List *list = (List *)node; | ||
List *ret = NIL; | ||
ListCell *lc; | ||
foreach(lc, list) { | ||
ret = lappend(ret, operator_rewriting_mutator((Node *)lfirst(lc), ctx)); | ||
} | ||
MemoryContextSwitchTo(old); | ||
return (Node *)ret; | ||
} | ||
|
||
if(is_plan_node(node)) { | ||
return (Node *)plan_tree_mutator((Plan *)node, ctx); | ||
} else { | ||
return expression_tree_mutator(node, operator_rewriting_mutator, ctx); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. are all nodes either plan nodes or expression tree nodes? are there other options? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, I'm not sure, @dqii is more knowledgeable than I am about these systems, she may know. Otherwise I will try to figure out There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this thread seems to imply that the plan tree is made exclusively of exprs and plans. It's over a decade old but looking at some of the code in the executor I think it's probably accurate still. The executor doesn't check node types at all and the functions I've inspected all appear to expect expressions. Intuitively it sorta makes sense that the executor takes only exprs, statements and plans. Since statements are a separate tree I think it's safe to enforce the invariant |
||
} | ||
} | ||
|
||
bool ldb_rewrite_ops(Plan *plan, List *oidList, List *rtable) | ||
{ | ||
Node *node = (Node *)plan; | ||
|
||
OpRewriterContext context; | ||
context.ldb_ops = oidList; | ||
context.indices = NULL; | ||
context.rtable = rtable; | ||
|
||
if(IsA(node, IndexScan) || IsA(node, IndexOnlyScan)) { | ||
return false; | ||
} | ||
|
||
operator_rewriting_mutator(node, (void *)&context); | ||
return true; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#ifndef LDB_HOOKS_OP_REWRITE_H | ||
#define LDB_HOOKS_OP_REWRITE_H | ||
|
||
#include <nodes/pg_list.h> | ||
#include <nodes/plannodes.h> | ||
|
||
typedef struct OpRewriterContext | ||
{ | ||
List *ldb_ops; | ||
List *indices; | ||
List *rtable; | ||
} OpRewriterContext; | ||
|
||
bool ldb_rewrite_ops(Plan *plan, List *oidList, List *rtable); | ||
#endif |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,4 +24,4 @@ List *ldb_get_operator_oids() | |
list_free(nameList); | ||
|
||
return oidList; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Question: Are these all of the cases that need to be covered?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not familiar enough with all of them to say for sure. I removed some I know are definitely unnecessary. The remaining ones I couldn't definitively rule out so I was hoping to get some additional eyes on them
some very rough and possibly erroneous notes.
SubqueryScan
I think this is required, since a subquery could include the case we're trying to addressCteScan
Unecessary if the CTE doesn't use the index, I'm not sure what the expected behavior isJoin
One of both of the tables being joined can be generated by a select so I think this is requiredAgg
I think that this can cover what is essentially a subquery e.g.ANY(SELECT ....)
Group
This can probably be removed, I wasn't sure if it could have a subquery with an order by but at least for a single query I think it's 1 or the other it is possible this could end up parenting a CTE though I don't know enough to saySort
I thinkORDDER BY
can generate a sort meriting its inclusion, but I'm not sureUnique
This can probably be removed, I don't think you can call unique on an expression, unless you can dounique(v <-> ...)
NestLoop
loops can be seqscans, I think this is requiredResult
I couldn't find much information on this plan type, and so included it to be safeLimit
When I looked at this it seemed like the limit becomes the root node, so I think we need thisAppend
Not 100% sure where this will end up in the tree, but I think you can have a union of two scans so this should be checkedThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Haha just realized I misunderstood the question, apologies. I had assumed that plan tree walker was covering and just removed the nodes I have explicit checks for. I don't know enough about the remaining plan types to say if they might require checks. I will try to read more about them. Immediately
T_IncrementalSort
feels like it should probably be included so my guess is that I'm missing several othersThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No worries, I was just asking if this was a deliberately curated subset or just what you ended up covering. I don't know if removing is the move, since I know the operator can end up in funny subqueries and joins.
Assuming you meant to refer to the plan walker I wrote -- there's around 30 plan nodes I believe, which you can see in nodes.h. I only covered the ones that I was able to generate tests for. It probably would be better for me to have added the other cases as well. I just couldn't figure out tests for the rest. For example, I couldn't trigger T_Join (joins ended up using a different plan node), and it ended up getting removed in Postgres 16. I'm not sure if it gets triggered at all in Postgres 15 -- I think it gets triggered in earlier Postgres versions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, that makes sense. You got way more than I've been able to generate! Reading through the additional types it seems like most if not all of the ones that were left out can be safely ignored. I'll try going over the rest of them tomorrow. It's funny that T_Join is hard to trigger, you'd expect it would be relatively common.
One option I could implement pretty easily is to call the base mutator in the default so that we get some coverage on the more exotic types, this could be combined with a check that returns the path as is for ones we think are safe to ignore too.