Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up introspection of system objects like Database and Role #6665

Merged
merged 1 commit into from
Jan 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions edb/pgsql/compiler/clauses.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,8 +409,11 @@ def fini_toplevel(
# Type rewrites go first.
if stmt.ctes is None:
stmt.ctes = []
stmt.ctes[:0] = list(ctx.param_ctes.values())
stmt.ctes[:0] = list(ctx.type_ctes.values())
stmt.ctes[:0] = [
*ctx.param_ctes.values(),
*ctx.ptr_ctes.values(),
*ctx.type_ctes.values(),
]

if ctx.env.named_param_prefix is None:
# Adding unused parameters into a CTE
Expand Down
8 changes: 7 additions & 1 deletion edb/pgsql/compiler/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,11 @@ class CompilerContextLevel(compiler.ContextLevel):
#: CTEs representing decoded parameters
param_ctes: Dict[str, pgast.CommonTableExpr]

#: CTEs representing schema types, when rewritten based on access policy
#: CTEs representing pointer tables that we need to force to be
#: materialized for performance reasons.
ptr_ctes: Dict[uuid.UUID, pgast.CommonTableExpr]

#: CTEs representing types, when rewritten based on access policy
type_ctes: Dict[FullRewriteKey, pgast.CommonTableExpr]

#: A set of type CTEs currently being generated
Expand Down Expand Up @@ -322,6 +326,7 @@ def __init__(
self.rel = NO_STMT
self.rel_hierarchy = {}
self.param_ctes = {}
self.ptr_ctes = {}
self.type_ctes = {}
self.pending_type_ctes = set()
self.dml_stmts = {}
Expand Down Expand Up @@ -361,6 +366,7 @@ def __init__(
self.rel = prevlevel.rel
self.rel_hierarchy = prevlevel.rel_hierarchy
self.param_ctes = prevlevel.param_ctes
self.ptr_ctes = prevlevel.ptr_ctes
self.type_ctes = prevlevel.type_ctes
self.pending_type_ctes = prevlevel.pending_type_ctes
self.dml_stmts = prevlevel.dml_stmts
Expand Down
66 changes: 63 additions & 3 deletions edb/pgsql/compiler/relctx.py
Original file line number Diff line number Diff line change
Expand Up @@ -1360,6 +1360,21 @@ def _lateral_union_join(
type='cross', larg=larg, rarg=rarg)


def _needs_cte(typeref: irast.TypeRef) -> bool:
"""Check whether a typeref needs to be forced into a materialized CTE.

The main use case here is for sys::SystemObjects which are stored as
views that populate their data by parsing JSON metadata embedded in
comments on the SQL system objects. The query plans when fetching multi
links from these objects wind up being pretty pathologically quadratic.

So instead we force the objects and links into materialized CTEs
so that they *can't* be shoved into nested loops.
"""
assert isinstance(typeref.name_hint, sn.QualName)
return typeref.name_hint.module == 'sys'


def range_for_material_objtype(
typeref: irast.TypeRef,
path_id: irast.PathId,
Expand Down Expand Up @@ -1393,12 +1408,24 @@ def range_for_material_objtype(
)
rw_key = (typeref.id, include_descendants)
key = rw_key + (dml_source_key,)
force_cte = _needs_cte(typeref)
if (
(not ignore_rewrites or is_global)
and (rewrite := ctx.env.type_rewrites.get(rw_key)) is not None
and (
(rewrite := ctx.env.type_rewrites.get(rw_key)) is not None
or force_cte
)
and rw_key not in ctx.pending_type_ctes
and not for_mutation
):
if not rewrite:
# If we are forcing CTE materialization but there is not a
# real rewrite, then create a trivial one.
rewrite = irast.Set(
path_id=irast.PathId.from_typeref(typeref, namespace={'rw'}),
typeref=typeref,
)

# Don't include overlays in the normal way in trigger mode
# when a type cte is used, because we bake the overlays into
# the cte instead (and so including them normally could union
Expand Down Expand Up @@ -1433,9 +1460,9 @@ def range_for_material_objtype(
type_rel = sctx.rel
else:
type_cte = pgast.CommonTableExpr(
name=ctx.env.aliases.get('t'),
name=ctx.env.aliases.get(f't_{typeref.name_hint}'),
query=sctx.rel,
materialized=is_global,
materialized=is_global or force_cte,
)
ctx.type_ctes[key] = type_cte
type_rel = type_cte
Expand Down Expand Up @@ -1789,6 +1816,36 @@ def range_from_queryset(
return rvar


def _make_link_table_cte(
ptrref: irast.PointerRef,
relation: pgast.Relation,
*,
ctx: context.CompilerContextLevel,
) -> pgast.Relation:
if (ptr_cte := ctx.ptr_ctes.get(ptrref.id)) is None:
ptr_select = pgast.SelectStmt(
target_list=[
pgast.ResTarget(val=pgast.ColumnRef(name=[pgast.Star()])),
],
from_clause=[pgast.RelRangeVar(relation=relation)],
)
ptr_cte = pgast.CommonTableExpr(
name=ctx.env.aliases.get(f'p_{ptrref.name}'),
query=ptr_select,
materialized=True,
)
ctx.ptr_ctes[ptrref.id] = ptr_cte

# We've set up the CTE to be a perfect drop in replacement for
# the real table (with a select *), so instead of pointing the
# rvar at the CTE (which would require routing path ids), just
# treat it like a base relation.
return pgast.Relation(
name=ptr_cte.name,
type_or_ptr_ref=ptrref,
)


def table_from_ptrref(
ptrref: irast.PointerRef,
ptr_info: pg_types.PointerStorageInfo,
Expand All @@ -1811,6 +1868,9 @@ def table_from_ptrref(
type_or_ptr_ref=ptrref,
)

if aspect == 'inhview' and _needs_cte(ptrref.out_source):
relation = _make_link_table_cte(ptrref, relation, ctx=ctx)

# Pseudo pointers (tuple and type intersection) have no schema id.
sobj_id = ptrref.id if isinstance(ptrref, irast.PointerRef) else None
rvar = pgast.RelRangeVar(
Expand Down
15 changes: 0 additions & 15 deletions edb/schema/reflection/structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -805,21 +805,6 @@ def generate_structure(
sn.UnqualName(f'{refdict.attr}__internal'),
)

# HACK: sys::Database is an AnnotationSubject, but
# there is no way to actually put annotations on it,
# and fetching them results in some pathological
# quadratic queries where each inner iteration does
# expensive fetching of metadata and JSON decoding.
# Override it to return nothing.
# TODO: For future versions, we can probably just
# drop it.
if (
str(rschema_name) == 'sys::Database'
and refdict.attr == 'annotations'
):
props = {}
read_ptr = f'{read_ptr} := <schema::AnnotationValue>{{}}'

for field in props:
sfn = field.sname
prop_shape_els.append(f'@{sfn}')
Expand Down
6 changes: 4 additions & 2 deletions edb/server/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -1287,13 +1287,15 @@ def compile_intro_queries_stdlib(
),
)

local_intro_sql = ' UNION ALL '.join(sql_intro_local_parts)
local_intro_sql = ' UNION ALL '.join(
f'({x})' for x in sql_intro_local_parts)
local_intro_sql = f'''
WITH intro(c) AS ({local_intro_sql})
SELECT json_agg(intro.c) FROM intro
'''

global_intro_sql = ' UNION ALL '.join(sql_intro_global_parts)
global_intro_sql = ' UNION ALL '.join(
f'({x})' for x in sql_intro_global_parts)
global_intro_sql = f'''
WITH intro(c) AS ({global_intro_sql})
SELECT json_agg(intro.c) FROM intro
Expand Down
Loading