Skip to content

Commit

Permalink
Speed up introspection of system objects like Database and Role (#6665)
Browse files Browse the repository at this point in the history
Currently if there are a lot of roles, doing a query like `select
sys::Role { member_of }` is quite slow. This is because the views for
both `Role` and `member_of` need to extract metadata for *every* Role
and pase it as json, and postgres is planning this as a nested loop.

Unfortunately, startup needs to query all these objects, so having
lots of databases or roles is a problem.

Fix this by forcing sys objects and links to always be materialized
into CTEs. This ensures that we do at most one linear scan of each of
the views. Since there is no way to hit an index when querying any of
these system object views, always doing a linear scan is no worse.

For the objects themselves we can leverage the rewrites mechanism. For
link tables, we add a slightly lighter weight mechanism to handle it.

Reverts the now-unnecessary main part of #6633 but keeps the extension
to the patch system. We could actually put annotations on databases now
and have it not be *too* slow.

This can be backported with an empty `edgeql+schema+globalonly` patch.
  • Loading branch information
msullivan authored and aljazerzen committed Jan 25, 2024
1 parent 490d460 commit 95c4f05
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 23 deletions.
7 changes: 5 additions & 2 deletions edb/pgsql/compiler/clauses.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,8 +401,11 @@ def fini_toplevel(
# Type rewrites go first.
if stmt.ctes is None:
stmt.ctes = []
stmt.ctes[:0] = list(ctx.param_ctes.values())
stmt.ctes[:0] = list(ctx.type_ctes.values())
stmt.ctes[:0] = [
*ctx.param_ctes.values(),
*ctx.ptr_ctes.values(),
*ctx.type_ctes.values(),
]

if ctx.env.named_param_prefix is None:
# Adding unused parameters into a CTE
Expand Down
8 changes: 7 additions & 1 deletion edb/pgsql/compiler/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,11 @@ class CompilerContextLevel(compiler.ContextLevel):
#: CTEs representing decoded parameters
param_ctes: Dict[str, pgast.CommonTableExpr]

#: CTEs representing schema types, when rewritten based on access policy
#: CTEs representing pointer tables that we need to force to be
#: materialized for performance reasons.
ptr_ctes: Dict[uuid.UUID, pgast.CommonTableExpr]

#: CTEs representing types, when rewritten based on access policy
type_ctes: Dict[FullRewriteKey, pgast.CommonTableExpr]

#: A set of type CTEs currently being generated
Expand Down Expand Up @@ -322,6 +326,7 @@ def __init__(
self.rel = NO_STMT
self.rel_hierarchy = {}
self.param_ctes = {}
self.ptr_ctes = {}
self.type_ctes = {}
self.pending_type_ctes = set()
self.dml_stmts = {}
Expand Down Expand Up @@ -361,6 +366,7 @@ def __init__(
self.rel = prevlevel.rel
self.rel_hierarchy = prevlevel.rel_hierarchy
self.param_ctes = prevlevel.param_ctes
self.ptr_ctes = prevlevel.ptr_ctes
self.type_ctes = prevlevel.type_ctes
self.pending_type_ctes = prevlevel.pending_type_ctes
self.dml_stmts = prevlevel.dml_stmts
Expand Down
66 changes: 63 additions & 3 deletions edb/pgsql/compiler/relctx.py
Original file line number Diff line number Diff line change
Expand Up @@ -1359,6 +1359,21 @@ def _lateral_union_join(
type='cross', larg=larg, rarg=rarg)


def _needs_cte(typeref: irast.TypeRef) -> bool:
"""Check whether a typeref needs to be forced into a materialized CTE.
The main use case here is for sys::SystemObjects which are stored as
views that populate their data by parsing JSON metadata embedded in
comments on the SQL system objects. The query plans when fetching multi
links from these objects wind up being pretty pathologically quadratic.
So instead we force the objects and links into materialized CTEs
so that they *can't* be shoved into nested loops.
"""
assert isinstance(typeref.name_hint, sn.QualName)
return typeref.name_hint.module == 'sys'


def range_for_material_objtype(
typeref: irast.TypeRef,
path_id: irast.PathId,
Expand Down Expand Up @@ -1386,12 +1401,24 @@ def range_for_material_objtype(
)
rw_key = (typeref.id, include_descendants)
key = rw_key + (dml_source_key,)
force_cte = _needs_cte(typeref)
if (
not ignore_rewrites
and (rewrite := ctx.env.type_rewrites.get(rw_key)) is not None
and (
(rewrite := ctx.env.type_rewrites.get(rw_key)) is not None
or force_cte
)
and rw_key not in ctx.pending_type_ctes
and not for_mutation
):
if not rewrite:
# If we are forcing CTE materialization but there is not a
# real rewrite, then create a trivial one.
rewrite = irast.Set(
path_id=irast.PathId.from_typeref(typeref, namespace={'rw'}),
typeref=typeref,
)

# Don't include overlays in the normal way in trigger mode
# when a type cte is used, because we bake the overlays into
# the cte instead (and so including them normally could union
Expand Down Expand Up @@ -1426,9 +1453,9 @@ def range_for_material_objtype(
type_rel = sctx.rel
else:
type_cte = pgast.CommonTableExpr(
name=ctx.env.aliases.get('t'),
name=ctx.env.aliases.get(f't_{typeref.name_hint}'),
query=sctx.rel,
materialized=False,
materialized=force_cte,
)
ctx.type_ctes[key] = type_cte
type_rel = type_cte
Expand Down Expand Up @@ -1782,6 +1809,36 @@ def range_from_queryset(
return rvar


def _make_link_table_cte(
ptrref: irast.PointerRef,
relation: pgast.Relation,
*,
ctx: context.CompilerContextLevel,
) -> pgast.Relation:
if (ptr_cte := ctx.ptr_ctes.get(ptrref.id)) is None:
ptr_select = pgast.SelectStmt(
target_list=[
pgast.ResTarget(val=pgast.ColumnRef(name=[pgast.Star()])),
],
from_clause=[pgast.RelRangeVar(relation=relation)],
)
ptr_cte = pgast.CommonTableExpr(
name=ctx.env.aliases.get(f'p_{ptrref.name}'),
query=ptr_select,
materialized=True,
)
ctx.ptr_ctes[ptrref.id] = ptr_cte

# We've set up the CTE to be a perfect drop in replacement for
# the real table (with a select *), so instead of pointing the
# rvar at the CTE (which would require routing path ids), just
# treat it like a base relation.
return pgast.Relation(
name=ptr_cte.name,
type_or_ptr_ref=ptrref,
)


def table_from_ptrref(
ptrref: irast.PointerRef,
ptr_info: pg_types.PointerStorageInfo,
Expand All @@ -1804,6 +1861,9 @@ def table_from_ptrref(
type_or_ptr_ref=ptrref,
)

if aspect == 'inhview' and _needs_cte(ptrref.out_source):
relation = _make_link_table_cte(ptrref, relation, ctx=ctx)

# Pseudo pointers (tuple and type intersection) have no schema id.
sobj_id = ptrref.id if isinstance(ptrref, irast.PointerRef) else None
rvar = pgast.RelRangeVar(
Expand Down
15 changes: 0 additions & 15 deletions edb/schema/reflection/structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -837,21 +837,6 @@ def generate_structure(
sn.UnqualName(f'{refdict.attr}__internal'),
)

# HACK: sys::Database is an AnnotationSubject, but
# there is no way to actually put annotations on it,
# and fetching them results in some pathological
# quadratic queries where each inner iteration does
# expensive fetching of metadata and JSON decoding.
# Override it to return nothing.
# TODO: For future versions, we can probably just
# drop it.
if (
str(rschema_name) == 'sys::Database'
and refdict.attr == 'annotations'
):
props = {}
read_ptr = f'{read_ptr} := <schema::AnnotationValue>{{}}'

for field in props:
sfn = field.sname
prop_shape_els.append(f'@{sfn}')
Expand Down
6 changes: 4 additions & 2 deletions edb/server/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -1287,13 +1287,15 @@ def compile_intro_queries_stdlib(
),
)

local_intro_sql = ' UNION ALL '.join(sql_intro_local_parts)
local_intro_sql = ' UNION ALL '.join(
f'({x})' for x in sql_intro_local_parts)
local_intro_sql = f'''
WITH intro(c) AS ({local_intro_sql})
SELECT json_agg(intro.c) FROM intro
'''

global_intro_sql = ' UNION ALL '.join(sql_intro_global_parts)
global_intro_sql = ' UNION ALL '.join(
f'({x})' for x in sql_intro_global_parts)
global_intro_sql = f'''
WITH intro(c) AS ({global_intro_sql})
SELECT json_agg(intro.c) FROM intro
Expand Down

0 comments on commit 95c4f05

Please sign in to comment.