diff --git a/edb/pgsql/compiler/clauses.py b/edb/pgsql/compiler/clauses.py index 210002b3ef9..8e05299268e 100644 --- a/edb/pgsql/compiler/clauses.py +++ b/edb/pgsql/compiler/clauses.py @@ -401,8 +401,11 @@ def fini_toplevel( # Type rewrites go first. if stmt.ctes is None: stmt.ctes = [] - stmt.ctes[:0] = list(ctx.param_ctes.values()) - stmt.ctes[:0] = list(ctx.type_ctes.values()) + stmt.ctes[:0] = [ + *ctx.param_ctes.values(), + *ctx.ptr_ctes.values(), + *ctx.type_ctes.values(), + ] if ctx.env.named_param_prefix is None: # Adding unused parameters into a CTE diff --git a/edb/pgsql/compiler/context.py b/edb/pgsql/compiler/context.py index 39ba9294d3b..14a270aa4ca 100644 --- a/edb/pgsql/compiler/context.py +++ b/edb/pgsql/compiler/context.py @@ -220,7 +220,11 @@ class CompilerContextLevel(compiler.ContextLevel): #: CTEs representing decoded parameters param_ctes: Dict[str, pgast.CommonTableExpr] - #: CTEs representing schema types, when rewritten based on access policy + #: CTEs representing pointer tables that we need to force to be + #: materialized for performance reasons. + ptr_ctes: Dict[uuid.UUID, pgast.CommonTableExpr] + + #: CTEs representing types, when rewritten based on access policy type_ctes: Dict[FullRewriteKey, pgast.CommonTableExpr] #: A set of type CTEs currently being generated @@ -322,6 +326,7 @@ def __init__( self.rel = NO_STMT self.rel_hierarchy = {} self.param_ctes = {} + self.ptr_ctes = {} self.type_ctes = {} self.pending_type_ctes = set() self.dml_stmts = {} @@ -361,6 +366,7 @@ def __init__( self.rel = prevlevel.rel self.rel_hierarchy = prevlevel.rel_hierarchy self.param_ctes = prevlevel.param_ctes + self.ptr_ctes = prevlevel.ptr_ctes self.type_ctes = prevlevel.type_ctes self.pending_type_ctes = prevlevel.pending_type_ctes self.dml_stmts = prevlevel.dml_stmts diff --git a/edb/pgsql/compiler/relctx.py b/edb/pgsql/compiler/relctx.py index b6ba2abd813..4e331896409 100644 --- a/edb/pgsql/compiler/relctx.py +++ b/edb/pgsql/compiler/relctx.py @@ -1359,6 +1359,21 @@ def _lateral_union_join( type='cross', larg=larg, rarg=rarg) +def _needs_cte(typeref: irast.TypeRef) -> bool: + """Check whether a typeref needs to be forced into a materialized CTE. + + The main use case here is for sys::SystemObjects which are stored as + views that populate their data by parsing JSON metadata embedded in + comments on the SQL system objects. The query plans when fetching multi + links from these objects wind up being pretty pathologically quadratic. + + So instead we force the objects and links into materialized CTEs + so that they *can't* be shoved into nested loops. + """ + assert isinstance(typeref.name_hint, sn.QualName) + return typeref.name_hint.module == 'sys' + + def range_for_material_objtype( typeref: irast.TypeRef, path_id: irast.PathId, @@ -1386,12 +1401,24 @@ def range_for_material_objtype( ) rw_key = (typeref.id, include_descendants) key = rw_key + (dml_source_key,) + force_cte = _needs_cte(typeref) if ( not ignore_rewrites - and (rewrite := ctx.env.type_rewrites.get(rw_key)) is not None + and ( + (rewrite := ctx.env.type_rewrites.get(rw_key)) is not None + or force_cte + ) and rw_key not in ctx.pending_type_ctes and not for_mutation ): + if not rewrite: + # If we are forcing CTE materialization but there is not a + # real rewrite, then create a trivial one. + rewrite = irast.Set( + path_id=irast.PathId.from_typeref(typeref, namespace={'rw'}), + typeref=typeref, + ) + # Don't include overlays in the normal way in trigger mode # when a type cte is used, because we bake the overlays into # the cte instead (and so including them normally could union @@ -1426,9 +1453,9 @@ def range_for_material_objtype( type_rel = sctx.rel else: type_cte = pgast.CommonTableExpr( - name=ctx.env.aliases.get('t'), + name=ctx.env.aliases.get(f't_{typeref.name_hint}'), query=sctx.rel, - materialized=False, + materialized=force_cte, ) ctx.type_ctes[key] = type_cte type_rel = type_cte @@ -1782,6 +1809,36 @@ def range_from_queryset( return rvar +def _make_link_table_cte( + ptrref: irast.PointerRef, + relation: pgast.Relation, + *, + ctx: context.CompilerContextLevel, +) -> pgast.Relation: + if (ptr_cte := ctx.ptr_ctes.get(ptrref.id)) is None: + ptr_select = pgast.SelectStmt( + target_list=[ + pgast.ResTarget(val=pgast.ColumnRef(name=[pgast.Star()])), + ], + from_clause=[pgast.RelRangeVar(relation=relation)], + ) + ptr_cte = pgast.CommonTableExpr( + name=ctx.env.aliases.get(f'p_{ptrref.name}'), + query=ptr_select, + materialized=True, + ) + ctx.ptr_ctes[ptrref.id] = ptr_cte + + # We've set up the CTE to be a perfect drop in replacement for + # the real table (with a select *), so instead of pointing the + # rvar at the CTE (which would require routing path ids), just + # treat it like a base relation. + return pgast.Relation( + name=ptr_cte.name, + type_or_ptr_ref=ptrref, + ) + + def table_from_ptrref( ptrref: irast.PointerRef, ptr_info: pg_types.PointerStorageInfo, @@ -1804,6 +1861,9 @@ def table_from_ptrref( type_or_ptr_ref=ptrref, ) + if aspect == 'inhview' and _needs_cte(ptrref.out_source): + relation = _make_link_table_cte(ptrref, relation, ctx=ctx) + # Pseudo pointers (tuple and type intersection) have no schema id. sobj_id = ptrref.id if isinstance(ptrref, irast.PointerRef) else None rvar = pgast.RelRangeVar( diff --git a/edb/schema/reflection/structure.py b/edb/schema/reflection/structure.py index 12ee09a07b4..4bb3a695d4e 100644 --- a/edb/schema/reflection/structure.py +++ b/edb/schema/reflection/structure.py @@ -837,21 +837,6 @@ def generate_structure( sn.UnqualName(f'{refdict.attr}__internal'), ) - # HACK: sys::Database is an AnnotationSubject, but - # there is no way to actually put annotations on it, - # and fetching them results in some pathological - # quadratic queries where each inner iteration does - # expensive fetching of metadata and JSON decoding. - # Override it to return nothing. - # TODO: For future versions, we can probably just - # drop it. - if ( - str(rschema_name) == 'sys::Database' - and refdict.attr == 'annotations' - ): - props = {} - read_ptr = f'{read_ptr} := {{}}' - for field in props: sfn = field.sname prop_shape_els.append(f'@{sfn}') diff --git a/edb/server/bootstrap.py b/edb/server/bootstrap.py index 31764d5d954..dfba8f7469a 100644 --- a/edb/server/bootstrap.py +++ b/edb/server/bootstrap.py @@ -1287,13 +1287,15 @@ def compile_intro_queries_stdlib( ), ) - local_intro_sql = ' UNION ALL '.join(sql_intro_local_parts) + local_intro_sql = ' UNION ALL '.join( + f'({x})' for x in sql_intro_local_parts) local_intro_sql = f''' WITH intro(c) AS ({local_intro_sql}) SELECT json_agg(intro.c) FROM intro ''' - global_intro_sql = ' UNION ALL '.join(sql_intro_global_parts) + global_intro_sql = ' UNION ALL '.join( + f'({x})' for x in sql_intro_global_parts) global_intro_sql = f''' WITH intro(c) AS ({global_intro_sql}) SELECT json_agg(intro.c) FROM intro