Add structured config compilation (#8098)
Structured config values are Python objects from environment variables
or (future) TOML config files. The compilation re-assembles the objects
into `ConfigOp` ASTs and uses the static evaluation mechanism to
generate verified config values.
fantix authored Dec 13, 2024
1 parent 47efb6e commit e80225e
Showing 15 changed files with 815 additions and 146 deletions.
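As a rough illustration of the commit message (the exact environment-variable and TOML formats are not part of this diff, so the setting names and values below are assumptions), the new entry point receives plain Python values keyed by config object and setting name and compiles them into verified config values:

# Hypothetical input shape for compile_structured_config(), e.g. assembled
# from environment variables or, in the future, a TOML config file.
objects = {
    "cfg::Config": {
        "allow_user_specified_id": True,       # assumed setting name
        "session_idle_timeout": "60 seconds",  # assumed setting name
    },
}
# The compiler re-assembles these values into ConfigOp ASTs and statically
# evaluates them into verified config settings (see compiler.py below).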
59 changes: 59 additions & 0 deletions edb/ir/ast.py
@@ -77,6 +77,7 @@
from edb.schema import modules as s_mod
from edb.schema import name as sn
from edb.schema import objects as so
from edb.schema import objtypes as s_objtypes
from edb.schema import pointers as s_pointers
from edb.schema import schema as s_schema
from edb.schema import types as s_types
@@ -1400,3 +1401,61 @@ class FTSDocument(ImmutableExpr):
weight: typing.Optional[str]

typeref: TypeRef


# StaticIntrospection is only used in static evaluation (staeval.py),
# but unfortunately the IR AST node can only be defined here.
class StaticIntrospection(Tuple):

ir: TypeIntrospection
schema: s_schema.Schema

@property
def meta_type(self) -> s_objtypes.ObjectType:
return self.schema.get_by_id(
self.ir.typeref.id, type=s_objtypes.ObjectType
)

@property
def output_type(self) -> s_types.Type:
return self.schema.get_by_id(
self.ir.output_typeref.id, type=s_types.Type
)

@property
def elements(self) -> typing.List[TupleElement]:
from . import staeval

rv = []
schema = self.schema
output_type = self.output_type
for ptr in self.meta_type.get_pointers(schema).objects(schema):
field_sn = ptr.get_shortname(schema)
field_name = field_sn.name
field_type = ptr.get_target(schema)
assert field_type is not None
try:
field_value = output_type.get_field_value(schema, field_name)
except LookupError:
continue
try:
val = staeval.coerce_py_const(field_type.id, field_value)
except staeval.UnsupportedExpressionError:
continue
ref = TypeRef(id=field_type.id, name_hint=field_sn)
vset = Set(expr=val, typeref=ref, path_id=PathId.from_typeref(ref))
rv.append(TupleElement(name=field_name, val=vset))
return rv

@elements.setter
def elements(self, elements: typing.List[TupleElement]) -> None:
pass

def get_field_value(self, name: sn.QualName) -> ConstExpr | TypeCast:
from . import staeval

ptr = self.meta_type.getptr(self.schema, name.get_local_name())
rv_type = ptr.get_target(self.schema)
assert rv_type is not None
rv_value = self.output_type.get_field_value(self.schema, name.name)
return staeval.coerce_py_const(rv_type.id, rv_value)
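The elements property above recomputes the tuple contents from the schema on every access, while the setter silently discards the assignment made by the Tuple base class during construction. A standalone sketch of that pattern, with a plain dict standing in for the schema and none of the edb types involved:

# Simplified stand-in for the lazy-elements pattern (not the real edb classes).
class LazyIntrospection:
    def __init__(self, fields, elements=None):
        self._fields = fields      # stands in for schema/type information
        self.elements = elements   # base-class-style assignment; ignored below

    @property
    def elements(self):
        # Recomputed on every access; values we cannot coerce are skipped.
        return [
            (name, value)
            for name, value in self._fields.items()
            if isinstance(value, (str, bool, int))
        ]

    @elements.setter
    def elements(self, value):
        # Accept and discard the constructor's assignment, mirroring the
        # no-op setter on StaticIntrospection.
        pass

node = LazyIntrospection({"name": "cfg::Config", "internal": object()})
print(node.elements)   # [('name', 'cfg::Config')]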
150 changes: 148 additions & 2 deletions edb/ir/staeval.py
@@ -34,6 +34,7 @@

import decimal
import functools
import uuid

import immutables

@@ -43,6 +44,7 @@
from edb.common import typeutils
from edb.common import parsing
from edb.common import uuidgen
from edb.common import value_dispatch
from edb.edgeql import ast as qlast
from edb.edgeql import compiler as qlcompiler
from edb.edgeql import qltypes
Expand All @@ -52,6 +54,7 @@
from edb.ir import statypes as statypes
from edb.ir import utils as irutils

from edb.schema import name as sn
from edb.schema import objects as s_obj
from edb.schema import objtypes as s_objtypes
from edb.schema import types as s_types
@@ -100,6 +103,52 @@ def evaluate_SelectStmt(
'expression is not constant', span=ir_stmt.span)


@evaluate.register(irast.InsertStmt)
def evaluate_InsertStmt(
ir: irast.InsertStmt, schema: s_schema.Schema
) -> EvaluationResult:
# InsertStmt should NOT be statically evaluated in general; this is
# a special case for inserting a nested cfg::ConfigObject, which is
# evaluated into a named tuple and then squashed into a Python dict
# to be used in compile_structured_config().
tmp_schema, subject_type = irtyputils.ir_typeref_to_type(
schema, ir.subject.expr.typeref
)
config_obj = schema.get("cfg::ConfigObject")
assert isinstance(config_obj, s_obj.SubclassableObject)
if subject_type.issubclass(tmp_schema, config_obj):
return irast.Tuple(
named=True,
typeref=ir.subject.typeref,
elements=[
irast.TupleElement(
name=ptr_set.expr.ptrref.shortname.name,
val=irast.Set(
expr=evaluate(ptr_set.expr.expr, schema),
typeref=ptr_set.typeref,
path_id=ptr_set.path_id,
),
)
for ptr_set, _ in ir.subject.shape
if ptr_set.expr.ptrref.shortname.name != "id"
and ptr_set.expr.expr is not None
],
)

raise UnsupportedExpressionError(
f'no static IR evaluation handler for general {ir.__class__}'
)


@evaluate.register(irast.TypeIntrospection)
def evaluate_TypeIntrospection(
ir: irast.TypeIntrospection, schema: s_schema.Schema
) -> EvaluationResult:
return irast.StaticIntrospection(
named=True, ir=ir, schema=schema, elements=[], typeref=ir.typeref
)


@evaluate.register(irast.TypeCast)
def evaluate_TypeCast(
ir_cast: irast.TypeCast, schema: s_schema.Schema
@@ -108,7 +157,7 @@ def evaluate_TypeCast(
schema, from_type = irtyputils.ir_typeref_to_type(
schema, ir_cast.from_type)
schema, to_type = irtyputils.ir_typeref_to_type(
schema, ir_cast.from_type)
schema, ir_cast.to_type)

if (
not isinstance(from_type, s_scalars.ScalarType)
@@ -141,11 +190,39 @@ def evaluate_Pointer(
) -> EvaluationResult:
if ptr.expr is not None:
return evaluate(ptr.expr, schema=schema)

elif (
ptr.direction == s_pointers.PointerDirection.Outbound
and isinstance(ptr.ptrref, irast.PointerRef)
and ptr.ptrref.out_cardinality.is_single()
and ptr.ptrref.out_target.is_scalar
):
return evaluate_pointer_ref(
evaluate(ptr.source.expr, schema=schema), ptr.ptrref
)

else:
raise UnsupportedExpressionError(
'expression is not constant', span=ptr.span)


@functools.singledispatch
def evaluate_pointer_ref(
evaluated_source: EvaluationResult, ptrref: irast.PointerRef
) -> EvaluationResult:
raise UnsupportedExpressionError(
f'unsupported PointerRef on source {evaluated_source}',
span=ptrref.span,
)


@evaluate_pointer_ref.register(irast.StaticIntrospection)
def evaluate_pointer_ref_StaticIntrospection(
source: irast.StaticIntrospection, ptrref: irast.PointerRef
) -> EvaluationResult:
return source.get_field_value(ptrref.shortname)


@evaluate.register(irast.ConstExpr)
def evaluate_BaseConstant(
ir_const: irast.ConstExpr, schema: s_schema.Schema
@@ -447,7 +524,8 @@ def python_cast_str(sval: str, pytype: type) -> Any:
return False
else:
raise errors.InvalidValueError(
f"invalid input syntax for type bool: {sval!r}"
f"invalid input syntax for type bool: {sval!r}",
hint="bool value can only be one of: true, false"
)
else:
return pytype(sval)
@@ -645,3 +723,71 @@ def evaluate_config_reset(
setting_name=ir.name,
value=None,
)


@evaluate_to_config_op.register(irast.ConfigInsert)
def evaluate_config_insert(
ir: irast.ConfigInsert, schema: s_schema.Schema
) -> config.Operation:
return config.Operation(
opcode=config.OpCode.CONFIG_ADD,
scope=ir.scope,
setting_name=ir.name,
value=evaluate_to_python_val(
irast.InsertStmt(subject=ir.expr), schema=schema
),
)


@value_dispatch.value_dispatch
def coerce_py_const(
type_id: uuid.UUID, val: Any
) -> irast.ConstExpr | irast.TypeCast:
raise UnsupportedExpressionError(f"unimplemented coerce type: {type_id}")


@coerce_py_const.register(s_obj.get_known_type_id("std::str"))
def evaluate_std_str(
type_id: uuid.UUID, val: Any
) -> irast.ConstExpr | irast.TypeCast:
return irast.StringConstant(
typeref=irast.TypeRef(
id=type_id, name_hint=sn.name_from_string("std::str")
),
value=str(val),
)


@coerce_py_const.register(s_obj.get_known_type_id("std::bool"))
def evaluate_std_bool(
type_id: uuid.UUID, val: Any
) -> irast.ConstExpr | irast.TypeCast:
return irast.BooleanConstant(
typeref=irast.TypeRef(
id=type_id, name_hint=sn.name_from_string("std::bool")
),
value=str(bool(val)).lower(),
)


@coerce_py_const.register(s_obj.get_known_type_id("std::uuid"))
def evaluate_std_uuid(
type_id: uuid.UUID, val: Any
) -> irast.ConstExpr | irast.TypeCast:
str_type_id = s_obj.get_known_type_id("std::str")
str_typeref = irast.TypeRef(
id=str_type_id, name_hint=sn.name_from_string("std::str")
)
return irast.TypeCast(
from_type=str_typeref,
to_type=irast.TypeRef(
id=type_id, name_hint=sn.name_from_string("std::uuid")
),
expr=irast.Set(
expr=irast.StringConstant(typeref=str_typeref, value=str(val)),
typeref=str_typeref,
path_id=irast.PathId.from_typeref(str_typeref),
),
sql_cast=True,
sql_expr=False,
)
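coerce_py_const above dispatches on the value of its first argument (a known type id) rather than on its type, via edb.common.value_dispatch. A minimal standalone stand-in for that mechanism, using string keys instead of real type UUIDs (this is an illustration, not the real value_dispatch implementation):

from typing import Any, Callable

def value_dispatch(default: Callable[..., Any]) -> Callable[..., Any]:
    # Registry of handlers keyed by the concrete value of the first argument.
    registry: dict[Any, Callable[..., Any]] = {}

    def dispatcher(key: Any, *args: Any, **kwargs: Any) -> Any:
        return registry.get(key, default)(key, *args, **kwargs)

    def register(key: Any):
        def inner(fn: Callable[..., Any]) -> Callable[..., Any]:
            registry[key] = fn
            return fn
        return inner

    dispatcher.register = register  # mimic the decorator API used above
    return dispatcher

@value_dispatch
def coerce(type_name: str, val: Any) -> str:
    raise NotImplementedError(f"unimplemented coerce type: {type_name}")

@coerce.register("std::str")
def _coerce_str(type_name: str, val: Any) -> str:
    return str(val)

@coerce.register("std::bool")
def _coerce_bool(type_name: str, val: Any) -> str:
    return str(bool(val)).lower()

print(coerce("std::bool", 1))   # -> "true"
print(coerce("std::str", 42))   # -> "42"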
18 changes: 9 additions & 9 deletions edb/server/bootstrap.py
@@ -2436,7 +2436,7 @@ async def _pg_ensure_database_not_connected(
f'database {dbname!r} is being accessed by other users')


async def _start(ctx: BootstrapContext) -> edbcompiler.CompilerState:
async def _start(ctx: BootstrapContext) -> edbcompiler.Compiler:
conn = await _check_catalog_compatibility(ctx)

try:
@@ -2445,7 +2445,7 @@ async def _start(ctx: BootstrapContext) -> edbcompiler.CompilerState:
ctx.cluster.overwrite_capabilities(struct.Struct('!Q').unpack(caps)[0])
_check_capabilities(ctx)

return (await edbcompiler.new_compiler_from_pg(conn)).state
return await edbcompiler.new_compiler_from_pg(conn)

finally:
conn.terminate()
@@ -2473,7 +2473,7 @@ async def _bootstrap_edgedb_super_roles(ctx: BootstrapContext) -> uuid.UUID:
async def _bootstrap(
ctx: BootstrapContext,
no_template: bool=False,
) -> edbcompiler.CompilerState:
) -> edbcompiler.Compiler:
args = ctx.args
cluster = ctx.cluster
backend_params = cluster.get_runtime_params()
@@ -2690,13 +2690,13 @@ async def _bootstrap(
args.default_database_user or edbdef.EDGEDB_SUPERUSER,
)

return compiler.state
return compiler


async def ensure_bootstrapped(
cluster: pgcluster.BaseCluster,
args: edbargs.ServerConfig,
) -> tuple[bool, edbcompiler.CompilerState]:
) -> tuple[bool, edbcompiler.Compiler]:
"""Bootstraps Gel instance if it hasn't been bootstrapped already.
Returns True if bootstrap happened and False if the instance was already
@@ -2712,10 +2712,10 @@ async def ensure_bootstrapped(
mode = await _get_cluster_mode(ctx)
ctx = dataclasses.replace(ctx, mode=mode)
if mode == ClusterMode.pristine:
state = await _bootstrap(ctx)
return True, state
compiler = await _bootstrap(ctx)
return True, compiler
else:
state = await _start(ctx)
return False, state
compiler = await _start(ctx)
return False, compiler
finally:
pgconn.terminate()
23 changes: 23 additions & 0 deletions edb/server/compiler/compiler.py
@@ -97,6 +97,7 @@
from edb.pgsql import types as pg_types
from edb.pgsql import delta as pg_delta

from . import config as config_compiler
from . import dbstate
from . import enums
from . import explain
@@ -1400,6 +1401,28 @@ def validate_schema_equivalence(
pickle.loads(global_schema),
)

def compile_structured_config(
self,
objects: Mapping[str, config_compiler.ConfigObject],
source: str | None = None,
allow_nested: bool = False,
) -> dict[str, immutables.Map[str, config.SettingValue]]:
# XXX: only config in the stdlib is supported currently, so the only
# key allowed in objects is "cfg::Config"; the mapping API is kept
# for future compatibility.
if list(objects) != ["cfg::Config"]:
difference = set(objects) - {"cfg::Config"}
raise NotImplementedError(
f"unsupported config: {', '.join(difference)}"
)

return config_compiler.compile_structured_config(
objects,
spec=self.state.config_spec,
schema=self.state.std_schema,
source=source,
allow_nested=allow_nested,
)


def compile_schema_storage_in_delta(
ctx: CompileContext,

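To make the new entry point's contract concrete, here is a tiny standalone imitation of its guard and return shape; plain dicts stand in for immutables.Map and config.SettingValue, and everything here is illustrative rather than the real compiler:

def compile_structured_config_sketch(objects, source=None, allow_nested=False):
    # Same guard as in compiler.py above: only the stdlib "cfg::Config"
    # object is accepted for now.
    if list(objects) != ["cfg::Config"]:
        difference = set(objects) - {"cfg::Config"}
        raise NotImplementedError(
            f"unsupported config: {', '.join(sorted(difference))}"
        )
    # The real implementation compiles each value through ConfigOp ASTs and
    # static evaluation; this sketch just echoes the values back per setting.
    return {
        "cfg::Config": {
            name: {"name": name, "value": value, "source": source}
            for name, value in objects["cfg::Config"].items()
        }
    }

print(compile_structured_config_sketch(
    {"cfg::Config": {"allow_user_specified_id": True}},  # assumed setting name
    source="environment variable",
))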