Skip to content

Commit

Permalink
Export metric about schema feature use (#7940)
Browse files Browse the repository at this point in the history
Currently reports whether the following schema features are used:
 - policies
 - triggers
 - rewrites
 - globals
 - computed globals
 - aliases
 - functions
 - computed pointers
 - FTS 
 - link properties
 - annotations
 - indexes
 - constraints
 - multi properties
 - enums
  • Loading branch information
msullivan authored Oct 30, 2024
1 parent b40ab1d commit 8ac9372
Show file tree
Hide file tree
Showing 12 changed files with 291 additions and 7 deletions.
2 changes: 1 addition & 1 deletion edb/schema/properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def has_user_defined_properties(self, schema: s_schema.Schema) -> bool:
def is_link_property(self, schema: s_schema.Schema) -> bool:
    """Return whether this property belongs to a pointer (e.g. a link).

    A property that has no source (the previous behaviour reported such a
    property as "abstract" and raised ``ValueError``) is simply not a link
    property, so ``False`` is returned for it.  This lets callers probe
    arbitrary properties without having to guard against an exception.

    Args:
        schema: The schema used to resolve this property's source.

    Returns:
        ``True`` if the property's source is a ``pointers.Pointer``,
        ``False`` otherwise (including when there is no source at all).
    """
    source = self.get_source(schema)
    if source is None:
        # Sourceless property: nothing to be a link property of.
        return False
    return isinstance(source, pointers.Pointer)

def allow_ref_propagation(
Expand Down
12 changes: 12 additions & 0 deletions edb/schema/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,7 @@ def get_objects(
*,
exclude_stdlib: bool = False,
exclude_global: bool = False,
exclude_extensions: bool = False,
exclude_internal: bool = True,
included_modules: Optional[Iterable[sn.Name]] = None,
excluded_modules: Optional[Iterable[sn.Name]] = None,
Expand Down Expand Up @@ -1499,6 +1500,7 @@ def get_objects(
*,
exclude_stdlib: bool = False,
exclude_global: bool = False,
exclude_extensions: bool = False,
exclude_internal: bool = True,
included_modules: Optional[Iterable[sn.Name]] = None,
excluded_modules: Optional[Iterable[sn.Name]] = None,
Expand All @@ -1512,6 +1514,7 @@ def get_objects(
self._id_to_type,
exclude_stdlib=exclude_stdlib,
exclude_global=exclude_global,
exclude_extensions=exclude_extensions,
exclude_internal=exclude_internal,
included_modules=included_modules,
excluded_modules=excluded_modules,
Expand Down Expand Up @@ -1613,6 +1616,7 @@ def __init__(
*,
exclude_stdlib: bool = False,
exclude_global: bool = False,
exclude_extensions: bool = False,
exclude_internal: bool = True,
included_modules: Optional[Iterable[sn.Name]],
excluded_modules: Optional[Iterable[sn.Name]],
Expand Down Expand Up @@ -1663,6 +1667,12 @@ def __init__(
lambda schema, obj: not isinstance(obj, s_pseudo.PseudoType)
)

if exclude_extensions:
filters.append(
lambda schema, obj:
obj.get_name(schema).get_root_module_name() != EXT_MODULE
)

if exclude_global:
filters.append(
lambda schema, obj: not isinstance(obj, so.GlobalObject)
Expand Down Expand Up @@ -2138,6 +2148,7 @@ def get_objects(
*,
exclude_stdlib: bool = False,
exclude_global: bool = False,
exclude_extensions: bool = False,
exclude_internal: bool = True,
included_modules: Optional[Iterable[sn.Name]] = None,
excluded_modules: Optional[Iterable[sn.Name]] = None,
Expand All @@ -2151,6 +2162,7 @@ def get_objects(
self._get_object_ids(),
exclude_global=exclude_global,
exclude_stdlib=exclude_stdlib,
exclude_extensions=exclude_extensions,
exclude_internal=exclude_internal,
included_modules=included_modules,
excluded_modules=excluded_modules,
Expand Down
10 changes: 10 additions & 0 deletions edb/server/compiler/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,9 @@ def parse_user_schema_db_config(
ext_config_settings=ext_config_settings,
protocol_version=defines.CURRENT_PROTOCOL,
state_serializer=state_serializer,
feature_used_metrics=ddl.produce_feature_used_metrics(
self.state, user_schema
),
)

def make_state_serializer(
Expand Down Expand Up @@ -1945,6 +1948,11 @@ def _compile_ql_transaction(
global_schema=final_global_schema,
sp_name=sp_name,
sp_id=sp_id,
feature_used_metrics=(
ddl.produce_feature_used_metrics(
ctx.compiler_state, final_user_schema
) if final_user_schema else None
),
)


Expand Down Expand Up @@ -2477,6 +2485,7 @@ def _try_compile(
unit.extensions, unit.ext_config_settings = (
_extract_extensions(ctx, comp.user_schema)
)
unit.feature_used_metrics = comp.feature_used_metrics
if comp.cached_reflection is not None:
unit.cached_reflection = \
pickle.dumps(comp.cached_reflection, -1)
Expand Down Expand Up @@ -2505,6 +2514,7 @@ def _try_compile(
unit.extensions, unit.ext_config_settings = (
_extract_extensions(ctx, comp.user_schema)
)
unit.feature_used_metrics = comp.feature_used_metrics
if comp.cached_reflection is not None:
unit.cached_reflection = \
pickle.dumps(comp.cached_reflection, -1)
Expand Down
9 changes: 8 additions & 1 deletion edb/server/compiler/dbstate.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,8 @@ class SessionStateQuery(BaseQuery):
@dataclasses.dataclass(frozen=True)
class DDLQuery(BaseQuery):

user_schema: s_schema.FlatSchema
user_schema: Optional[s_schema.FlatSchema]
feature_used_metrics: Optional[dict[str, float]]
global_schema: Optional[s_schema.FlatSchema] = None
cached_reflection: Any = None
is_transactional: bool = True
Expand All @@ -184,6 +185,7 @@ class TxControlQuery(BaseQuery):
user_schema: Optional[s_schema.Schema] = None
global_schema: Optional[s_schema.Schema] = None
cached_reflection: Any = None
feature_used_metrics: Optional[dict[str, float]] = None

sp_name: Optional[str] = None
sp_id: Optional[int] = None
Expand Down Expand Up @@ -336,6 +338,10 @@ class QueryUnit:
# If present, represents the future schema state after
# the command is run. The schema is pickled.
user_schema: Optional[bytes] = None
# If present, represents updated metrics about feature use induced
# by the new user_schema.
feature_used_metrics: Optional[dict[str, float]] = None

# Unlike user_schema, user_schema_version usually exists, pointing to the
# latest user schema, which is self.user_schema if changed, or the user
# schema this QueryUnit was compiled upon.
Expand Down Expand Up @@ -619,6 +625,7 @@ class ParsedDatabase:
schema_version: uuid.UUID
database_config: immutables.Map[str, config.SettingValue]
ext_config_settings: list[config.Setting]
feature_used_metrics: dict[str, float]

protocol_version: defines.ProtocolVersion
state_serializer: sertypes.StateSerializer
Expand Down
117 changes: 114 additions & 3 deletions edb/server/compiler/ddl.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,27 @@
from edb.edgeql import qltypes
from edb.edgeql import quote as qlquote


from edb.schema import annos as s_annos
from edb.schema import constraints as s_constraints
from edb.schema import database as s_db
from edb.schema import ddl as s_ddl
from edb.schema import delta as s_delta
from edb.schema import expraliases as s_expraliases
from edb.schema import functions as s_func
from edb.schema import globals as s_globals
from edb.schema import indexes as s_indexes
from edb.schema import links as s_links
from edb.schema import migrations as s_migrations
from edb.schema import objects as s_obj
from edb.schema import objtypes as s_objtypes
from edb.schema import policies as s_policies
from edb.schema import pointers as s_pointers
from edb.schema import properties as s_properties
from edb.schema import rewrites as s_rewrites
from edb.schema import scalars as s_scalars
from edb.schema import schema as s_schema
from edb.schema import triggers as s_triggers
from edb.schema import utils as s_utils
from edb.schema import version as s_ver

Expand Down Expand Up @@ -165,6 +180,7 @@ def compile_and_apply_ddl_stmt(
user_schema=current_tx.get_user_schema(),
is_transactional=True,
warnings=tuple(delta.warnings),
feature_used_metrics=None,
)

store_migration_sdl = compiler._get_config_val(ctx, 'store_migration_sdl')
Expand Down Expand Up @@ -196,6 +212,7 @@ def compile_and_apply_ddl_stmt(
user_schema=current_tx.get_user_schema(),
is_transactional=True,
warnings=tuple(delta.warnings),
feature_used_metrics=None,
)

# Apply and adapt delta, build native delta plan, which
Expand Down Expand Up @@ -271,6 +288,7 @@ def compile_and_apply_ddl_stmt(
debug.header('Delta Script')
debug.dump_code(b'\n'.join(sql), lexer='sql')

new_user_schema = current_tx.get_user_schema_if_updated()
return dbstate.DDLQuery(
sql=sql,
is_transactional=is_transactional,
Expand All @@ -280,11 +298,15 @@ def compile_and_apply_ddl_stmt(
create_db_template=create_db_template,
create_db_mode=create_db_mode,
ddl_stmt_id=ddl_stmt_id,
user_schema=current_tx.get_user_schema_if_updated(), # type: ignore
user_schema=new_user_schema,
cached_reflection=current_tx.get_cached_reflection_if_updated(),
global_schema=current_tx.get_global_schema_if_updated(),
config_ops=config_ops,
warnings=tuple(delta.warnings),
feature_used_metrics=(
produce_feature_used_metrics(ctx.compiler_state, new_user_schema)
if new_user_schema else None
),
)


Expand Down Expand Up @@ -1169,6 +1191,97 @@ def _reset_schema(
)


# Maps a schema object class to the feature key that
# produce_feature_used_metrics() records when it encounters an object of
# that class in the user schema.  The lookup there is keyed on the exact
# type(obj), so only the classes listed here match; an instance of a
# subclass that is not listed does not pick up its parent's key.
_FEATURE_NAMES: dict[type[s_obj.Object], str] = {
s_annos.AnnotationValue: 'annotation',
s_policies.AccessPolicy: 'policy',
s_triggers.Trigger: 'trigger',
s_rewrites.Rewrite: 'rewrite',
s_globals.Global: 'global',
s_expraliases.Alias: 'alias',
s_func.Function: 'function',
s_indexes.Index: 'index',
s_scalars.ScalarType: 'scalar',
}


def produce_feature_used_metrics(
    compiler_state: compiler.CompilerState,
    user_schema: s_schema.Schema,
) -> dict[str, float]:
    """Report which schema features the given user schema makes use of.

    Every object returned by ``user_schema.get_objects(...)`` (with
    extension objects excluded) is inspected, and a key is recorded for
    each feature it demonstrates: first the per-class key from
    ``_FEATURE_NAMES`` (if the object's exact type is listed there), then
    at most one group of finer-grained keys such as ``computed_global``,
    ``multi_property`` or ``enum``.

    Args:
        compiler_state: Supplies ``std_schema``, which is chained with
            *user_schema* for the lookups that need std objects.
        user_schema: The user schema to scan.

    Returns:
        A dict keyed by feature name; every recorded value is ``1``.
        Features that were not detected are absent from the dict.
    """
    # Skipping global schema is a little dodgy but not that bad
    full_schema = s_schema.ChainedSchema(
        compiler_state.std_schema,
        user_schema,
        s_schema.EMPTY_SCHEMA,
    )

    used: dict[str, float] = {}

    # TODO(perf): Should we optimize peeking into the innards directly
    # so we can skip creating the proxies?
    candidates = user_schema.get_objects(
        type=s_obj.Object, exclude_extensions=True,
    )
    for obj in candidates:
        # Coarse, per-class feature (exact type match only).
        base_key = _FEATURE_NAMES.get(type(obj))
        if base_key:
            used[base_key] = 1

        # Finer-grained features.  The checks are mutually exclusive in
        # the order written: the first one whose condition holds wins,
        # and an object failing a condition falls through to the next.
        if isinstance(obj, s_globals.Global) and obj.get_expr(user_schema):
            used['computed_global'] = 1
            continue

        if isinstance(obj, s_properties.Property):
            if obj.get_expr(user_schema):
                used['computed_property'] = 1
            elif obj.get_cardinality(full_schema).is_multi():
                used['multi_property'] = 1

            if (
                obj.is_link_property(full_schema)
                and not obj.is_special_pointer(full_schema)
            ):
                used['link_property'] = 1
            continue

        if isinstance(obj, s_links.Link) and obj.get_expr(user_schema):
            used['computed_link'] = 1
            continue

        if (
            isinstance(obj, s_indexes.Index)
            and s_indexes.is_fts_index(full_schema, obj)
        ):
            used['fts'] = 1
            continue

        # Constraints whose subject is a special-pointer property are
        # not counted; such objects fall through to the checks below.
        if (
            isinstance(obj, s_constraints.Constraint)
            and not (
                (subject := obj.get_subject(full_schema))
                and isinstance(subject, s_properties.Property)
                and subject.is_special_pointer(full_schema)
            )
        ):
            used['constraint'] = 1
            std_exclusive = full_schema.get(
                'std::exclusive', type=s_constraints.Constraint
            )
            # Anything that is not derived from std::exclusive is
            # additionally reported under 'constraint_expr'.
            if not obj.issubclass(full_schema, std_exclusive):
                used['constraint_expr'] = 1
            continue

        if (
            isinstance(obj, s_objtypes.ObjectType)
            and len(obj.get_bases(full_schema).objects(full_schema)) > 1
        ):
            used['multiple_inheritance'] = 1
            continue

        if isinstance(obj, s_scalars.ScalarType) and obj.is_enum(full_schema):
            used['enum'] = 1

    return used


def repair_schema(
ctx: compiler.CompileContext,
) -> Optional[tuple[tuple[bytes, ...], s_schema.Schema, Any]]:
Expand Down Expand Up @@ -1280,7 +1393,6 @@ def administer_reindex(
from edb.schema import objtypes as s_objtypes
from edb.schema import constraints as s_constraints
from edb.schema import indexes as s_indexes
from edb.schema import pointers as s_pointers

if len(ql.expr.args) != 1 or ql.expr.kwargs:
raise errors.QueryError(
Expand Down Expand Up @@ -1418,7 +1530,6 @@ def administer_vacuum(
from edb.ir import ast as irast
from edb.ir import typeutils as irtypeutils
from edb.schema import objtypes as s_objtypes
from edb.schema import pointers as s_pointers

# check that the kwargs are valid
kwargs: Dict[str, str] = {}
Expand Down
4 changes: 4 additions & 0 deletions edb/server/dbview/dbview.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ cdef class Database:
readonly object reflection_cache
readonly object backend_ids
readonly object extensions
readonly object _feature_used_metrics

cdef _invalidate_caches(self)
cdef _cache_compiled_query(self, key, compiled)
Expand All @@ -102,12 +103,14 @@ cdef class Database:
self,
extensions,
)
cdef _set_feature_used_metrics(self, feature_used_metrics)
cdef _set_and_signal_new_user_schema(
self,
new_schema_pickle,
schema_version,
extensions,
ext_config_settings,
feature_used_metrics,
reflection_cache=?,
backend_ids=?,
db_config=?,
Expand Down Expand Up @@ -208,6 +211,7 @@ cdef class DatabaseConnectionView:
global_schema,
roles,
cached_reflection,
feature_used_metrics,
)

cdef get_user_config_spec(self)
Expand Down
1 change: 1 addition & 0 deletions edb/server/dbview/dbview.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ class DatabaseIndex:
extensions: Optional[set[str]],
ext_config_settings: Optional[list[config.Setting]],
early: bool = False,
feature_used_metrics: Optional[Mapping[str, float]] = ...,
) -> Database:
...

Expand Down
Loading

0 comments on commit 8ac9372

Please sign in to comment.