Skip to content

Commit

Permalink
(#4884) Add support for ratio metrics (#5027)
Browse files Browse the repository at this point in the history
* wip

* More support for ratio metrics

* Formatting and linting

* Fix unit tests

* Support disabling metrics

* mypy

* address all TODOs

* make pypy happy

* wip

* checkpoint

* refactor, remove ratio_terms

* flake8 and unit tests

* remove debugger

* quickfix for filters

* Experiment with functional testing for 'expression' metrics

* reformatting slightly

* make file and mypy fix

* remove config from metrics - wip

* add metrics back to context

* adding test changes

* fixing test metrics

* revert name audit

* pre-commit fixes

* add changelog

* Bumping manifest version to v6 (#5430)

* Bumping manifest version to v6

* Adding manifest file for tests

* Reverting unneeded changes

* Updating v6

* Updating test to add metrics field

* Adding changelog

* add v5 to backwards compatibility

* Clean up test_previous_version_state, update for v6 (#5440)

* Update test_previous_version_state for v6. Cleanup

* Regenerate, rm breakpoint

* Code checks

* Add assertion that will fail when we bump manifest version

* update tests to automatically tests all previous versions

Co-authored-by: Emily Rockman <[email protected]>

Co-authored-by: Jeremy Cohen <[email protected]>
Co-authored-by: Callum McCann <[email protected]>
Co-authored-by: Emily Rockman <[email protected]>
Co-authored-by: leahwicz <[email protected]>
  • Loading branch information
5 people authored Jul 6, 2022
1 parent febbd97 commit 064d890
Show file tree
Hide file tree
Showing 33 changed files with 7,070 additions and 85 deletions.
7 changes: 7 additions & 0 deletions .changes/unreleased/Features-20220705-083026.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Features
body: Add support for ratio metrics
time: 2022-07-05T08:30:26.494837-05:00
custom:
Author: drewbanin callum-mcdata
Issue: "4884"
PR: "5027"
7 changes: 7 additions & 0 deletions .changes/unreleased/Under the Hood-20220705-142120.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Under the Hood
body: Bump manifest version to v6
time: 2022-07-05T14:21:20.66768-04:00
custom:
Author: leahwicz
Issue: "5417"
PR: "5430"
2 changes: 2 additions & 0 deletions core/dbt/compilation.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,8 @@ def link_node(self, linker: Linker, node: GraphMemberNode, manifest: Manifest):
linker.dependency(node.unique_id, (manifest.nodes[dependency].unique_id))
elif dependency in manifest.sources:
linker.dependency(node.unique_id, (manifest.sources[dependency].unique_id))
elif dependency in manifest.metrics:
linker.dependency(node.unique_id, (manifest.metrics[dependency].unique_id))
else:
dependency_not_found(node, dependency)

Expand Down
84 changes: 82 additions & 2 deletions core/dbt/context/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
ParsedSeedNode,
ParsedSourceDefinition,
)
from dbt.contracts.graph.metrics import MetricReference, ResolvedMetricReference
from dbt.exceptions import (
CompilationException,
ParsingException,
Expand All @@ -50,7 +51,9 @@
missing_config,
raise_compiler_error,
ref_invalid_args,
metric_invalid_args,
ref_target_not_found,
metric_target_not_found,
ref_bad_context,
source_target_not_found,
wrapped_exports,
Expand Down Expand Up @@ -199,7 +202,7 @@ def Relation(self):
return self.db_wrapper.Relation

@abc.abstractmethod
def __call__(self, *args: str) -> Union[str, RelationProxy]:
def __call__(self, *args: str) -> Union[str, RelationProxy, MetricReference]:
pass


Expand Down Expand Up @@ -265,6 +268,41 @@ def __call__(self, *args: str) -> RelationProxy:
return self.resolve(args[0], args[1])


class BaseMetricResolver(BaseResolver):
def resolve(self, name: str, package: Optional[str] = None) -> MetricReference:
...

def _repack_args(self, name: str, package: Optional[str]) -> List[str]:
if package is None:
return [name]
else:
return [package, name]

def validate_args(self, name: str, package: Optional[str]):
if not isinstance(name, str):
raise CompilationException(
f"The name argument to metric() must be a string, got {type(name)}"
)

if package is not None and not isinstance(package, str):
raise CompilationException(
f"The package argument to metric() must be a string or None, got {type(package)}"
)

def __call__(self, *args: str) -> MetricReference:
name: str
package: Optional[str] = None

if len(args) == 1:
name = args[0]
elif len(args) == 2:
package, name = args
else:
metric_invalid_args(self.model, args)
self.validate_args(name, package)
return self.resolve(name, package)


class Config(Protocol):
def __init__(self, model, context_config: Optional[ContextConfig]):
...
Expand Down Expand Up @@ -511,6 +549,34 @@ def resolve(self, source_name: str, table_name: str):
return self.Relation.create_from_source(target_source)


# metric` implementations
class ParseMetricResolver(BaseMetricResolver):
def resolve(self, name: str, package: Optional[str] = None) -> MetricReference:
self.model.metrics.append(self._repack_args(name, package))

return MetricReference(name, package)


class RuntimeMetricResolver(BaseMetricResolver):
def resolve(self, target_name: str, target_package: Optional[str] = None) -> MetricReference:
target_metric = self.manifest.resolve_metric(
target_name,
target_package,
self.current_project,
self.model.package_name,
)

if target_metric is None or isinstance(target_metric, Disabled):
# TODO : Use a different exception!!
metric_target_not_found(
self.model,
target_name,
target_package,
)

return ResolvedMetricReference(target_metric, self.manifest, self.Relation)


# `var` implementations.
class ModelConfiguredVar(Var):
def __init__(
Expand Down Expand Up @@ -568,6 +634,7 @@ class Provider(Protocol):
Var: Type[ModelConfiguredVar]
ref: Type[BaseRefResolver]
source: Type[BaseSourceResolver]
metric: Type[BaseMetricResolver]


class ParseProvider(Provider):
Expand All @@ -577,6 +644,7 @@ class ParseProvider(Provider):
Var = ParseVar
ref = ParseRefResolver
source = ParseSourceResolver
metric = ParseMetricResolver


class GenerateNameProvider(Provider):
Expand All @@ -586,6 +654,7 @@ class GenerateNameProvider(Provider):
Var = RuntimeVar
ref = ParseRefResolver
source = ParseSourceResolver
metric = ParseMetricResolver


class RuntimeProvider(Provider):
Expand All @@ -595,6 +664,7 @@ class RuntimeProvider(Provider):
Var = RuntimeVar
ref = RuntimeRefResolver
source = RuntimeSourceResolver
metric = RuntimeMetricResolver


class OperationProvider(RuntimeProvider):
Expand Down Expand Up @@ -778,6 +848,10 @@ def ref(self) -> Callable:
def source(self) -> Callable:
return self.provider.source(self.db_wrapper, self.model, self.config, self.manifest)

@contextproperty
def metric(self) -> Callable:
return self.provider.metric(self.db_wrapper, self.model, self.config, self.manifest)

@contextproperty("config")
def ctx_config(self) -> Config:
"""The `config` variable exists to handle end-user configuration for
Expand Down Expand Up @@ -1355,7 +1429,7 @@ def validate_args(self, name, package):
if not isinstance(name, str):
raise ParsingException(
f"In a metrics section in {self.model.original_file_path} "
f"the name argument to ref() must be a string"
"the name argument to ref() must be a string"
)


Expand All @@ -1373,6 +1447,12 @@ def generate_parse_metrics(
project,
manifest,
),
"metric": ParseMetricResolver(
None,
metric,
project,
manifest,
),
}


Expand Down
66 changes: 64 additions & 2 deletions core/dbt/contracts/graph/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,39 @@ def perform_lookup(self, unique_id: UniqueID, manifest) -> ManifestNode:
return manifest.nodes[unique_id]


class MetricLookup(dbtClassMixin):
def __init__(self, manifest: "Manifest"):
self.storage: Dict[str, Dict[PackageName, UniqueID]] = {}
self.populate(manifest)

def get_unique_id(self, search_name, package: Optional[PackageName]):
return find_unique_id_for_package(self.storage, search_name, package)

def find(self, search_name, package: Optional[PackageName], manifest: "Manifest"):
unique_id = self.get_unique_id(search_name, package)
if unique_id is not None:
return self.perform_lookup(unique_id, manifest)
return None

def add_metric(self, metric: ParsedMetric):
if metric.search_name not in self.storage:
self.storage[metric.search_name] = {}

self.storage[metric.search_name][metric.package_name] = metric.unique_id

def populate(self, manifest):
for metric in manifest.metrics.values():
if hasattr(metric, "name"):
self.add_metric(metric)

def perform_lookup(self, unique_id: UniqueID, manifest: "Manifest") -> ParsedMetric:
if unique_id not in manifest.metrics:
raise dbt.exceptions.InternalException(
f"Metric {unique_id} found in cache but not found in manifest"
)
return manifest.metrics[unique_id]


# This handles both models/seeds/snapshots and sources
class DisabledLookup(dbtClassMixin):
def __init__(self, manifest: "Manifest"):
Expand Down Expand Up @@ -431,6 +464,9 @@ class Disabled(Generic[D]):
target: D


MaybeMetricNode = Optional[ParsedMetric]


MaybeDocumentation = Optional[ParsedDocumentation]


Expand Down Expand Up @@ -592,6 +628,9 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
_ref_lookup: Optional[RefableLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
)
_metric_lookup: Optional[MetricLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
)
_disabled_lookup: Optional[DisabledLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
)
Expand Down Expand Up @@ -837,6 +876,12 @@ def ref_lookup(self) -> RefableLookup:
self._ref_lookup = RefableLookup(self)
return self._ref_lookup

@property
def metric_lookup(self) -> MetricLookup:
if self._metric_lookup is None:
self._metric_lookup = MetricLookup(self)
return self._metric_lookup

def rebuild_ref_lookup(self):
self._ref_lookup = RefableLookup(self)

Expand Down Expand Up @@ -912,6 +957,22 @@ def resolve_source(
return Disabled(disabled[0])
return None

def resolve_metric(
self,
target_metric_name: str,
target_metric_package: Optional[str],
current_project: str,
node_package: str,
) -> MaybeMetricNode:
metric: Optional[ParsedMetric] = None

candidates = _search_packages(current_project, node_package, target_metric_package)
for pkg in candidates:
metric = self.metric_lookup.find(target_metric_name, pkg, self)
if metric is not None:
return metric
return None

# Called by DocsRuntimeContext.doc
def resolve_doc(
self,
Expand Down Expand Up @@ -1076,6 +1137,7 @@ def __reduce_ex__(self, protocol):
self._doc_lookup,
self._source_lookup,
self._ref_lookup,
self._metric_lookup,
self._disabled_lookup,
self._analysis_lookup,
)
Expand All @@ -1095,7 +1157,7 @@ def __init__(self, macros):


@dataclass
@schema_version("manifest", 5)
@schema_version("manifest", 6)
class WritableManifest(ArtifactMixin):
nodes: Mapping[UniqueID, ManifestNode] = field(
metadata=dict(description=("The nodes defined in the dbt project and its dependencies"))
Expand Down Expand Up @@ -1141,7 +1203,7 @@ class WritableManifest(ArtifactMixin):

@classmethod
def compatible_previous_versions(self):
return [("manifest", 4)]
return [("manifest", 4), ("manifest", 5)]

def __post_serialize__(self, dct):
for unique_id, node in dct["nodes"].items():
Expand Down
70 changes: 70 additions & 0 deletions core/dbt/contracts/graph/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from dbt.node_types import NodeType


class MetricReference(object):
def __init__(self, metric_name, package_name=None):
self.metric_name = metric_name
self.package_name = package_name

def __str__(self):
return f"{self.metric_name}"


class ResolvedMetricReference(MetricReference):
"""
Simple proxy over a ParsedMetric which delegates property
lookups to the underlying node. Also adds helper functions
for working with metrics (ie. __str__ and templating functions)
"""

def __init__(self, node, manifest, Relation):
super().__init__(node.name, node.package_name)
self.node = node
self.manifest = manifest
self.Relation = Relation

def __getattr__(self, key):
return getattr(self.node, key)

def __str__(self):
return f"{self.node.name}"

@classmethod
def parent_metrics(cls, metric_node, manifest):
yield metric_node

for parent_unique_id in metric_node.depends_on.nodes:
node = manifest.metrics.get(parent_unique_id)
if node and node.resource_type == NodeType.Metric:
yield from cls.parent_metrics(node, manifest)

def parent_models(self):
in_scope_metrics = list(self.parent_metrics(self.node, self.manifest))

to_return = {
"base": [],
"derived": [],
}
for metric in in_scope_metrics:
if metric.type == "expression":
to_return["derived"].append(
{"metric_source": None, "metric": metric, "is_derived": True}
)
else:
for node_unique_id in metric.depends_on.nodes:
node = self.manifest.nodes.get(node_unique_id)
if node and node.resource_type in NodeType.refable():
to_return["base"].append(
{
"metric_relation_node": node,
"metric_relation": self.Relation.create(
database=node.database,
schema=node.schema,
identifier=node.alias,
),
"metric": metric,
"is_derived": False,
}
)

return to_return
Loading

0 comments on commit 064d890

Please sign in to comment.