(#4884) Add support for ratio metrics (#5027)

* wip * More support for ratio metrics * Formatting and linting * Fix unit tests * Support disabling metrics * mypy * address all TODOs * make pypy happy * wip * checkpoint * refactor, remove ratio_terms * flake8 and unit tests * remove debugger * quickfix for filters * Experiment with functional testing for 'expression' metrics * reformatting slightly * make file and mypy fix * remove config from metrics - wip * add metrics back to context * adding test changes * fixing test metrics * revert name audit * pre-commit fixes * add changelog * Bumping manifest version to v6 (#5430) * Bumping manifest version to v6 * Adding manifest file for tests * Reverting unneeded changes * Updating v6 * Updating test to add metrics field * Adding changelog * add v5 to backwards compatibility * Clean up test_previous_version_state, update for v6 (#5440) * Update test_previous_version_state for v6. Cleanup * Regenerate, rm breakpoint * Code checks * Add assertion that will fail when we bump manifest version * update tests to automatically tests all previous versions Co-authored-by: Emily Rockman <[email protected]> Co-authored-by: Jeremy Cohen <[email protected]> Co-authored-by: Callum McCann <[email protected]> Co-authored-by: Emily Rockman <[email protected]> Co-authored-by: leahwicz <[email protected]>
dbt-labs · Jul 6, 2022 · 064d890 · 064d890
1 parent febbd97
commit 064d890
Show file tree

Hide file tree

Showing 33 changed files with 7,070 additions and 85 deletions.
diff --git a/.changes/unreleased/Features-20220705-083026.yaml b/.changes/unreleased/Features-20220705-083026.yaml
@@ -0,0 +1,7 @@
+kind: Features
+body: Add support for ratio metrics
+time: 2022-07-05T08:30:26.494837-05:00
+custom:
+  Author: drewbanin callum-mcdata
+  Issue: "4884"
+  PR: "5027"
diff --git a/.changes/unreleased/Under the Hood-20220705-142120.yaml b/.changes/unreleased/Under the Hood-20220705-142120.yaml
@@ -0,0 +1,7 @@
+kind: Under the Hood
+body: Bump manifest version to v6
+time: 2022-07-05T14:21:20.66768-04:00
+custom:
+  Author: leahwicz
+  Issue: "5417"
+  PR: "5430"
diff --git a/core/dbt/compilation.py b/core/dbt/compilation.py
@@ -397,6 +397,8 @@ def link_node(self, linker: Linker, node: GraphMemberNode, manifest: Manifest):
                 linker.dependency(node.unique_id, (manifest.nodes[dependency].unique_id))
             elif dependency in manifest.sources:
                 linker.dependency(node.unique_id, (manifest.sources[dependency].unique_id))
+            elif dependency in manifest.metrics:
+                linker.dependency(node.unique_id, (manifest.metrics[dependency].unique_id))
             else:
                 dependency_not_found(node, dependency)
 

diff --git a/core/dbt/context/providers.py b/core/dbt/context/providers.py
@@ -40,6 +40,7 @@
     ParsedSeedNode,
     ParsedSourceDefinition,
 )
+from dbt.contracts.graph.metrics import MetricReference, ResolvedMetricReference
 from dbt.exceptions import (
     CompilationException,
     ParsingException,
@@ -50,7 +51,9 @@
     missing_config,
     raise_compiler_error,
     ref_invalid_args,
+    metric_invalid_args,
     ref_target_not_found,
+    metric_target_not_found,
     ref_bad_context,
     source_target_not_found,
     wrapped_exports,
@@ -199,7 +202,7 @@ def Relation(self):
         return self.db_wrapper.Relation
 
     @abc.abstractmethod
-    def __call__(self, *args: str) -> Union[str, RelationProxy]:
+    def __call__(self, *args: str) -> Union[str, RelationProxy, MetricReference]:
         pass
 
 
@@ -265,6 +268,41 @@ def __call__(self, *args: str) -> RelationProxy:
         return self.resolve(args[0], args[1])
 
 
+class BaseMetricResolver(BaseResolver):
+    def resolve(self, name: str, package: Optional[str] = None) -> MetricReference:
+        ...
+
+    def _repack_args(self, name: str, package: Optional[str]) -> List[str]:
+        if package is None:
+            return [name]
+        else:
+            return [package, name]
+
+    def validate_args(self, name: str, package: Optional[str]):
+        if not isinstance(name, str):
+            raise CompilationException(
+                f"The name argument to metric() must be a string, got {type(name)}"
+            )
+
+        if package is not None and not isinstance(package, str):
+            raise CompilationException(
+                f"The package argument to metric() must be a string or None, got {type(package)}"
+            )
+
+    def __call__(self, *args: str) -> MetricReference:
+        name: str
+        package: Optional[str] = None
+
+        if len(args) == 1:
+            name = args[0]
+        elif len(args) == 2:
+            package, name = args
+        else:
+            metric_invalid_args(self.model, args)
+        self.validate_args(name, package)
+        return self.resolve(name, package)
+
+
 class Config(Protocol):
     def __init__(self, model, context_config: Optional[ContextConfig]):
         ...
@@ -511,6 +549,34 @@ def resolve(self, source_name: str, table_name: str):
         return self.Relation.create_from_source(target_source)
 
 
+# metric` implementations
+class ParseMetricResolver(BaseMetricResolver):
+    def resolve(self, name: str, package: Optional[str] = None) -> MetricReference:
+        self.model.metrics.append(self._repack_args(name, package))
+
+        return MetricReference(name, package)
+
+
+class RuntimeMetricResolver(BaseMetricResolver):
+    def resolve(self, target_name: str, target_package: Optional[str] = None) -> MetricReference:
+        target_metric = self.manifest.resolve_metric(
+            target_name,
+            target_package,
+            self.current_project,
+            self.model.package_name,
+        )
+
+        if target_metric is None or isinstance(target_metric, Disabled):
+            # TODO : Use a different exception!!
+            metric_target_not_found(
+                self.model,
+                target_name,
+                target_package,
+            )
+
+        return ResolvedMetricReference(target_metric, self.manifest, self.Relation)
+
+
 # `var` implementations.
 class ModelConfiguredVar(Var):
     def __init__(
@@ -568,6 +634,7 @@ class Provider(Protocol):
     Var: Type[ModelConfiguredVar]
     ref: Type[BaseRefResolver]
     source: Type[BaseSourceResolver]
+    metric: Type[BaseMetricResolver]
 
 
 class ParseProvider(Provider):
@@ -577,6 +644,7 @@ class ParseProvider(Provider):
     Var = ParseVar
     ref = ParseRefResolver
     source = ParseSourceResolver
+    metric = ParseMetricResolver
 
 
 class GenerateNameProvider(Provider):
@@ -586,6 +654,7 @@ class GenerateNameProvider(Provider):
     Var = RuntimeVar
     ref = ParseRefResolver
     source = ParseSourceResolver
+    metric = ParseMetricResolver
 
 
 class RuntimeProvider(Provider):
@@ -595,6 +664,7 @@ class RuntimeProvider(Provider):
     Var = RuntimeVar
     ref = RuntimeRefResolver
     source = RuntimeSourceResolver
+    metric = RuntimeMetricResolver
 
 
 class OperationProvider(RuntimeProvider):
@@ -778,6 +848,10 @@ def ref(self) -> Callable:
     def source(self) -> Callable:
         return self.provider.source(self.db_wrapper, self.model, self.config, self.manifest)
 
+    @contextproperty
+    def metric(self) -> Callable:
+        return self.provider.metric(self.db_wrapper, self.model, self.config, self.manifest)
+
     @contextproperty("config")
     def ctx_config(self) -> Config:
         """The `config` variable exists to handle end-user configuration for
@@ -1355,7 +1429,7 @@ def validate_args(self, name, package):
         if not isinstance(name, str):
             raise ParsingException(
                 f"In a metrics section in {self.model.original_file_path} "
-                f"the name argument to ref() must be a string"
+                "the name argument to ref() must be a string"
             )
 
 
@@ -1373,6 +1447,12 @@ def generate_parse_metrics(
             project,
             manifest,
         ),
+        "metric": ParseMetricResolver(
+            None,
+            metric,
+            project,
+            manifest,
+        ),
     }
 
 

diff --git a/core/dbt/contracts/graph/manifest.py b/core/dbt/contracts/graph/manifest.py
@@ -183,6 +183,39 @@ def perform_lookup(self, unique_id: UniqueID, manifest) -> ManifestNode:
         return manifest.nodes[unique_id]
 
 
+class MetricLookup(dbtClassMixin):
+    def __init__(self, manifest: "Manifest"):
+        self.storage: Dict[str, Dict[PackageName, UniqueID]] = {}
+        self.populate(manifest)
+
+    def get_unique_id(self, search_name, package: Optional[PackageName]):
+        return find_unique_id_for_package(self.storage, search_name, package)
+
+    def find(self, search_name, package: Optional[PackageName], manifest: "Manifest"):
+        unique_id = self.get_unique_id(search_name, package)
+        if unique_id is not None:
+            return self.perform_lookup(unique_id, manifest)
+        return None
+
+    def add_metric(self, metric: ParsedMetric):
+        if metric.search_name not in self.storage:
+            self.storage[metric.search_name] = {}
+
+        self.storage[metric.search_name][metric.package_name] = metric.unique_id
+
+    def populate(self, manifest):
+        for metric in manifest.metrics.values():
+            if hasattr(metric, "name"):
+                self.add_metric(metric)
+
+    def perform_lookup(self, unique_id: UniqueID, manifest: "Manifest") -> ParsedMetric:
+        if unique_id not in manifest.metrics:
+            raise dbt.exceptions.InternalException(
+                f"Metric {unique_id} found in cache but not found in manifest"
+            )
+        return manifest.metrics[unique_id]
+
+
 # This handles both models/seeds/snapshots and sources
 class DisabledLookup(dbtClassMixin):
     def __init__(self, manifest: "Manifest"):
@@ -431,6 +464,9 @@ class Disabled(Generic[D]):
     target: D
 
 
+MaybeMetricNode = Optional[ParsedMetric]
+
+
 MaybeDocumentation = Optional[ParsedDocumentation]
 
 
@@ -592,6 +628,9 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
     _ref_lookup: Optional[RefableLookup] = field(
         default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
     )
+    _metric_lookup: Optional[MetricLookup] = field(
+        default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
+    )
     _disabled_lookup: Optional[DisabledLookup] = field(
         default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
     )
@@ -837,6 +876,12 @@ def ref_lookup(self) -> RefableLookup:
             self._ref_lookup = RefableLookup(self)
         return self._ref_lookup
 
+    @property
+    def metric_lookup(self) -> MetricLookup:
+        if self._metric_lookup is None:
+            self._metric_lookup = MetricLookup(self)
+        return self._metric_lookup
+
     def rebuild_ref_lookup(self):
         self._ref_lookup = RefableLookup(self)
 
@@ -912,6 +957,22 @@ def resolve_source(
             return Disabled(disabled[0])
         return None
 
+    def resolve_metric(
+        self,
+        target_metric_name: str,
+        target_metric_package: Optional[str],
+        current_project: str,
+        node_package: str,
+    ) -> MaybeMetricNode:
+        metric: Optional[ParsedMetric] = None
+
+        candidates = _search_packages(current_project, node_package, target_metric_package)
+        for pkg in candidates:
+            metric = self.metric_lookup.find(target_metric_name, pkg, self)
+            if metric is not None:
+                return metric
+        return None
+
     # Called by DocsRuntimeContext.doc
     def resolve_doc(
         self,
@@ -1076,6 +1137,7 @@ def __reduce_ex__(self, protocol):
             self._doc_lookup,
             self._source_lookup,
             self._ref_lookup,
+            self._metric_lookup,
             self._disabled_lookup,
             self._analysis_lookup,
         )
@@ -1095,7 +1157,7 @@ def __init__(self, macros):
 
 
 @dataclass
-@schema_version("manifest", 5)
+@schema_version("manifest", 6)
 class WritableManifest(ArtifactMixin):
     nodes: Mapping[UniqueID, ManifestNode] = field(
         metadata=dict(description=("The nodes defined in the dbt project and its dependencies"))
@@ -1141,7 +1203,7 @@ class WritableManifest(ArtifactMixin):
 
     @classmethod
     def compatible_previous_versions(self):
-        return [("manifest", 4)]
+        return [("manifest", 4), ("manifest", 5)]
 
     def __post_serialize__(self, dct):
         for unique_id, node in dct["nodes"].items():

diff --git a/core/dbt/contracts/graph/metrics.py b/core/dbt/contracts/graph/metrics.py
@@ -0,0 +1,70 @@
+from dbt.node_types import NodeType
+
+
+class MetricReference(object):
+    def __init__(self, metric_name, package_name=None):
+        self.metric_name = metric_name
+        self.package_name = package_name
+
+    def __str__(self):
+        return f"{self.metric_name}"
+
+
+class ResolvedMetricReference(MetricReference):
+    """
+    Simple proxy over a ParsedMetric which delegates property
+    lookups to the underlying node. Also adds helper functions
+    for working with metrics (ie. __str__ and templating functions)
+    """
+
+    def __init__(self, node, manifest, Relation):
+        super().__init__(node.name, node.package_name)
+        self.node = node
+        self.manifest = manifest
+        self.Relation = Relation
+
+    def __getattr__(self, key):
+        return getattr(self.node, key)
+
+    def __str__(self):
+        return f"{self.node.name}"
+
+    @classmethod
+    def parent_metrics(cls, metric_node, manifest):
+        yield metric_node
+
+        for parent_unique_id in metric_node.depends_on.nodes:
+            node = manifest.metrics.get(parent_unique_id)
+            if node and node.resource_type == NodeType.Metric:
+                yield from cls.parent_metrics(node, manifest)
+
+    def parent_models(self):
+        in_scope_metrics = list(self.parent_metrics(self.node, self.manifest))
+
+        to_return = {
+            "base": [],
+            "derived": [],
+        }
+        for metric in in_scope_metrics:
+            if metric.type == "expression":
+                to_return["derived"].append(
+                    {"metric_source": None, "metric": metric, "is_derived": True}
+                )
+            else:
+                for node_unique_id in metric.depends_on.nodes:
+                    node = self.manifest.nodes.get(node_unique_id)
+                    if node and node.resource_type in NodeType.refable():
+                        to_return["base"].append(
+                            {
+                                "metric_relation_node": node,
+                                "metric_relation": self.Relation.create(
+                                    database=node.database,
+                                    schema=node.schema,
+                                    identifier=node.alias,
+                                ),
+                                "metric": metric,
+                                "is_derived": False,
+                            }
+                        )
+
+        return to_return