Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(#4884) Add support for ratio metrics #5027

Merged
merged 32 commits into from
Jul 6, 2022
Merged
Show file tree
Hide file tree
Changes from 31 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
72cad80
wip
drewbanin Mar 16, 2022
cf23d65
Merge branch 'main' of github.com:fishtown-analytics/dbt into feature…
drewbanin Apr 7, 2022
d43e859
Merge branch 'main' into feature/metric-improvements
drewbanin Apr 8, 2022
8dcfeb1
More support for ratio metrics
drewbanin Apr 11, 2022
aae1c81
Formatting and linting
drewbanin Apr 13, 2022
4d4198c
Fix unit tests
drewbanin Apr 14, 2022
a57d3b0
Support disabling metrics
drewbanin Apr 14, 2022
395393e
mypy
drewbanin Apr 14, 2022
55e7ab7
address all TODOs
drewbanin Apr 14, 2022
a9e839e
make pypy happy
drewbanin Apr 14, 2022
3a8385b
Merge branch 'main' into feature/metric-improvements
drewbanin May 31, 2022
db35e88
wip
drewbanin Jun 2, 2022
d6e886c
checkpoint
drewbanin Jun 3, 2022
7af4c51
refactor, remove ratio_terms
drewbanin Jun 3, 2022
32c3c53
flake8 and unit tests
drewbanin Jun 3, 2022
0344a20
remove debugger
drewbanin Jun 8, 2022
bee3eea
quickfix for filters
drewbanin Jun 9, 2022
934605e
Experiment with functional testing for 'expression' metrics
jtcohen6 Jun 16, 2022
06eb926
reformatting slightly
callum-mcdata Jun 28, 2022
4461d7a
make file and mypy fix
emmyoop Jun 29, 2022
31cfa7f
remove config from metrics - wip
emmyoop Jun 29, 2022
60ef314
add metrics back to context
emmyoop Jun 29, 2022
cd957a6
adding test changes
callum-mcdata Jun 29, 2022
ba3a78c
fixing test metrics
callum-mcdata Jun 29, 2022
54c38d0
revert name audit
emmyoop Jun 29, 2022
7b03989
Merge branch 'feature/metric-improvements' of https://github.com/dbt-…
emmyoop Jun 29, 2022
46679e2
pre-commit fixes
emmyoop Jun 29, 2022
b8a7f99
add changelog
emmyoop Jul 5, 2022
e235ab7
Bumping manifest version to v6 (#5430)
leahwicz Jul 5, 2022
c28628f
Merge branch 'main' of https://github.com/dbt-labs/dbt-core into feat…
emmyoop Jul 5, 2022
68f7708
add v5 to backwards compatibility
emmyoop Jul 5, 2022
e808272
Clean up test_previous_version_state, update for v6 (#5440)
jtcohen6 Jul 6, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changes/unreleased/Features-20220705-083026.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Features
body: Add support for ratio metrics
time: 2022-07-05T08:30:26.494837-05:00
custom:
Author: drewbanin callum-mcdata
Issue: "4884"
PR: "5027"
7 changes: 7 additions & 0 deletions .changes/unreleased/Under the Hood-20220705-142120.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Under the Hood
body: Bump manifest version to v6
time: 2022-07-05T14:21:20.66768-04:00
custom:
Author: leahwicz
Issue: "5417"
PR: "5430"
2 changes: 2 additions & 0 deletions core/dbt/compilation.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,8 @@ def link_node(self, linker: Linker, node: GraphMemberNode, manifest: Manifest):
linker.dependency(node.unique_id, (manifest.nodes[dependency].unique_id))
elif dependency in manifest.sources:
linker.dependency(node.unique_id, (manifest.sources[dependency].unique_id))
elif dependency in manifest.metrics:
linker.dependency(node.unique_id, (manifest.metrics[dependency].unique_id))
else:
dependency_not_found(node, dependency)

Expand Down
84 changes: 82 additions & 2 deletions core/dbt/context/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
ParsedSeedNode,
ParsedSourceDefinition,
)
from dbt.contracts.graph.metrics import MetricReference, ResolvedMetricReference
from dbt.exceptions import (
CompilationException,
ParsingException,
Expand All @@ -50,7 +51,9 @@
missing_config,
raise_compiler_error,
ref_invalid_args,
metric_invalid_args,
ref_target_not_found,
metric_target_not_found,
ref_bad_context,
source_target_not_found,
wrapped_exports,
Expand Down Expand Up @@ -199,7 +202,7 @@ def Relation(self):
return self.db_wrapper.Relation

@abc.abstractmethod
def __call__(self, *args: str) -> Union[str, RelationProxy]:
def __call__(self, *args: str) -> Union[str, RelationProxy, MetricReference]:
pass


Expand Down Expand Up @@ -265,6 +268,41 @@ def __call__(self, *args: str) -> RelationProxy:
return self.resolve(args[0], args[1])


class BaseMetricResolver(BaseResolver):
def resolve(self, name: str, package: Optional[str] = None) -> MetricReference:
...

def _repack_args(self, name: str, package: Optional[str]) -> List[str]:
if package is None:
return [name]
else:
return [package, name]

def validate_args(self, name: str, package: Optional[str]):
if not isinstance(name, str):
raise CompilationException(
f"The name argument to metric() must be a string, got {type(name)}"
)

if package is not None and not isinstance(package, str):
raise CompilationException(
f"The package argument to metric() must be a string or None, got {type(package)}"
)

def __call__(self, *args: str) -> MetricReference:
name: str
package: Optional[str] = None

if len(args) == 1:
name = args[0]
elif len(args) == 2:
package, name = args
else:
metric_invalid_args(self.model, args)
self.validate_args(name, package)
return self.resolve(name, package)


class Config(Protocol):
def __init__(self, model, context_config: Optional[ContextConfig]):
...
Expand Down Expand Up @@ -511,6 +549,34 @@ def resolve(self, source_name: str, table_name: str):
return self.Relation.create_from_source(target_source)


# metric` implementations
class ParseMetricResolver(BaseMetricResolver):
def resolve(self, name: str, package: Optional[str] = None) -> MetricReference:
self.model.metrics.append(self._repack_args(name, package))

return MetricReference(name, package)


class RuntimeMetricResolver(BaseMetricResolver):
def resolve(self, target_name: str, target_package: Optional[str] = None) -> MetricReference:
target_metric = self.manifest.resolve_metric(
target_name,
target_package,
self.current_project,
self.model.package_name,
)

if target_metric is None or isinstance(target_metric, Disabled):
# TODO : Use a different exception!!
metric_target_not_found(
self.model,
target_name,
target_package,
)

return ResolvedMetricReference(target_metric, self.manifest, self.Relation)


# `var` implementations.
class ModelConfiguredVar(Var):
def __init__(
Expand Down Expand Up @@ -568,6 +634,7 @@ class Provider(Protocol):
Var: Type[ModelConfiguredVar]
ref: Type[BaseRefResolver]
source: Type[BaseSourceResolver]
metric: Type[BaseMetricResolver]


class ParseProvider(Provider):
Expand All @@ -577,6 +644,7 @@ class ParseProvider(Provider):
Var = ParseVar
ref = ParseRefResolver
source = ParseSourceResolver
metric = ParseMetricResolver


class GenerateNameProvider(Provider):
Expand All @@ -586,6 +654,7 @@ class GenerateNameProvider(Provider):
Var = RuntimeVar
ref = ParseRefResolver
source = ParseSourceResolver
metric = ParseMetricResolver


class RuntimeProvider(Provider):
Expand All @@ -595,6 +664,7 @@ class RuntimeProvider(Provider):
Var = RuntimeVar
ref = RuntimeRefResolver
source = RuntimeSourceResolver
metric = RuntimeMetricResolver


class OperationProvider(RuntimeProvider):
Expand Down Expand Up @@ -778,6 +848,10 @@ def ref(self) -> Callable:
def source(self) -> Callable:
return self.provider.source(self.db_wrapper, self.model, self.config, self.manifest)

@contextproperty
def metric(self) -> Callable:
return self.provider.metric(self.db_wrapper, self.model, self.config, self.manifest)

@contextproperty("config")
def ctx_config(self) -> Config:
"""The `config` variable exists to handle end-user configuration for
Expand Down Expand Up @@ -1355,7 +1429,7 @@ def validate_args(self, name, package):
if not isinstance(name, str):
raise ParsingException(
f"In a metrics section in {self.model.original_file_path} "
f"the name argument to ref() must be a string"
"the name argument to ref() must be a string"
)


Expand All @@ -1373,6 +1447,12 @@ def generate_parse_metrics(
project,
manifest,
),
"metric": ParseMetricResolver(
None,
metric,
project,
manifest,
),
}


Expand Down
66 changes: 64 additions & 2 deletions core/dbt/contracts/graph/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,39 @@ def perform_lookup(self, unique_id: UniqueID, manifest) -> ManifestNode:
return manifest.nodes[unique_id]


class MetricLookup(dbtClassMixin):
def __init__(self, manifest: "Manifest"):
self.storage: Dict[str, Dict[PackageName, UniqueID]] = {}
self.populate(manifest)

def get_unique_id(self, search_name, package: Optional[PackageName]):
return find_unique_id_for_package(self.storage, search_name, package)

def find(self, search_name, package: Optional[PackageName], manifest: "Manifest"):
unique_id = self.get_unique_id(search_name, package)
if unique_id is not None:
return self.perform_lookup(unique_id, manifest)
return None

def add_metric(self, metric: ParsedMetric):
if metric.search_name not in self.storage:
self.storage[metric.search_name] = {}

self.storage[metric.search_name][metric.package_name] = metric.unique_id

def populate(self, manifest):
for metric in manifest.metrics.values():
if hasattr(metric, "name"):
self.add_metric(metric)

def perform_lookup(self, unique_id: UniqueID, manifest: "Manifest") -> ParsedMetric:
if unique_id not in manifest.metrics:
raise dbt.exceptions.InternalException(
f"Metric {unique_id} found in cache but not found in manifest"
)
return manifest.metrics[unique_id]


# This handles both models/seeds/snapshots and sources
class DisabledLookup(dbtClassMixin):
def __init__(self, manifest: "Manifest"):
Expand Down Expand Up @@ -434,6 +467,9 @@ class Disabled(Generic[D]):
target: D


MaybeMetricNode = Optional[ParsedMetric]


MaybeDocumentation = Optional[ParsedDocumentation]


Expand Down Expand Up @@ -595,6 +631,9 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
_ref_lookup: Optional[RefableLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
)
_metric_lookup: Optional[MetricLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
)
_disabled_lookup: Optional[DisabledLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
)
Expand Down Expand Up @@ -833,6 +872,12 @@ def ref_lookup(self) -> RefableLookup:
self._ref_lookup = RefableLookup(self)
return self._ref_lookup

@property
def metric_lookup(self) -> MetricLookup:
if self._metric_lookup is None:
self._metric_lookup = MetricLookup(self)
return self._metric_lookup

def rebuild_ref_lookup(self):
self._ref_lookup = RefableLookup(self)

Expand Down Expand Up @@ -908,6 +953,22 @@ def resolve_source(
return Disabled(disabled[0])
return None

def resolve_metric(
self,
target_metric_name: str,
target_metric_package: Optional[str],
current_project: str,
node_package: str,
) -> MaybeMetricNode:
metric: Optional[ParsedMetric] = None

candidates = _search_packages(current_project, node_package, target_metric_package)
for pkg in candidates:
metric = self.metric_lookup.find(target_metric_name, pkg, self)
if metric is not None:
return metric
return None

# Called by DocsRuntimeContext.doc
def resolve_doc(
self,
Expand Down Expand Up @@ -1072,6 +1133,7 @@ def __reduce_ex__(self, protocol):
self._doc_lookup,
self._source_lookup,
self._ref_lookup,
self._metric_lookup,
self._disabled_lookup,
self._analysis_lookup,
)
Expand All @@ -1091,7 +1153,7 @@ def __init__(self, macros):


@dataclass
@schema_version("manifest", 5)
@schema_version("manifest", 6)
class WritableManifest(ArtifactMixin):
nodes: Mapping[UniqueID, ManifestNode] = field(
metadata=dict(description=("The nodes defined in the dbt project and its dependencies"))
Expand Down Expand Up @@ -1137,7 +1199,7 @@ class WritableManifest(ArtifactMixin):

@classmethod
def compatible_previous_versions(self):
return [("manifest", 4)]
return [("manifest", 4), ("manifest", 5)]

def __post_serialize__(self, dct):
for unique_id, node in dct["nodes"].items():
Expand Down
70 changes: 70 additions & 0 deletions core/dbt/contracts/graph/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from dbt.node_types import NodeType


class MetricReference(object):
def __init__(self, metric_name, package_name=None):
self.metric_name = metric_name
self.package_name = package_name

def __str__(self):
return f"{self.metric_name}"


class ResolvedMetricReference(MetricReference):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was my take on a class analogous to the Relation object. I figured that this kind of object could be returned from the metric() function. It would help us avoid the need to find the metric in the graph manually like we do here

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks reasonable to me.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lets remove .metric_value based on conversation here.

"""
Simple proxy over a ParsedMetric which delegates property
lookups to the underlying node. Also adds helper functions
for working with metrics (ie. __str__ and templating functions)
"""

def __init__(self, node, manifest, Relation):
super().__init__(node.name, node.package_name)
self.node = node
self.manifest = manifest
self.Relation = Relation

def __getattr__(self, key):
return getattr(self.node, key)

def __str__(self):
return f"{self.node.name}"

@classmethod
def parent_metrics(cls, metric_node, manifest):
yield metric_node

for parent_unique_id in metric_node.depends_on.nodes:
node = manifest.metrics.get(parent_unique_id)
if node and node.resource_type == NodeType.Metric:
yield from cls.parent_metrics(node, manifest)

def parent_models(self):
in_scope_metrics = list(self.parent_metrics(self.node, self.manifest))

to_return = {
"base": [],
"derived": [],
}
for metric in in_scope_metrics:
if metric.type == "expression":
to_return["derived"].append(
{"metric_source": None, "metric": metric, "is_derived": True}
)
else:
for node_unique_id in metric.depends_on.nodes:
node = self.manifest.nodes.get(node_unique_id)
if node and node.resource_type in NodeType.refable():
to_return["base"].append(
{
"metric_relation_node": node,
"metric_relation": self.Relation.create(
database=node.database,
schema=node.schema,
identifier=node.alias,
),
"metric": metric,
"is_derived": False,
}
)

return to_return
Loading