From bdc92536fd9beb6fd94dcdfd7585580dd58570f5 Mon Sep 17 00:00:00 2001 From: Paul Yang Date: Mon, 7 Oct 2024 07:15:11 -0700 Subject: [PATCH] Support filtering of `LinkableElements` by element name (#1444) This PR updates `LinkableElementFilter` to include the element name. This allows for more efficient filtering before a spec pattern is applied to a `LinkableElementSet`. --- .../model/semantics/element_filter.py | 17 ++++++++++++++++- .../model/semantics/linkable_element_set.py | 10 ++++++++++ .../model/semantics/metric_lookup.py | 13 +++++++++---- .../candidate_push_down/push_down_visitor.py | 2 -- .../specs/patterns/entity_link_pattern.py | 13 ++++++++++--- .../specs/patterns/typed_patterns.py | 8 ++++++-- 6 files changed, 51 insertions(+), 12 deletions(-) diff --git a/metricflow-semantics/metricflow_semantics/model/semantics/element_filter.py b/metricflow-semantics/metricflow_semantics/model/semantics/element_filter.py index 8d15ba5e72..f4f1bd0ed2 100644 --- a/metricflow-semantics/metricflow_semantics/model/semantics/element_filter.py +++ b/metricflow-semantics/metricflow_semantics/model/semantics/element_filter.py @@ -1,7 +1,7 @@ from __future__ import annotations from dataclasses import dataclass -from typing import FrozenSet +from typing import FrozenSet, Optional from typing_extensions import Self, override @@ -13,13 +13,20 @@ class LinkableElementFilter(Mergeable): """Describes a way to filter the `LinkableElements` in a `LinkableElementSet`.""" + # A `None` value for element names means no filtering on element names. + element_names: Optional[FrozenSet[str]] = None with_any_of: FrozenSet[LinkableElementProperty] = LinkableElementProperty.all_properties() without_any_of: FrozenSet[LinkableElementProperty] = frozenset() without_all_of: FrozenSet[LinkableElementProperty] = frozenset() @override def merge(self: Self, other: LinkableElementFilter) -> LinkableElementFilter: + if self.element_names is None and other.element_names is None: + element_names = None + else: + element_names = (self.element_names or frozenset()).union(other.element_names or frozenset()) return LinkableElementFilter( + element_names=element_names, with_any_of=self.with_any_of.union(other.with_any_of), without_any_of=self.without_any_of.union(other.without_any_of), without_all_of=self.without_all_of.union(other.without_all_of), @@ -29,3 +36,11 @@ def merge(self: Self, other: LinkableElementFilter) -> LinkableElementFilter: @override def empty_instance(cls) -> LinkableElementFilter: return LinkableElementFilter() + + def without_element_names(self) -> LinkableElementFilter: + """Return this filter without the `element_names` filter set.""" + return LinkableElementFilter( + with_any_of=self.with_any_of, + without_any_of=self.without_any_of, + without_all_of=self.without_all_of, + ) diff --git a/metricflow-semantics/metricflow_semantics/model/semantics/linkable_element_set.py b/metricflow-semantics/metricflow_semantics/model/semantics/linkable_element_set.py index 61977bef89..16cdef4f5b 100644 --- a/metricflow-semantics/metricflow_semantics/model/semantics/linkable_element_set.py +++ b/metricflow-semantics/metricflow_semantics/model/semantics/linkable_element_set.py @@ -229,6 +229,7 @@ def filter( a property in "without_any_of" set are removed. Lastly, any elements with all properties in without_all_of are removed. """ + element_names = element_filter.element_names with_any_of = element_filter.with_any_of without_any_of = element_filter.without_any_of without_all_of = element_filter.without_all_of @@ -238,6 +239,9 @@ def filter( key_to_linkable_metrics: Dict[ElementPathKey, Tuple[LinkableMetric, ...]] = {} for path_key, linkable_dimensions in self.path_key_to_linkable_dimensions.items(): + if element_names is not None and path_key.element_name not in element_names: + continue + filtered_linkable_dimensions = tuple( linkable_dimension for linkable_dimension in linkable_dimensions @@ -252,6 +256,9 @@ def filter( key_to_linkable_dimensions[path_key] = filtered_linkable_dimensions for path_key, linkable_entities in self.path_key_to_linkable_entities.items(): + if element_names is not None and path_key.element_name not in element_names: + continue + filtered_linkable_entities = tuple( linkable_entity for linkable_entity in linkable_entities @@ -266,6 +273,9 @@ def filter( key_to_linkable_entities[path_key] = filtered_linkable_entities for path_key, linkable_metrics in self.path_key_to_linkable_metrics.items(): + if element_names is not None and path_key.element_name not in element_names: + continue + filtered_linkable_metrics = tuple( linkable_metric for linkable_metric in linkable_metrics diff --git a/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py b/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py index 60be83001e..3af03fc209 100644 --- a/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py +++ b/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py @@ -83,12 +83,17 @@ def linkable_elements_for_measure( ): return self._linkable_spec_resolver.get_linkable_element_set_for_measure(measure_reference, element_filter) - cache_key = (measure_reference, element_filter) + # Cache the result without element names in the filter for better hit rates. + element_filter_without_element_names = element_filter.without_element_names() + cache_key = (measure_reference, element_filter_without_element_names) + result = self._linkable_element_set_for_measure_cache.get(cache_key) if result is not None: - return result + return result.filter(element_filter) - result = self._linkable_spec_resolver.get_linkable_element_set_for_measure(measure_reference, element_filter) + result = self._linkable_spec_resolver.get_linkable_element_set_for_measure( + measure_reference, element_filter_without_element_names + ) self._linkable_element_set_for_measure_cache[cache_key] = result logger.debug( @@ -99,7 +104,7 @@ def linkable_elements_for_measure( runtime=time.time() - start_time, ) ) - return result + return result.filter(element_filter) @functools.lru_cache def linkable_elements_for_no_metrics_query( diff --git a/metricflow-semantics/metricflow_semantics/query/group_by_item/candidate_push_down/push_down_visitor.py b/metricflow-semantics/metricflow_semantics/query/group_by_item/candidate_push_down/push_down_visitor.py index 69347a6fab..479a4c1700 100644 --- a/metricflow-semantics/metricflow_semantics/query/group_by_item/candidate_push_down/push_down_visitor.py +++ b/metricflow-semantics/metricflow_semantics/query/group_by_item/candidate_push_down/push_down_visitor.py @@ -157,8 +157,6 @@ def __init__( suggestion_generator: If there are issues with matching patterns to specs, use this to generate suggestions that will go in the issue. source_spec_patterns: The patterns to apply to the specs available at the measure nodes. - with_any_property: Only consider group-by-items with these properties from the measure nodes. - without_any_property: Only consider group-by-items without any of these properties (see LinkableElementProperty). filter_location: If resolving a where filter item, where this filter was defined. """ diff --git a/metricflow-semantics/metricflow_semantics/specs/patterns/entity_link_pattern.py b/metricflow-semantics/metricflow_semantics/specs/patterns/entity_link_pattern.py index 8121f0267a..ed1c596ef2 100644 --- a/metricflow-semantics/metricflow_semantics/specs/patterns/entity_link_pattern.py +++ b/metricflow-semantics/metricflow_semantics/specs/patterns/entity_link_pattern.py @@ -3,7 +3,7 @@ import logging from dataclasses import dataclass from enum import Enum -from typing import Any, List, Optional, Sequence, Tuple +from typing import Any, FrozenSet, List, Optional, Sequence, Tuple from dbt_semantic_interfaces.references import EntityReference from dbt_semantic_interfaces.type_enums.date_part import DatePart @@ -154,10 +154,17 @@ def match(self, candidate_specs: Sequence[InstanceSpec]) -> Sequence[LinkableIns @property @override def element_pre_filter(self) -> LinkableElementFilter: + element_names: Optional[FrozenSet[str]] = None + if ParameterSetField.ELEMENT_NAME in self.parameter_set.fields_to_compare: + element_names = frozenset({self.parameter_set.element_name}) if self.parameter_set.element_name else None if ( self.parameter_set.metric_subquery_entity_links is None or len(self.parameter_set.metric_subquery_entity_links) == 0 ): - return LinkableElementFilter(without_any_of=frozenset({LinkableElementProperty.METRIC})) + return LinkableElementFilter( + element_names=element_names, without_any_of=frozenset({LinkableElementProperty.METRIC}) + ) - return LinkableElementFilter() + return LinkableElementFilter( + element_names=element_names, + ) diff --git a/metricflow-semantics/metricflow_semantics/specs/patterns/typed_patterns.py b/metricflow-semantics/metricflow_semantics/specs/patterns/typed_patterns.py index e461cc05e3..ffafc7561e 100644 --- a/metricflow-semantics/metricflow_semantics/specs/patterns/typed_patterns.py +++ b/metricflow-semantics/metricflow_semantics/specs/patterns/typed_patterns.py @@ -55,7 +55,9 @@ def from_call_parameter_set( # noqa: D102 @property @override def element_pre_filter(self) -> LinkableElementFilter: - return LinkableElementFilter(without_any_of=frozenset({LinkableElementProperty.METRIC})) + return super().element_pre_filter.merge( + LinkableElementFilter(without_any_of=frozenset({LinkableElementProperty.METRIC})) + ) @dataclass(frozen=True) @@ -101,7 +103,9 @@ def from_call_parameter_set( @property @override def element_pre_filter(self) -> LinkableElementFilter: - return LinkableElementFilter(without_any_of=frozenset({LinkableElementProperty.METRIC})) + return super().element_pre_filter.merge( + LinkableElementFilter(without_any_of=frozenset({LinkableElementProperty.METRIC})) + ) @dataclass(frozen=True)