-
Notifications
You must be signed in to change notification settings - Fork 94
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve cache for MetricLookup.linkable_elements_for_measure()
#1443
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,7 @@ | |
import functools | ||
import logging | ||
import time | ||
from typing import Dict, Optional, Sequence, Set | ||
from typing import Dict, Optional, Sequence, Set, Tuple | ||
|
||
from dbt_semantic_interfaces.enum_extension import assert_values_exhausted | ||
from dbt_semantic_interfaces.protocols.metric import Metric, MetricInputMeasure, MetricType | ||
|
@@ -13,6 +13,7 @@ | |
|
||
from metricflow_semantics.errors.error_classes import DuplicateMetricError, MetricNotFoundError, NonExistentMeasureError | ||
from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat | ||
from metricflow_semantics.model.linkable_element_property import LinkableElementProperty | ||
from metricflow_semantics.model.semantics.element_filter import LinkableElementFilter | ||
from metricflow_semantics.model.semantics.linkable_element_set import LinkableElementSet | ||
from metricflow_semantics.model.semantics.linkable_spec_resolver import ( | ||
|
@@ -62,25 +63,43 @@ def __init__( | |
MetricReference, Sequence[TimeDimensionSpec] | ||
] = {} | ||
|
||
@functools.lru_cache | ||
# Cache for `linkable_elements_for_measure()`. | ||
self._linkable_element_set_for_measure_cache: Dict[ | ||
Tuple[MeasureReference, LinkableElementFilter], LinkableElementSet | ||
] = {} | ||
|
||
def linkable_elements_for_measure( | ||
self, | ||
measure_reference: MeasureReference, | ||
element_filter: LinkableElementFilter = LinkableElementFilter(), | ||
) -> LinkableElementSet: | ||
"""Return the set of linkable elements reachable from a given measure.""" | ||
start_time = time.time() | ||
linkable_element_set = self._linkable_spec_resolver.get_linkable_element_set_for_measure( | ||
measure_reference=measure_reference, | ||
element_filter=element_filter, | ||
) | ||
# Don't cache the result when group-by-metrics are selected as there can be many of them and may significantly | ||
# increase memory usage. | ||
if ( | ||
LinkableElementProperty.METRIC in element_filter.with_any_of | ||
and LinkableElementProperty.METRIC not in element_filter.without_any_of | ||
): | ||
return self._linkable_spec_resolver.get_linkable_element_set_for_measure(measure_reference, element_filter) | ||
|
||
cache_key = (measure_reference, element_filter) | ||
result = self._linkable_element_set_for_measure_cache.get(cache_key) | ||
if result is not None: | ||
return result | ||
|
||
result = self._linkable_spec_resolver.get_linkable_element_set_for_measure(measure_reference, element_filter) | ||
self._linkable_element_set_for_measure_cache[cache_key] = result | ||
|
||
logger.debug( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Curious why you removed this log? Seems helpful to see if there was improvement on performance for this method. Also seems like it would be a good one to make There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This method can potentially be called a lot during query parsing, so it's easier to assess the runtime through DD tooling. e.g. a query with ~100 metrics, ~100 group-by items (including ones in filters) leads to a lot of log lines. Could leave a debug line in there though. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added that debug line. |
||
LazyFormat( | ||
lambda: f"Getting valid linkable elements for measure '{measure_reference.element_name}' took: {time.time() - start_time:.2f}s" | ||
"Finished getting linkable elements", | ||
measure_reference=measure_reference, | ||
element_filter=element_filter, | ||
runtime=time.time() - start_time, | ||
) | ||
) | ||
|
||
return linkable_element_set | ||
return result | ||
|
||
@functools.lru_cache | ||
def linkable_elements_for_no_metrics_query( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we remove the cache for group by metrics, doesn't that mean this will be super slow when we try to fetch group by metric options?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I had some concerns about memory usage, so caching of group-by-metrics is handled later through a separate cache that's LRU and avoids GC issues.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Got it 👍