From e35e2a3b9cea252ca989ba7f308c8395cae5837d Mon Sep 17 00:00:00 2001 From: davidtsuk <132949946+davidtsuk@users.noreply.github.com> Date: Tue, 17 Dec 2024 13:20:53 -0800 Subject: [PATCH] fix(eap): Fix divide by 0 errors caused when the sample count is 0 (#6681) Fixes https://github.com/getsentry/eap-planning/issues/132 --- snuba/web/rpc/common/aggregation.py | 12 +++-- snuba/web/rpc/v1/endpoint_time_series.py | 17 +++--- snuba/web/rpc/v1/endpoint_trace_item_table.py | 3 +- tests/web/rpc/test_aggregation.py | 1 + ...test_endpoint_time_series_extrapolation.py | 52 +++++++++++++++++++ 5 files changed, 74 insertions(+), 11 deletions(-) diff --git a/snuba/web/rpc/common/aggregation.py b/snuba/web/rpc/common/aggregation.py index 228f3dd05a..5d248d51cd 100644 --- a/snuba/web/rpc/common/aggregation.py +++ b/snuba/web/rpc/common/aggregation.py @@ -40,13 +40,16 @@ class ExtrapolationMeta: avg_sampling_rate: float @staticmethod - def from_row(row_data: Dict[str, Any], column_label: str) -> "ExtrapolationMeta": + def from_row( + row_data: Dict[str, Any], column_label: str + ) -> "ExtrapolationMeta | None": """ Computes the reliability and average sample rate for a column based on the extrapolation columns. + If the sample count is 0, we return None as we don't have any data to extrapolate. """ confidence_interval = None average_sample_rate = 0 - sample_count = None + sample_count = 0 is_percentile = False percentile = 0.0 granularity = 0.0 @@ -87,7 +90,10 @@ def from_row(row_data: Dict[str, Any], column_label: str) -> "ExtrapolationMeta" sample_count = col_value reliability = Reliability.RELIABILITY_UNSPECIFIED - if confidence_interval is not None and sample_count is not None: + if confidence_interval is not None: + if sample_count == 0: + return None + estimate = row_data[column_label] # relative confidence represents the ratio of the confidence interval to the estimate (by default it is the upper bound) if is_percentile: diff --git a/snuba/web/rpc/v1/endpoint_time_series.py b/snuba/web/rpc/v1/endpoint_time_series.py index 1b87017146..d263852fe2 100644 --- a/snuba/web/rpc/v1/endpoint_time_series.py +++ b/snuba/web/rpc/v1/endpoint_time_series.py @@ -173,14 +173,17 @@ def _convert_result_timeseries( extrapolation_meta = ExtrapolationMeta.from_row( row_data, timeseries.label ) - timeseries.data_points.append( - DataPoint( - data=row_data[timeseries.label], - data_present=True, - avg_sampling_rate=extrapolation_meta.avg_sampling_rate, - reliability=extrapolation_meta.reliability, + if extrapolation_meta is not None: + timeseries.data_points.append( + DataPoint( + data=row_data[timeseries.label], + data_present=True, + avg_sampling_rate=extrapolation_meta.avg_sampling_rate, + reliability=extrapolation_meta.reliability, + ) ) - ) + else: + timeseries.data_points.append(DataPoint(data=0, data_present=False)) return result_timeseries.values() diff --git a/snuba/web/rpc/v1/endpoint_trace_item_table.py b/snuba/web/rpc/v1/endpoint_trace_item_table.py index bcc3578257..7bece3f811 100644 --- a/snuba/web/rpc/v1/endpoint_trace_item_table.py +++ b/snuba/web/rpc/v1/endpoint_trace_item_table.py @@ -212,7 +212,8 @@ def _convert_results( res[column_name].attribute_name = column_name extrapolation_meta = ExtrapolationMeta.from_row(row, column_name) if ( - extrapolation_meta.reliability + extrapolation_meta is not None + and extrapolation_meta.reliability != Reliability.RELIABILITY_UNSPECIFIED ): res[column_name].reliabilities.append( diff --git a/tests/web/rpc/test_aggregation.py b/tests/web/rpc/test_aggregation.py index e582dd0edc..e6ba7e3c42 100644 --- a/tests/web/rpc/test_aggregation.py +++ b/tests/web/rpc/test_aggregation.py @@ -147,6 +147,7 @@ def test_get_extrapolation_meta( reliability: Reliability.ValueType, ) -> None: extrapolation_meta = ExtrapolationMeta.from_row(row_data, column_name) + assert extrapolation_meta is not None assert extrapolation_meta.avg_sampling_rate == average_sample_rate assert extrapolation_meta.reliability == reliability diff --git a/tests/web/rpc/v1/test_endpoint_time_series/test_endpoint_time_series_extrapolation.py b/tests/web/rpc/v1/test_endpoint_time_series/test_endpoint_time_series_extrapolation.py index f53c40aabd..a12ff4c226 100644 --- a/tests/web/rpc/v1/test_endpoint_time_series/test_endpoint_time_series_extrapolation.py +++ b/tests/web/rpc/v1/test_endpoint_time_series/test_endpoint_time_series_extrapolation.py @@ -301,6 +301,58 @@ def test_confidence_interval_zero_estimate(self) -> None: ) ] + def test_confidence_interval_no_samples(self) -> None: + # store a a test metric with a value of 1, every second for an hour + granularity_secs = 120 + query_duration = 3600 + store_timeseries( + BASE_TIME, + 1, + 3600, + metrics=[DummyMetric("test_metric", get_value=lambda x: 0)], + measurements=[ + DummyMeasurement("client_sample_rate", get_value=lambda s: 1) + ], + ) + + message = TimeSeriesRequest( + meta=RequestMeta( + project_ids=[1, 2, 3], + organization_id=1, + cogs_category="something", + referrer="something", + start_timestamp=Timestamp(seconds=int(BASE_TIME.timestamp())), + end_timestamp=Timestamp( + seconds=int(BASE_TIME.timestamp() + query_duration) + ), + ), + aggregations=[ + AttributeAggregation( + aggregate=Function.FUNCTION_P50, + key=AttributeKey( + type=AttributeKey.TYPE_FLOAT, name="test_metric2" + ), # non-existent metric + label="p50(test_metric2)", + extrapolation_mode=ExtrapolationMode.EXTRAPOLATION_MODE_SAMPLE_WEIGHTED, + ), + ], + granularity_secs=granularity_secs, + ) + response = EndpointTimeSeries().execute(message) + expected_buckets = [ + Timestamp(seconds=int(BASE_TIME.timestamp()) + secs) + for secs in range(0, query_duration, granularity_secs) + ] + assert sorted(response.result_timeseries, key=lambda x: x.label) == [ + TimeSeries( + label="p50(test_metric2)", + buckets=expected_buckets, + data_points=[ + DataPoint(data_present=False) for _ in range(len(expected_buckets)) + ], + ) + ] + def test_count_unreliable(self) -> None: # store a a test metric with a value of 1, every second for an hour granularity_secs = 120