
Commit

fix(eap): Fix divide by 0 errors caused when the sample count is 0 (#…
davidtsuk authored Dec 17, 2024
1 parent 4465733 commit e35e2a3
Showing 5 changed files with 74 additions and 11 deletions.
12 changes: 9 additions & 3 deletions snuba/web/rpc/common/aggregation.py
@@ -40,13 +40,16 @@ class ExtrapolationMeta:
     avg_sampling_rate: float

     @staticmethod
-    def from_row(row_data: Dict[str, Any], column_label: str) -> "ExtrapolationMeta":
+    def from_row(
+        row_data: Dict[str, Any], column_label: str
+    ) -> "ExtrapolationMeta | None":
         """
         Computes the reliability and average sample rate for a column based on the extrapolation columns.
+        If the sample count is 0, we return None as we don't have any data to extrapolate.
         """
         confidence_interval = None
         average_sample_rate = 0
-        sample_count = None
+        sample_count = 0
         is_percentile = False
         percentile = 0.0
         granularity = 0.0
@@ -87,7 +90,10 @@ def from_row(row_data: Dict[str, Any], column_label: str) -> "ExtrapolationMeta"
                 sample_count = col_value

         reliability = Reliability.RELIABILITY_UNSPECIFIED
-        if confidence_interval is not None and sample_count is not None:
+        if confidence_interval is not None:
+            if sample_count == 0:
+                return None
+
             estimate = row_data[column_label]
             # relative confidence represents the ratio of the confidence interval to the estimate (by default it is the upper bound)
             if is_percentile:
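With this change, ExtrapolationMeta.from_row can return None, so every caller needs a guard before touching reliability or avg_sampling_rate. A minimal sketch of the new calling pattern, assuming only what the diff shows (the column label and row contents below are hypothetical; real rows also carry the extrapolation columns for confidence interval, average sample rate, and sample count that from_row inspects):

from snuba.web.rpc.common.aggregation import ExtrapolationMeta

# Hypothetical row for a p50 column; the extrapolation columns are omitted here.
row_data = {"p50(duration)": 42.0}

meta = ExtrapolationMeta.from_row(row_data, "p50(duration)")
if meta is None:
    # Per this commit, from_row returns None when the row has a confidence
    # interval column but a sample count of 0: there is no data to extrapolate.
    print("no samples behind this value; skipping reliability")
else:
    print(meta.avg_sampling_rate, meta.reliability)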
17 changes: 10 additions & 7 deletions snuba/web/rpc/v1/endpoint_time_series.py
@@ -173,14 +173,17 @@ def _convert_result_timeseries(
                     extrapolation_meta = ExtrapolationMeta.from_row(
                         row_data, timeseries.label
                     )
-                    timeseries.data_points.append(
-                        DataPoint(
-                            data=row_data[timeseries.label],
-                            data_present=True,
-                            avg_sampling_rate=extrapolation_meta.avg_sampling_rate,
-                            reliability=extrapolation_meta.reliability,
+                    if extrapolation_meta is not None:
+                        timeseries.data_points.append(
+                            DataPoint(
+                                data=row_data[timeseries.label],
+                                data_present=True,
+                                avg_sampling_rate=extrapolation_meta.avg_sampling_rate,
+                                reliability=extrapolation_meta.reliability,
+                            )
                         )
-                    )
+                    else:
+                        timeseries.data_points.append(DataPoint(data=0, data_present=False))
     return result_timeseries.values()
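The new else branch keeps the bucket list stable: a time bucket with zero samples still produces a DataPoint, just flagged data_present=False, which is what the new test further down asserts. As a rough illustration of how a consumer might read such a series (plain Python over the returned message, not snuba code):

def present_values(series) -> list[float]:
    """Collect values only from buckets that actually had samples.

    series is a TimeSeries message as returned by EndpointTimeSeries;
    empty buckets are still emitted with data_present=False, so positions
    stay aligned with series.buckets.
    """
    return [point.data for point in series.data_points if point.data_present]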
3 changes: 2 additions & 1 deletion snuba/web/rpc/v1/endpoint_trace_item_table.py
@@ -212,7 +212,8 @@ def _convert_results(
             res[column_name].attribute_name = column_name
             extrapolation_meta = ExtrapolationMeta.from_row(row, column_name)
             if (
-                extrapolation_meta.reliability
+                extrapolation_meta is not None
+                and extrapolation_meta.reliability
                 != Reliability.RELIABILITY_UNSPECIFIED
             ):
                 res[column_name].reliabilities.append(
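Note the order of the conditions in the table endpoint: because "and" short-circuits, extrapolation_meta.reliability is only read once the meta is known to be non-None. A standalone illustration of that guard (generic Python, not snuba code):

from typing import Optional

class Meta:
    reliability = "RELIABILITY_LOW"

def has_reliability(meta: Optional[Meta]) -> bool:
    # "and" short-circuits: meta.reliability is never evaluated when meta is
    # None, so this cannot raise AttributeError.
    return meta is not None and meta.reliability != "RELIABILITY_UNSPECIFIED"

assert has_reliability(None) is False
assert has_reliability(Meta()) is True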
1 change: 1 addition & 0 deletions tests/web/rpc/test_aggregation.py
@@ -147,6 +147,7 @@ def test_get_extrapolation_meta(
     reliability: Reliability.ValueType,
 ) -> None:
     extrapolation_meta = ExtrapolationMeta.from_row(row_data, column_name)
+    assert extrapolation_meta is not None
     assert extrapolation_meta.avg_sampling_rate == average_sample_rate
     assert extrapolation_meta.reliability == reliability
52 changes: 52 additions & 0 deletions
@@ -301,6 +301,58 @@ def test_confidence_interval_zero_estimate(self) -> None:
             )
         ]

+    def test_confidence_interval_no_samples(self) -> None:
+        # store a test metric with a value of 0, every second for an hour
+        granularity_secs = 120
+        query_duration = 3600
+        store_timeseries(
+            BASE_TIME,
+            1,
+            3600,
+            metrics=[DummyMetric("test_metric", get_value=lambda x: 0)],
+            measurements=[
+                DummyMeasurement("client_sample_rate", get_value=lambda s: 1)
+            ],
+        )
+
+        message = TimeSeriesRequest(
+            meta=RequestMeta(
+                project_ids=[1, 2, 3],
+                organization_id=1,
+                cogs_category="something",
+                referrer="something",
+                start_timestamp=Timestamp(seconds=int(BASE_TIME.timestamp())),
+                end_timestamp=Timestamp(
+                    seconds=int(BASE_TIME.timestamp() + query_duration)
+                ),
+            ),
+            aggregations=[
+                AttributeAggregation(
+                    aggregate=Function.FUNCTION_P50,
+                    key=AttributeKey(
+                        type=AttributeKey.TYPE_FLOAT, name="test_metric2"
+                    ),  # non-existent metric
+                    label="p50(test_metric2)",
+                    extrapolation_mode=ExtrapolationMode.EXTRAPOLATION_MODE_SAMPLE_WEIGHTED,
+                ),
+            ],
+            granularity_secs=granularity_secs,
+        )
+        response = EndpointTimeSeries().execute(message)
+        expected_buckets = [
+            Timestamp(seconds=int(BASE_TIME.timestamp()) + secs)
+            for secs in range(0, query_duration, granularity_secs)
+        ]
+        assert sorted(response.result_timeseries, key=lambda x: x.label) == [
+            TimeSeries(
+                label="p50(test_metric2)",
+                buckets=expected_buckets,
+                data_points=[
+                    DataPoint(data_present=False) for _ in range(len(expected_buckets))
+                ],
+            )
+        ]
+
     def test_count_unreliable(self) -> None:
         # store a test metric with a value of 1, every second for an hour
         granularity_secs = 120
