Skip to content

Commit

Permalink
Handle confidence intervals with no matching data
Browse files Browse the repository at this point in the history
  • Loading branch information
davidtsuk committed Dec 16, 2024
1 parent d5b949e commit cb27336
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 11 deletions.
10 changes: 7 additions & 3 deletions snuba/web/rpc/common/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ class ExtrapolationMeta:
avg_sampling_rate: float

@staticmethod
def from_row(row_data: Dict[str, Any], column_label: str) -> "ExtrapolationMeta":
def from_row(
row_data: Dict[str, Any], column_label: str
) -> "ExtrapolationMeta | None":
"""
Computes the reliability and average sample rate for a column based on the extrapolation columns.
"""
Expand Down Expand Up @@ -87,7 +89,7 @@ def from_row(row_data: Dict[str, Any], column_label: str) -> "ExtrapolationMeta"
sample_count = col_value

reliability = Reliability.RELIABILITY_UNSPECIFIED
if confidence_interval is not None and sample_count is not None:
if confidence_interval is not None and sample_count != 0:
estimate = row_data[column_label]
# relative confidence represents the ratio of the confidence interval to the estimate (by default it is the upper bound)
if is_percentile:
Expand Down Expand Up @@ -124,7 +126,9 @@ def from_row(row_data: Dict[str, Any], column_label: str) -> "ExtrapolationMeta"
else Reliability.RELIABILITY_LOW
)

return ExtrapolationMeta(reliability, average_sample_rate)
return ExtrapolationMeta(reliability, average_sample_rate)

return None


@dataclass(frozen=True)
Expand Down
17 changes: 10 additions & 7 deletions snuba/web/rpc/v1/endpoint_time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,14 +173,17 @@ def _convert_result_timeseries(
extrapolation_meta = ExtrapolationMeta.from_row(
row_data, timeseries.label
)
timeseries.data_points.append(
DataPoint(
data=row_data[timeseries.label],
data_present=True,
avg_sampling_rate=extrapolation_meta.avg_sampling_rate,
reliability=extrapolation_meta.reliability,
if extrapolation_meta is not None:
timeseries.data_points.append(
DataPoint(
data=row_data[timeseries.label],
data_present=True,
avg_sampling_rate=extrapolation_meta.avg_sampling_rate,
reliability=extrapolation_meta.reliability,
)
)
)
else:
timeseries.data_points.append(DataPoint(data=0, data_present=False))
return result_timeseries.values()


Expand Down
3 changes: 2 additions & 1 deletion snuba/web/rpc/v1/endpoint_trace_item_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,8 @@ def _convert_results(
res[column_name].attribute_name = column_name
extrapolation_meta = ExtrapolationMeta.from_row(row, column_name)
if (
extrapolation_meta.reliability
extrapolation_meta is not None
and extrapolation_meta.reliability
!= Reliability.RELIABILITY_UNSPECIFIED
):
res[column_name].reliabilities.append(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,58 @@ def test_confidence_interval_zero_estimate(self) -> None:
)
]

def test_confidence_interval_no_samples(self) -> None:
# store a a test metric with a value of 1, every second for an hour
granularity_secs = 120
query_duration = 3600
store_timeseries(
BASE_TIME,
1,
3600,
metrics=[DummyMetric("test_metric", get_value=lambda x: 0)],
measurements=[
DummyMeasurement("client_sample_rate", get_value=lambda s: 1)
],
)

message = TimeSeriesRequest(
meta=RequestMeta(
project_ids=[1, 2, 3],
organization_id=1,
cogs_category="something",
referrer="something",
start_timestamp=Timestamp(seconds=int(BASE_TIME.timestamp())),
end_timestamp=Timestamp(
seconds=int(BASE_TIME.timestamp() + query_duration)
),
),
aggregations=[
AttributeAggregation(
aggregate=Function.FUNCTION_P50,
key=AttributeKey(
type=AttributeKey.TYPE_FLOAT, name="test_metric2"
), # non-existent metric
label="p50(test_metric2)",
extrapolation_mode=ExtrapolationMode.EXTRAPOLATION_MODE_SAMPLE_WEIGHTED,
),
],
granularity_secs=granularity_secs,
)
response = EndpointTimeSeries().execute(message)
expected_buckets = [
Timestamp(seconds=int(BASE_TIME.timestamp()) + secs)
for secs in range(0, query_duration, granularity_secs)
]
assert sorted(response.result_timeseries, key=lambda x: x.label) == [
TimeSeries(
label="p50(test_metric2)",
buckets=expected_buckets,
data_points=[
DataPoint(data_present=False) for _ in range(len(expected_buckets))
],
)
]

def test_count_unreliable(self) -> None:
# store a a test metric with a value of 1, every second for an hour
granularity_secs = 120
Expand Down

0 comments on commit cb27336

Please sign in to comment.