From d11082b1a8438f9945c93e39d4ff3f5ac2e7b326 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Fri, 27 Dec 2024 16:25:53 -0500 Subject: [PATCH] feat: remove hardcoded eap_spans references in table and timeseries RPCs --- snuba/web/rpc/common/common.py | 103 +++++------------- snuba/web/rpc/v1/endpoint_time_series.py | 11 +- snuba/web/rpc/v1/endpoint_trace_item_table.py | 11 +- 3 files changed, 34 insertions(+), 91 deletions(-) diff --git a/snuba/web/rpc/common/common.py b/snuba/web/rpc/common/common.py index 923f0c9aa5..de08b62ccd 100644 --- a/snuba/web/rpc/common/common.py +++ b/snuba/web/rpc/common/common.py @@ -1,7 +1,7 @@ from datetime import datetime, timedelta -from typing import Final, Mapping, Sequence, Set +from typing import Sequence -from sentry_protos.snuba.v1.request_common_pb2 import RequestMeta +from sentry_protos.snuba.v1.request_common_pb2 import RequestMeta, TraceItemName from sentry_protos.snuba.v1.trace_item_attribute_pb2 import ( AttributeKey, VirtualColumnContext, @@ -11,8 +11,11 @@ TraceItemFilter, ) +from snuba.datasets.entities.entity_key import EntityKey +from snuba.datasets.entities.factory import get_entity from snuba.query import Query from snuba.query.conditions import combine_and_conditions, combine_or_conditions +from snuba.query.data_source.simple import Entity from snuba.query.dsl import Functions as f from snuba.query.dsl import ( and_cond, @@ -72,86 +75,25 @@ def transform(exp: Expression) -> Expression: query.transform_expressions(transform) -# These are the columns which aren't stored in attr_str_ nor attr_num_ in clickhouse -NORMALIZED_COLUMNS: Final[Mapping[str, AttributeKey.Type.ValueType]] = { - "sentry.organization_id": AttributeKey.Type.TYPE_INT, - "sentry.project_id": AttributeKey.Type.TYPE_INT, - "sentry.service": AttributeKey.Type.TYPE_STRING, - "sentry.span_id": AttributeKey.Type.TYPE_STRING, # this is converted by a processor on the storage - "sentry.parent_span_id": AttributeKey.Type.TYPE_STRING, # this is
converted by a processor on the storage - "sentry.segment_id": AttributeKey.Type.TYPE_STRING, # this is converted by a processor on the storage - "sentry.segment_name": AttributeKey.Type.TYPE_STRING, - "sentry.is_segment": AttributeKey.Type.TYPE_BOOLEAN, - "sentry.duration_ms": AttributeKey.Type.TYPE_FLOAT, - "sentry.exclusive_time_ms": AttributeKey.Type.TYPE_FLOAT, - "sentry.retention_days": AttributeKey.Type.TYPE_INT, - "sentry.name": AttributeKey.Type.TYPE_STRING, - "sentry.sampling_weight": AttributeKey.Type.TYPE_FLOAT, - "sentry.sampling_factor": AttributeKey.Type.TYPE_FLOAT, - "sentry.timestamp": AttributeKey.Type.TYPE_UNSPECIFIED, - "sentry.start_timestamp": AttributeKey.Type.TYPE_UNSPECIFIED, - "sentry.end_timestamp": AttributeKey.Type.TYPE_UNSPECIFIED, -} - -TIMESTAMP_COLUMNS: Final[Set[str]] = { - "sentry.timestamp", - "sentry.start_timestamp", - "sentry.end_timestamp", -} - - def attribute_key_to_expression(attr_key: AttributeKey) -> Expression: - def _build_label_mapping_key(attr_key: AttributeKey) -> str: - return attr_key.name + "_" + AttributeKey.Type.Name(attr_key.type) - if attr_key.type == AttributeKey.Type.TYPE_UNSPECIFIED: raise BadSnubaRPCRequestException( f"attribute key {attr_key.name} must have a type specified" ) - alias = _build_label_mapping_key(attr_key) - - if attr_key.name == "sentry.trace_id": - if attr_key.type == AttributeKey.Type.TYPE_STRING: - return f.CAST(column("trace_id"), "String", alias=alias) - raise BadSnubaRPCRequestException( - f"Attribute {attr_key.name} must be requested as a string, got {attr_key.type}" - ) - - if attr_key.name in TIMESTAMP_COLUMNS: - if attr_key.type == AttributeKey.Type.TYPE_STRING: - return f.CAST( - column(attr_key.name[len("sentry.") :]), "String", alias=alias - ) - if attr_key.type == AttributeKey.Type.TYPE_INT: - return f.CAST(column(attr_key.name[len("sentry.") :]), "Int64", alias=alias) - if attr_key.type == AttributeKey.Type.TYPE_FLOAT: - return f.CAST( - column(attr_key.name[len("sentry.") 
:]), "Float64", alias=alias - ) - raise BadSnubaRPCRequestException( - f"Attribute {attr_key.name} must be requested as a string, float, or integer, got {attr_key.type}" - ) + alias = attr_key.name + "_" + AttributeKey.Type.Name(attr_key.type) - if attr_key.name in NORMALIZED_COLUMNS: - if NORMALIZED_COLUMNS[attr_key.name] == attr_key.type: - return column(attr_key.name[len("sentry.") :], alias=attr_key.name) - raise BadSnubaRPCRequestException( - f"Attribute {attr_key.name} must be requested as {NORMALIZED_COLUMNS[attr_key.name]}, got {attr_key.type}" - ) - - # End of special handling, just send to the appropriate bucket if attr_key.type == AttributeKey.Type.TYPE_STRING: return SubscriptableReference( alias=alias, column=column("attr_str"), key=literal(attr_key.name) ) if attr_key.type == AttributeKey.Type.TYPE_FLOAT: return SubscriptableReference( - alias=alias, column=column("attr_num"), key=literal(attr_key.name) + alias=alias, column=column("attr_f64"), key=literal(attr_key.name) ) if attr_key.type == AttributeKey.Type.TYPE_INT: return f.CAST( SubscriptableReference( - alias=None, column=column("attr_num"), key=literal(attr_key.name) + alias=None, column=column("attr_i64"), key=literal(attr_key.name) ), "Int64", alias=alias, @@ -160,7 +102,7 @@ def _build_label_mapping_key(attr_key: AttributeKey) -> str: return f.CAST( SubscriptableReference( alias=None, - column=column("attr_num"), + column=column("attr_f64"), key=literal(attr_key.name), ), "Boolean", @@ -209,20 +151,19 @@ def apply_virtual_columns( mapped_column_to_context = {c.to_column_name: c for c in virtual_column_contexts} def transform_expressions(expression: Expression) -> Expression: - # virtual columns will show up as `attr_str[virtual_column_name]` or `attr_num[virtual_column_name]` + # virtual columns will show up as `attr_str[virtual_column_name]` if not isinstance(expression, SubscriptableReference): return expression if expression.column.column_name != "attr_str": return expression + context = 
mapped_column_to_context.get(str(expression.key.value)) if context: attribute_expression = attribute_key_to_expression( AttributeKey( name=context.from_column_name, - type=NORMALIZED_COLUMNS.get( - context.from_column_name, AttributeKey.TYPE_STRING - ), + type=AttributeKey.TYPE_STRING, ) ) return f.transform( @@ -340,8 +281,6 @@ def trace_item_filters_to_expression(item_filter: TraceItemFilter) -> Expression if item_filter.HasField("exists_filter"): k = item_filter.exists_filter.key - if k.name in NORMALIZED_COLUMNS.keys(): - return f.isNotNull(column(k.name)) if k.type == AttributeKey.Type.TYPE_STRING: return f.mapContains(column("attr_str"), literal(k.name)) else: @@ -363,6 +302,24 @@ def project_id_and_org_conditions(meta: RequestMeta) -> Expression: ) +def entity_key_from_trace_item_name(name: TraceItemName.ValueType) -> EntityKey: + # TODO type is not always specified, fix that then delete this + if name == TraceItemName.TRACE_ITEM_NAME_UNSPECIFIED: + return EntityKey("eap_spans_rpc") + if name == TraceItemName.TRACE_ITEM_NAME_EAP_SPANS: + return EntityKey("eap_spans_rpc") + raise BadSnubaRPCRequestException(f"unknown trace item type: ${name}") + + +def entity_from_trace_item_name(name: TraceItemName.ValueType) -> Entity: + entity_key = entity_key_from_trace_item_name(name) + return Entity( + key=entity_key, + schema=get_entity(entity_key).get_data_model(), + sample=None, + ) + + def timestamp_in_range_condition(start_ts: int, end_ts: int) -> Expression: return and_cond( f.less( diff --git a/snuba/web/rpc/v1/endpoint_time_series.py b/snuba/web/rpc/v1/endpoint_time_series.py index d263852fe2..49635d36c3 100644 --- a/snuba/web/rpc/v1/endpoint_time_series.py +++ b/snuba/web/rpc/v1/endpoint_time_series.py @@ -16,11 +16,8 @@ from snuba.attribution.appid import AppID from snuba.attribution.attribution_info import AttributionInfo -from snuba.datasets.entities.entity_key import EntityKey -from snuba.datasets.entities.factory import get_entity from 
snuba.datasets.pluggable_dataset import PluggableDataset from snuba.query import OrderBy, OrderByDirection, SelectedExpression -from snuba.query.data_source.simple import Entity from snuba.query.dsl import Functions as f from snuba.query.dsl import column from snuba.query.logical import Query @@ -38,6 +35,7 @@ from snuba.web.rpc.common.common import ( attribute_key_to_expression, base_conditions_and, + entity_from_trace_item_name, trace_item_filters_to_expression, treeify_or_and_conditions, ) @@ -188,12 +186,7 @@ def _convert_result_timeseries( def _build_query(request: TimeSeriesRequest) -> Query: - # TODO: This is hardcoded still - entity = Entity( - key=EntityKey("eap_spans"), - schema=get_entity(EntityKey("eap_spans")).get_data_model(), - sample=None, - ) + entity = entity_from_trace_item_name(request.meta.trace_item_name) aggregation_columns = [ SelectedExpression( diff --git a/snuba/web/rpc/v1/endpoint_trace_item_table.py b/snuba/web/rpc/v1/endpoint_trace_item_table.py index 7bece3f811..7cfcad3a6e 100644 --- a/snuba/web/rpc/v1/endpoint_trace_item_table.py +++ b/snuba/web/rpc/v1/endpoint_trace_item_table.py @@ -20,11 +20,8 @@ from snuba.attribution.appid import AppID from snuba.attribution.attribution_info import AttributionInfo -from snuba.datasets.entities.entity_key import EntityKey -from snuba.datasets.entities.factory import get_entity from snuba.datasets.pluggable_dataset import PluggableDataset from snuba.query import OrderBy, OrderByDirection, SelectedExpression -from snuba.query.data_source.simple import Entity from snuba.query.logical import Query from snuba.query.query_settings import HTTPQuerySettings from snuba.request import Request as SnubaRequest @@ -41,6 +38,7 @@ apply_virtual_columns, attribute_key_to_expression, base_conditions_and, + entity_from_trace_item_name, trace_item_filters_to_expression, treeify_or_and_conditions, ) @@ -79,12 +77,7 @@ def _convert_order_by( def _build_query(request: TraceItemTableRequest) -> Query: - # TODO: This is 
hardcoded still - entity = Entity( - key=EntityKey("eap_spans"), - schema=get_entity(EntityKey("eap_spans")).get_data_model(), - sample=None, - ) + entity = entity_from_trace_item_name(request.meta.trace_item_name) selected_columns = [] for column in request.columns: