Skip to content

Commit

Permalink
Merge branch 'master' into krm/scrubstrattr
Browse files Browse the repository at this point in the history
  • Loading branch information
volokluev authored Dec 20, 2024
2 parents 89be7c4 + 4535ce0 commit 2cfe00b
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 13 deletions.
2 changes: 1 addition & 1 deletion snuba/manual_jobs/scrub_ips_from_eap_spans.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def _get_query(self, cluster_name: str | None) -> str:
on_cluster = f"ON CLUSTER '{cluster_name}'" if cluster_name else ""
return f"""ALTER TABLE eap_spans_2_local
{on_cluster}
UPDATE `attr_str_1` = mapApply((k, v) -> (k, if(k = 'user.ip', 'scrubbed', v)), `attr_str_1`)
UPDATE `attr_str_14` = mapApply((k, v) -> (k, if(k = 'sentry.user.ip', 'scrubbed', v)), `attr_str_14`)
WHERE organization_id IN [{organization_ids}]
AND _sort_timestamp >= toDateTime('{start_datetime}')
AND _sort_timestamp < toDateTime('{end_datetime}')"""
Expand Down
2 changes: 1 addition & 1 deletion snuba/manual_jobs/scrub_users_from_eap_spans.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def _get_query(self, cluster_name: str | None) -> str:
on_cluster = f"ON CLUSTER '{cluster_name}'" if cluster_name else ""
return f"""ALTER TABLE eap_spans_2_local
{on_cluster}
UPDATE `attr_str_2` = mapApply((k, v) -> (k, if(k = 'user' AND startsWith(v, '{_IP_PREFIX}') AND (isIPv4String(substring(v, 4)) OR isIPv6String(substring(v, 4))), 'ip:scrubbed', v)), `attr_str_2`)
UPDATE `attr_str_11` = mapApply((k, v) -> (k, if(k = 'sentry.user' AND startsWith(v, '{_IP_PREFIX}') AND (isIPv4String(substring(v, 4)) OR isIPv6String(substring(v, 4))), 'ip:scrubbed', v)), `attr_str_11`)
WHERE organization_id IN [{organization_ids}]
AND _sort_timestamp >= toDateTime('{start_datetime}')
AND _sort_timestamp < toDateTime('{end_datetime}')"""
Expand Down
14 changes: 9 additions & 5 deletions tests/manual_jobs/test_scrub_ips_from_eap_spans.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def test_generate_query() -> None:
job._get_query(None)
== """ALTER TABLE eap_spans_2_local
UPDATE `attr_str_1` = mapApply((k, v) -> (k, if(k = 'user.ip', 'scrubbed', v)), `attr_str_1`)
UPDATE `attr_str_14` = mapApply((k, v) -> (k, if(k = 'sentry.user.ip', 'scrubbed', v)), `attr_str_14`)
WHERE organization_id IN [1,3,5,6]
AND _sort_timestamp >= toDateTime('2024-12-01T00:00:00')
AND _sort_timestamp < toDateTime('2024-12-10T00:00:00')"""
Expand Down Expand Up @@ -175,6 +175,7 @@ def _gen_message(
"transaction.method": "POST",
"transaction.op": "http.server",
"user": "ip:127.0.0.1",
"user.ip": _USER_IP,
},
"span_id": "123456781234567D",
"tags": {
Expand All @@ -185,7 +186,6 @@ def _gen_message(
"relay_protocol_version": "3",
"relay_use_post_or_schedule": "True",
"relay_use_post_or_schedule_rejected": "version",
"user.ip": _USER_IP,
"spans_over_limit": "False",
"server_name": "blah",
"color": random.choice(["red", "green", "blue"]),
Expand Down Expand Up @@ -219,12 +219,16 @@ def _generate_request(
)
),
columns=[
Column(key=AttributeKey(type=AttributeKey.TYPE_STRING, name="user.ip"))
Column(
key=AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.user.ip")
)
],
order_by=[
TraceItemTableRequest.OrderBy(
column=Column(
key=AttributeKey(type=AttributeKey.TYPE_STRING, name="user.ip")
key=AttributeKey(
type=AttributeKey.TYPE_STRING, name="sentry.user.ip"
)
)
)
],
Expand All @@ -235,7 +239,7 @@ def _generate_expected_response(ip: str) -> TraceItemTableResponse:
return TraceItemTableResponse(
column_values=[
TraceItemColumnValues(
attribute_name="user.ip",
attribute_name="sentry.user.ip",
results=[AttributeValue(val_str=ip) for _ in range(20)],
)
],
Expand Down
12 changes: 6 additions & 6 deletions tests/manual_jobs/test_scrub_users_from_eap_spans.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def test_generate_query() -> None:
job._get_query(None)
== """ALTER TABLE eap_spans_2_local
UPDATE `attr_str_2` = mapApply((k, v) -> (k, if(k = 'user' AND startsWith(v, 'ip:') AND (isIPv4String(substring(v, 4)) OR isIPv6String(substring(v, 4))), 'ip:scrubbed', v)), `attr_str_2`)
UPDATE `attr_str_11` = mapApply((k, v) -> (k, if(k = 'sentry.user' AND startsWith(v, 'ip:') AND (isIPv4String(substring(v, 4)) OR isIPv6String(substring(v, 4))), 'ip:scrubbed', v)), `attr_str_11`)
WHERE organization_id IN [1,3,5,6]
AND _sort_timestamp >= toDateTime('2024-12-01T00:00:00')
AND _sort_timestamp < toDateTime('2024-12-10T00:00:00')"""
Expand Down Expand Up @@ -185,8 +185,6 @@ def _gen_message(
"relay_protocol_version": "3",
"relay_use_post_or_schedule": "True",
"relay_use_post_or_schedule_rejected": "version",
"user.ip": "192.168.0.45",
"user": user,
"spans_over_limit": "False",
"server_name": "blah",
"color": random.choice(["red", "green", "blue"]),
Expand Down Expand Up @@ -219,11 +217,13 @@ def _generate_request(
key=AttributeKey(type=AttributeKey.TYPE_STRING, name="color")
)
),
columns=[Column(key=AttributeKey(type=AttributeKey.TYPE_STRING, name="user"))],
columns=[
Column(key=AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.user"))
],
order_by=[
TraceItemTableRequest.OrderBy(
column=Column(
key=AttributeKey(type=AttributeKey.TYPE_STRING, name="user")
key=AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.user")
)
)
],
Expand All @@ -234,7 +234,7 @@ def _generate_expected_response(user: str) -> TraceItemTableResponse:
return TraceItemTableResponse(
column_values=[
TraceItemColumnValues(
attribute_name="user",
attribute_name="sentry.user",
results=[AttributeValue(val_str=user) for _ in range(20)],
)
],
Expand Down

0 comments on commit 2cfe00b

Please sign in to comment.