Skip to content

Commit

Permalink
[CapMan] Add throttling to ReferrerGuardRail policy (#6014)
Browse files Browse the repository at this point in the history
* added throttling

* assertion

* added test

* made throttling configurable

* fix

* take out print statements

* comments

* changed default for roll out

* test

* test

* division

* descriptions

---------

Co-authored-by: Rachel Chen <[email protected]>
Co-authored-by: Rachel Chen <[email protected]>
  • Loading branch information
3 people authored Jun 11, 2024
1 parent 43a99f1 commit 85d8728
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 5 deletions.
45 changes: 40 additions & 5 deletions snuba/query/allocation_policies/per_referrer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
logger = logging.getLogger("snuba.query.allocation_policy_per_referrer")

# Default number of threads used to execute a query for a referrer when no
# override applies.
_DEFAULT_MAX_THREADS = 10
# Default for the "default_concurrent_request_per_referrer" config: how many
# concurrent queries a single referrer may have in flight.
_DEFAULT_CONCURRENT_REQUEST_PER_REFERRER = 100
# Default for the per-referrer "referrer_concurrent_override" config
# (-1 presumably means "no override set" -- confirm against usage).
_REFERRER_CONCURRENT_OVERRIDE = -1
# Default for the per-referrer "referrer_max_threads_override" config
# (-1 presumably means "no override set" -- confirm against usage).
_REFERRER_MAX_THREADS_OVERRIDE = -1
# default_concurrent_request_per_referrer divided by this value gives the
# concurrency threshold above which a referrer's queries are throttled.
_REQUESTS_THROTTLE_DIVIDER = 1
# max threads divided by this value is the number of threads used to execute
# queries for a throttled referrer.
_THREADS_THROTTLE_DIVIDER = 1


class ReferrerGuardRailPolicy(BaseConcurrentRateLimitAllocationPolicy):
Expand All @@ -28,7 +33,9 @@ class ReferrerGuardRailPolicy(BaseConcurrentRateLimitAllocationPolicy):
This concern is orthogonal to customer rate limits in its purpose. This rate limiter being tripped is a problem
caused by sentry developers, not customer abuse. It either means that a feature was released that queries this referrer
too much or that an appropriate rate limit was not set somewhere upstream. It affects customers randomly and basically
acts as a load shedder.
acts as a load shedder. As a referrer approaches the rate limiter's threshold for rejecting queries, that referrer's
queries will get throttled. The threshold for throttling and the (reduced) number of threads are configurable via
_REQUESTS_THROTTLE_DIVIDER and _THREADS_THROTTLE_DIVIDER
For example, a product team may push out a feature that sends 20 snuba queries every 5 seconds on the UI.
In that case, that feature should break but others should continue to be served.
Expand All @@ -47,21 +54,33 @@ def _additional_config_definitions(self) -> list[AllocationPolicyConfig]:
""",
value_type=int,
param_types={},
default=100,
default=_DEFAULT_CONCURRENT_REQUEST_PER_REFERRER,
),
AllocationPolicyConfig(
name="referrer_concurrent_override",
description="""override the concurrent limit for a referrer""",
value_type=int,
param_types={"referrer": str},
default=-1,
default=_REFERRER_CONCURRENT_OVERRIDE,
),
AllocationPolicyConfig(
name="referrer_max_threads_override",
description="""override the max_threads for a referrer, applies to every query made by that referrer""",
param_types={"referrer": str},
value_type=int,
default=-1,
default=_REFERRER_MAX_THREADS_OVERRIDE,
),
AllocationPolicyConfig(
name="requests_throttle_divider",
description="default_concurrent_request_per_referrer divided by this value will be the threshold at which we will decrease the number of threads (THROTTLED_THREADS) used to execute queries",
value_type=int,
default=_REQUESTS_THROTTLE_DIVIDER,
),
AllocationPolicyConfig(
name="threads_throttle_divider",
description="max threads divided by this number is the number of threads we use to execute queries for a throttled referrer",
value_type=int,
default=_THREADS_THROTTLE_DIVIDER,
),
]

Expand Down Expand Up @@ -100,6 +119,22 @@ def _get_quota_allowance(
query_id,
rate_limit_params,
)
assert (
rate_limit_params.concurrent_limit is not None
), "concurrent_limit must be set"
num_threads = self._get_max_threads(referrer)
requests_throttle_threshold = max(
1,
self.get_config_value("default_concurrent_request_per_referrer")
// self.get_config_value("requests_throttle_divider"),
)
if rate_limit_stats.concurrent > requests_throttle_threshold:
num_threads = max(
1, num_threads // self.get_config_value("threads_throttle_divider")
)
self.metrics.increment(
"concurrent_queries_throttled", tags={"referrer": referrer}
)
self.metrics.timing(
"concurrent_queries_referrer",
rate_limit_stats.concurrent,
Expand All @@ -112,7 +147,7 @@ def _get_quota_allowance(
}
return QuotaAllowance(
can_run=can_run,
max_threads=self._get_max_threads(referrer),
max_threads=num_threads,
explanation=decision_explanation,
)

Expand Down
28 changes: 28 additions & 0 deletions tests/query/allocation_policies/test_per_referrer.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,34 @@ def test_policy_pass_basic(self):
tenant_ids={"referrer": "statistical_detectors"}, query_id="4"
).can_run

@pytest.mark.redis_db
def test_throttle(self) -> None:
    """Queries past the throttle threshold still run, but with fewer threads.

    With a concurrent limit of 4 and requests_throttle_divider of 2, the
    throttle threshold is 2 in-flight queries; a throttled referrer gets
    max_threads // threads_throttle_divider threads.
    """
    policy = ReferrerGuardRailPolicy.from_kwargs(
        storage_key="generic_metrics_distributions",
        required_tenant_types=["referrer"],
    )

    for config_name, config_value in (
        ("default_concurrent_request_per_referrer", 4),
        ("requests_throttle_divider", 2),
        ("threads_throttle_divider", 2),
    ):
        policy.set_config_value(config_name, config_value)

    tenants = {"referrer": "statistical_detectors"}

    # The first two concurrent queries are at or below the threshold and
    # receive the full thread budget.
    for query_id in ("1", "2"):
        allowance = policy.get_quota_allowance(
            tenant_ids=tenants, query_id=query_id
        )
        assert allowance.max_threads == policy.max_threads

    # The third concurrent query crosses the threshold: it may still run,
    # but with half the threads.
    throttled_allowance = policy.get_quota_allowance(
        tenant_ids=tenants, query_id="3"
    )
    assert throttled_allowance.max_threads == policy.max_threads // 2
    assert throttled_allowance.can_run

@pytest.mark.redis_db
def test_override(self):
policy = ReferrerGuardRailPolicy.from_kwargs(
Expand Down

0 comments on commit 85d8728

Please sign in to comment.