Skip to content

Commit

Permalink
✨ Feature: Add features to support setting different rate limits for …
Browse files Browse the repository at this point in the history
…different models at the user level.
  • Loading branch information
yym68686 committed Nov 25, 2024
1 parent 89affc8 commit 8e3a5c1
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 5 deletions.
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,11 @@ api_keys:
# When SCHEDULING_ALGORITHM is random, use random polling load balancing, randomly request the channel of the model with a request.
# When SCHEDULING_ALGORITHM is round_robin, use polling load balancing, request the channel of the model used by the user in order.
AUTO_RETRY: true # Whether to automatically retry, automatically retry the next provider, true for automatic retry, false for no automatic retry, default is true. Also supports setting a number, indicating the number of retries.
RATE_LIMIT: 2/min # Supports rate limiting, maximum number of requests per minute, can be set to an integer, such as 2/min, 2 times per minute, 5/hour, 5 times per hour, 10/day, 10 times per day, 10/month, 10 times per month, 10/year, 10 times per year. Default is 60/min, optional
# RATE_LIMIT: 2/min,10/day # Supports multiple frequency constraints
rate_limit: 15/min # Supports rate limiting; each API key can make up to 15 requests per minute. Optional; the default is 999999/min. Multiple frequency constraints are supported: 15/min,10/day
# rate_limit: # You can set different frequency limits for each model
# gemini-1.5-pro: 3/min
# gemini-1.5-flash: 2/min
# default: 4/min # If a model has no frequency limit of its own, the `default` frequency limit is used
ENABLE_MODERATION: true # Whether to enable message moderation, true for enable, false for disable, default is false, when enabled, it will moderate the user's message, if inappropriate messages are found, an error message will be returned.

# Channel-level weighted load balancing configuration example
Expand Down
7 changes: 5 additions & 2 deletions README_CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,11 @@ api_keys:
# 当 SCHEDULING_ALGORITHM 为 random 时,使用随机轮训负载均衡,随机请求拥有请求的模型的渠道。
# 当 SCHEDULING_ALGORITHM 为 round_robin 时,使用轮训负载均衡,按照顺序请求用户使用的模型的渠道。
AUTO_RETRY: true # 是否自动重试,自动重试下一个提供商,true 为自动重试,false 为不自动重试,默认为 true。也可以设置为数字,表示重试次数。
RATE_LIMIT: 2/min # 支持限流,每分钟最多请求次数,可以设置为整数,如 2/min,2 次每分钟、5/hour,5 次每小时、10/day,10 次每天,10/month,10 次每月,10/year,10 次每年。默认60/min,选填
# RATE_LIMIT: 2/min,10/day 支持多个频率约束条件
rate_limit: 15/min # 支持限流,每分钟最多请求次数,可以设置为整数,如 2/min,2 次每分钟、5/hour,5 次每小时、10/day,10 次每天,10/month,10 次每月,10/year,10 次每年。默认999999/min,选填。支持多个频率约束条件:15/min,10/day
# rate_limit: # 可以为每个模型设置不同的频率限制
# gemini-1.5-pro: 3/min
# gemini-1.5-flash: 2/min
# default: 4/min # 如果模型没有设置频率限制,使用 default 的频率限制
ENABLE_MODERATION: true # 是否开启消息道德审查,true 为开启,false 为不开启,默认为 false,当开启后,会对用户的消息进行道德审查,如果发现不当的消息,会返回错误信息。

# 渠道级加权负载均衡配置示例
Expand Down
19 changes: 19 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
error_handling_wrapper,
rate_limiter,
provider_api_circular_list,
ThreadSafeCircularList,
)

from collections import defaultdict
Expand Down Expand Up @@ -488,6 +489,15 @@ async def dispatch(self, request: Request, call_next):
model = request_model.model
current_info["model"] = model

final_api_key = app.state.api_list[api_index]
try:
await app.state.user_api_keys_rate_limit[final_api_key].next(model)
except Exception as e:
return JSONResponse(
status_code=429,
content={"error": "Too many requests"}
)

moderated_content = None
if request_model.request_type == "chat":
moderated_content = request_model.get_last_text_message()
Expand Down Expand Up @@ -666,6 +676,15 @@ async def ensure_config(request: Request, call_next):
# logger.warning("Config not found, attempting to reload")
app.state.config, app.state.api_keys_db, app.state.api_list = await load_config(app)

if app.state.api_list:
app.state.user_api_keys_rate_limit = defaultdict(ThreadSafeCircularList)
for api_index, api_key in enumerate(app.state.api_list):
app.state.user_api_keys_rate_limit[api_key] = ThreadSafeCircularList(
[api_key],
safe_get(app.state.config, 'api_keys', api_index, "preferences", "rate_limit", default={"default": "999999/min"}),
"round_robin"
)

for item in app.state.api_keys_db:
if item.get("role") == "admin":
app.state.admin_api_key = item.get("api")
Expand Down
2 changes: 1 addition & 1 deletion utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ async def get_user_rate_limit(app, api_index: int = None):
# 这里应该实现根据 token 获取用户速率限制的逻辑
# 示例: 返回 (次数, 秒数)
config = app.state.config
raw_rate_limit = safe_get(config, 'api_keys', api_index, "preferences", "RATE_LIMIT")
raw_rate_limit = safe_get(config, 'api_keys', api_index, "preferences", "rate_limit")
# print("raw_rate_limit", raw_rate_limit)
# print("not api_index or not raw_rate_limit", api_index == None, not raw_rate_limit, api_index == None or not raw_rate_limit, api_index, raw_rate_limit)

Expand Down

0 comments on commit 8e3a5c1

Please sign in to comment.