From 30dd0b292f4f98694adb26a7c275f0e811055677 Mon Sep 17 00:00:00 2001
From: jinno <klvx01@gmail.com>
Date: Wed, 25 Dec 2024 02:30:52 +0900
Subject: [PATCH] fix: rename `deployment` parameter to `deployment_id` in cooldown_handlers.py and update router.py callers

---
 litellm/router.py                         | 16 ++++-----
 litellm/router_utils/cooldown_handlers.py | 44 +++++++++++------------
 2 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/litellm/router.py b/litellm/router.py
index 3cd1ef4c2f65..8c267836883a 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -774,19 +774,19 @@ def _completion(
     @overload
     async def acompletion(
         self, model: str, messages: List[Dict[str, str]], stream: Literal[True], **kwargs
-    ) -> CustomStreamWrapper: 
+    ) -> CustomStreamWrapper:
         ...
 
     @overload
     async def acompletion(
         self, model: str, messages: List[Dict[str, str]], stream: Literal[False] = False, **kwargs
-    ) -> ModelResponse: 
+    ) -> ModelResponse:
         ...
 
     @overload
     async def acompletion(
         self, model: str, messages: List[Dict[str, str]], stream: Union[Literal[True], Literal[False]] = False, **kwargs
-    ) -> Union[CustomStreamWrapper, ModelResponse]: 
+    ) -> Union[CustomStreamWrapper, ModelResponse]:
         ...
 
     # fmt: on
@@ -1284,13 +1284,13 @@ async def check_response(task: asyncio.Task):
     @overload
     async def schedule_acompletion(
         self, model: str, messages: List[Dict[str, str]], priority: int, stream: Literal[False] = False, **kwargs
-    ) -> ModelResponse: 
+    ) -> ModelResponse:
         ...
-    
+
     @overload
     async def schedule_acompletion(
         self, model: str, messages: List[Dict[str, str]], priority: int, stream: Literal[True], **kwargs
-    ) -> CustomStreamWrapper: 
+    ) -> CustomStreamWrapper:
         ...
 
     # fmt: on
@@ -3370,7 +3370,7 @@ def deployment_callback_on_failure(
                     litellm_router_instance=self,
                     exception_status=exception_status,
                     original_exception=exception,
-                    deployment=deployment_id,
+                    deployment_id=deployment_id,
                     time_to_cooldown=_time_to_cooldown,
                 )  # setting deployment_id in cooldown deployments
 
@@ -3691,7 +3691,7 @@ async def async_routing_strategy_pre_call_checks(
                         litellm_router_instance=self,
                         exception_status=e.status_code,
                         original_exception=e,
-                        deployment=deployment["model_info"]["id"],
+                        deployment_id=deployment["model_info"]["id"],
                         time_to_cooldown=self.cooldown_time,
                     )
                     raise e
diff --git a/litellm/router_utils/cooldown_handlers.py b/litellm/router_utils/cooldown_handlers.py
index 1e1c58a771d4..29734ea9d6e6 100644
--- a/litellm/router_utils/cooldown_handlers.py
+++ b/litellm/router_utils/cooldown_handlers.py
@@ -36,7 +36,7 @@
 
 def _should_run_cooldown_logic(
     litellm_router_instance: LitellmRouter,
-    deployment: Optional[str],
+    deployment_id: Optional[str],
     exception_status: Union[str, int],
     original_exception: Any,
 ) -> bool:
@@ -46,25 +46,25 @@ def _should_run_cooldown_logic(
 
     Does not run cooldown logic when:
     - router.disable_cooldowns is True
-    - deployment is None
+    - deployment_id is None
     - _is_cooldown_required() returns False
-    - deployment is in litellm_router_instance.provider_default_deployment_ids
+    - deployment_id is in litellm_router_instance.provider_default_deployment_ids
     - exception_status is not one that should be immediately retried (e.g. 401)
     """
     if litellm_router_instance.disable_cooldowns:
         return False
 
-    if deployment is None:
+    if deployment_id is None:
         return False
 
     if not litellm_router_instance._is_cooldown_required(
-        model_id=deployment,
+        model_id=deployment_id,
         exception_status=exception_status,
         exception_str=str(original_exception),
     ):
         return False
 
-    if deployment in litellm_router_instance.provider_default_deployment_ids:
+    if deployment_id in litellm_router_instance.provider_default_deployment_ids:
         return False
 
     return True
@@ -72,7 +72,7 @@ def _should_run_cooldown_logic(
 
 def _should_cooldown_deployment(
     litellm_router_instance: LitellmRouter,
-    deployment: str,
+    deployment_id: str,
     exception_status: Union[str, int],
     original_exception: Any,
 ) -> bool:
@@ -102,10 +102,10 @@ def _should_cooldown_deployment(
         is False
     ):
         num_successes_this_minute = get_deployment_successes_for_current_minute(
-            litellm_router_instance=litellm_router_instance, deployment_id=deployment
+            litellm_router_instance=litellm_router_instance, deployment_id=deployment_id
         )
         num_fails_this_minute = get_deployment_failures_for_current_minute(
-            litellm_router_instance=litellm_router_instance, deployment_id=deployment
+            litellm_router_instance=litellm_router_instance, deployment_id=deployment_id
         )
 
         total_requests_this_minute = num_successes_this_minute + num_fails_this_minute
@@ -115,8 +115,8 @@ def _should_cooldown_deployment(
                 num_successes_this_minute + num_fails_this_minute
             )
         verbose_router_logger.debug(
-            "percent fails for deployment = %s, percent fails = %s, num successes = %s, num fails = %s",
-            deployment,
+            "percent fails for deployment_id = %s, percent fails = %s, num successes = %s, num fails = %s",
+            deployment_id,
             percent_fails,
             num_successes_this_minute,
             num_fails_this_minute,
@@ -143,7 +143,7 @@ def _should_cooldown_deployment(
     else:
         return should_cooldown_based_on_allowed_fails_policy(
             litellm_router_instance=litellm_router_instance,
-            deployment=deployment,
+            deployment_id=deployment_id,
             original_exception=original_exception,
         )
 
@@ -154,7 +154,7 @@ def _set_cooldown_deployments(
     litellm_router_instance: LitellmRouter,
     original_exception: Any,
     exception_status: Union[str, int],
-    deployment: Optional[str] = None,
+    deployment_id: Optional[str] = None,
     time_to_cooldown: Optional[float] = None,
 ) -> bool:
     """
@@ -170,25 +170,25 @@ def _set_cooldown_deployments(
     """
     if (
         _should_run_cooldown_logic(
-            litellm_router_instance, deployment, exception_status, original_exception
+            litellm_router_instance, deployment_id, exception_status, original_exception
         )
         is False
-        or deployment is None
+        or deployment_id is None
     ):
         return False
 
     exception_status_int = cast_exception_status_to_int(exception_status)
 
-    verbose_router_logger.debug(f"Attempting to add {deployment} to cooldown list")
+    verbose_router_logger.debug(f"Attempting to add {deployment_id} to cooldown list")
     cooldown_time = litellm_router_instance.cooldown_time or 1
     if time_to_cooldown is not None:
         cooldown_time = time_to_cooldown
 
     if _should_cooldown_deployment(
-        litellm_router_instance, deployment, exception_status, original_exception
+        litellm_router_instance, deployment_id, exception_status, original_exception
     ):
         litellm_router_instance.cooldown_cache.add_deployment_to_cooldown(
-            model_id=deployment,
+            model_id=deployment_id,
             original_exception=original_exception,
             exception_status=exception_status_int,
             cooldown_time=cooldown_time,
@@ -198,7 +198,7 @@ def _set_cooldown_deployments(
         asyncio.create_task(
             router_cooldown_event_callback(
                 litellm_router_instance=litellm_router_instance,
-                deployment_id=deployment,
+                deployment_id=deployment_id,
                 exception_status=exception_status,
                 cooldown_time=cooldown_time,
             )
@@ -284,7 +284,7 @@ def _get_cooldown_deployments(
 
 def should_cooldown_based_on_allowed_fails_policy(
     litellm_router_instance: LitellmRouter,
-    deployment: str,
+    deployment_id: str,
     original_exception: Any,
 ) -> bool:
     """
@@ -304,14 +304,14 @@ def should_cooldown_based_on_allowed_fails_policy(
         litellm_router_instance.cooldown_time or DEFAULT_COOLDOWN_TIME_SECONDS
     )
 
-    current_fails = litellm_router_instance.failed_calls.get_cache(key=deployment) or 0
+    current_fails = litellm_router_instance.failed_calls.get_cache(key=deployment_id) or 0
     updated_fails = current_fails + 1
 
     if updated_fails > allowed_fails:
         return True
     else:
         litellm_router_instance.failed_calls.set_cache(
-            key=deployment, value=updated_fails, ttl=cooldown_time
+            key=deployment_id, value=updated_fails, ttl=cooldown_time
         )
 
     return False