From af7ce7600bb8e8192c3e65178759dd5073e54917 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 11 Jan 2025 14:40:26 -0800
Subject: [PATCH 01/11] fix - use lru cache on hot functions

---
 litellm/caching/_internal_lru_cache.py | 18 ++++++++++++++++++
 .../get_llm_provider_logic.py           |  2 ++
 .../get_supported_openai_params.py      |  2 ++
 3 files changed, 22 insertions(+)
 create mode 100644 litellm/caching/_internal_lru_cache.py

diff --git a/litellm/caching/_internal_lru_cache.py b/litellm/caching/_internal_lru_cache.py
new file mode 100644
index 000000000000..50905ae9e212
--- /dev/null
+++ b/litellm/caching/_internal_lru_cache.py
@@ -0,0 +1,18 @@
+from functools import lru_cache
+from typing import Any, Callable, ParamSpec, TypeVar, cast
+
+P = ParamSpec("P")
+T = TypeVar("T", bound=Callable[..., Any])
+
+
+def typed_lru_cache(maxsize: int) -> Callable[[T], T]:
+    """
+    Decorator to cache the result of a function with a configurable maximum size.
+    Args:
+        maxsize (int): Maximum size of the cache. Defaults to 128.
+    """
+
+    def decorator(f: T) -> T:
+        return cast(T, lru_cache(maxsize=maxsize)(f))
+
+    return decorator
diff --git a/litellm/litellm_core_utils/get_llm_provider_logic.py b/litellm/litellm_core_utils/get_llm_provider_logic.py
index 834e35c733fa..2d5766834edf 100644
--- a/litellm/litellm_core_utils/get_llm_provider_logic.py
+++ b/litellm/litellm_core_utils/get_llm_provider_logic.py
@@ -3,6 +3,7 @@
 import httpx
 
 import litellm
+from litellm.caching._internal_lru_cache import typed_lru_cache
 from litellm.secret_managers.main import get_secret, get_secret_str
 
 from ..types.router import LiteLLM_Params
@@ -84,6 +85,7 @@ def handle_anthropic_text_model_custom_llm_provider(
     return model, custom_llm_provider
 
 
+@typed_lru_cache(maxsize=16)
 def get_llm_provider(  # noqa: PLR0915
     model: str,
     custom_llm_provider: Optional[str] = None,
diff --git a/litellm/litellm_core_utils/get_supported_openai_params.py b/litellm/litellm_core_utils/get_supported_openai_params.py
index e251784f4e18..d9e010618ad3 100644
--- a/litellm/litellm_core_utils/get_supported_openai_params.py
+++ b/litellm/litellm_core_utils/get_supported_openai_params.py
@@ -2,9 +2,11 @@
 
 import litellm
 from litellm import LlmProviders
+from litellm.caching._internal_lru_cache import typed_lru_cache
 from litellm.exceptions import BadRequestError
 
 
+@typed_lru_cache(maxsize=16)
 def get_supported_openai_params(  # noqa: PLR0915
     model: str,
     custom_llm_provider: Optional[str] = None,
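Note: the typed_lru_cache helper above is a thin wrapper over functools.lru_cache; the cast exists only to preserve the decorated function's signature for the type checker, since lru_cache erases it. A minimal sketch of the caching behavior this patch relies on, with resolve_provider as a hypothetical stand-in for the decorated functions:

    from functools import lru_cache

    @lru_cache(maxsize=16)
    def resolve_provider(model: str) -> str:
        # Stand-in for the expensive prefix parsing done in get_llm_provider.
        print(f"computing provider for {model}")
        return model.split("/", 1)[0] if "/" in model else "openai"

    resolve_provider("anthropic/claude-3")  # miss: body runs, result is cached
    resolve_provider("anthropic/claude-3")  # hit: cached result returned, no print
    print(resolve_provider.cache_info())    # hits=1, misses=1, maxsize=16, currsize=1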
From c7e3600c6982bee9b4b8e27fb27998b3ffa39f9c Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 11 Jan 2025 15:04:17 -0800
Subject: [PATCH 02/11] ci/cd run again

---
 tests/llm_translation/test_xai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/llm_translation/test_xai.py b/tests/llm_translation/test_xai.py
index fe909a4d2e1b..de4bfc907d08 100644
--- a/tests/llm_translation/test_xai.py
+++ b/tests/llm_translation/test_xai.py
@@ -6,7 +6,7 @@
 
 sys.path.insert(
     0, os.path.abspath("../..")
-)  # Adds the parent directory to the system-path
+)  # Adds the parent directory to the system path
 
 import httpx

From 1a658ff82d893f7fda934d142501fad940c41566 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 11 Jan 2025 15:06:07 -0800
Subject: [PATCH 03/11] fix import on python 3.8

---
 litellm/caching/_internal_lru_cache.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/litellm/caching/_internal_lru_cache.py b/litellm/caching/_internal_lru_cache.py
index 50905ae9e212..709cacba2701 100644
--- a/litellm/caching/_internal_lru_cache.py
+++ b/litellm/caching/_internal_lru_cache.py
@@ -1,5 +1,11 @@
 from functools import lru_cache
-from typing import Any, Callable, ParamSpec, TypeVar, cast
+
+try:
+    from typing import Any, Callable, ParamSpec, TypeVar, cast
+except ImportError:
+    from typing import Any, Callable, TypeVar, cast
+
+    from typing_extensions import ParamSpec
 
 P = ParamSpec("P")
 T = TypeVar("T", bound=Callable[..., Any])

From 11818d22b1ad125345a618c9f4e775a1ce8ed6d4 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 11 Jan 2025 15:13:09 -0800
Subject: [PATCH 04/11] do nothing in _response_cost_calculator

---
 litellm/litellm_core_utils/litellm_logging.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index cbec481ab0f8..bc7587382d6d 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -778,6 +778,7 @@ def _response_cost_calculator(
 
         used for consistent cost calculation across response headers + logging integrations.
         """
+        return
         ## RESPONSE COST ##
         custom_pricing = use_custom_pricing_for_model(
             litellm_params=(

From f4daaa3063e8519f3faad3e2623112f30b58e76f Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 11 Jan 2025 15:14:03 -0800
Subject: [PATCH 05/11] Revert "do nothing in _response_cost_calculator"

This reverts commit 11818d22b1ad125345a618c9f4e775a1ce8ed6d4.
---
 litellm/litellm_core_utils/litellm_logging.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index bc7587382d6d..cbec481ab0f8 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -778,7 +778,6 @@ def _response_cost_calculator(
 
         used for consistent cost calculation across response headers + logging integrations.
         """
-        return
         ## RESPONSE COST ##
         custom_pricing = use_custom_pricing_for_model(
             litellm_params=(

From 5d9322da12bab84c6ab88b382acc5aa1a140e9d1 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 11 Jan 2025 15:19:29 -0800
Subject: [PATCH 06/11] fix typed_lru_cache

---
 litellm/caching/_internal_lru_cache.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/litellm/caching/_internal_lru_cache.py b/litellm/caching/_internal_lru_cache.py
index 709cacba2701..1d51becee634 100644
--- a/litellm/caching/_internal_lru_cache.py
+++ b/litellm/caching/_internal_lru_cache.py
@@ -14,11 +14,21 @@ def typed_lru_cache(maxsize: int) -> Callable[[T], T]:
     """
     Decorator to cache the result of a function with a configurable maximum size.
+    Skips caching if any arguments are not hashable.
+
     Args:
         maxsize (int): Maximum size of the cache. Defaults to 128.
     """
 
     def decorator(f: T) -> T:
-        return cast(T, lru_cache(maxsize=maxsize)(f))
+        cached_f = lru_cache(maxsize=maxsize)(f)
+
+        def wrapper(*args, **kwargs):
+            try:
+                return cached_f(*args, **kwargs)
+            except TypeError:
+                return f(*args, **kwargs)
+
+        return cast(T, wrapper)
 
     return decorator
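Note: patch 06 exists because functools.lru_cache hashes every positional and keyword argument to build its cache key, so a caller passing a dict or list would otherwise crash with a TypeError rather than merely miss the cache. A standalone sketch of the failure mode the new try/except absorbs (not LiteLLM code):

    from functools import lru_cache

    @lru_cache(maxsize=16)
    def count_params(params):
        return len(params)

    try:
        count_params({"temperature": 0.7})  # dicts are unhashable
    except TypeError as err:
        print(err)  # unhashable type: 'dict'

The wrapper in this patch catches that TypeError and calls the undecorated function instead, so unhashable arguments still succeed; they simply bypass the cache.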
""" def decorator(f: T) -> T: - return cast(T, lru_cache(maxsize=maxsize)(f)) + cached_f = lru_cache(maxsize=maxsize)(f) + + def wrapper(*args, **kwargs): + try: + return cached_f(*args, **kwargs) + except TypeError: + return f(*args, **kwargs) + + return cast(T, wrapper) return decorator From ac4a8ee7a77906744b28c4dd840e8d838c1fee7b Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 11 Jan 2025 15:25:51 -0800 Subject: [PATCH 07/11] fix internal lru cache linting --- litellm/caching/_internal_lru_cache.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/litellm/caching/_internal_lru_cache.py b/litellm/caching/_internal_lru_cache.py index 1d51becee634..57101ab6f291 100644 --- a/litellm/caching/_internal_lru_cache.py +++ b/litellm/caching/_internal_lru_cache.py @@ -1,11 +1,11 @@ from functools import lru_cache +from typing import Any, Callable, TypeVar, cast try: - from typing import Any, Callable, ParamSpec, TypeVar, cast + from typing import ParamSpec # Try to import from typing first except ImportError: - from typing import Any, Callable, TypeVar, cast + from typing_extensions import ParamSpec # Fall back to typing_extensions - from typing_extensions import ParamSpec P = ParamSpec("P") T = TypeVar("T", bound=Callable[..., Any]) From 3e6d1f37e8ef96953ef2a6344d31658eb47f01da Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 11 Jan 2025 15:32:26 -0800 Subject: [PATCH 08/11] fix lint check --- litellm/caching/_internal_lru_cache.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/litellm/caching/_internal_lru_cache.py b/litellm/caching/_internal_lru_cache.py index 57101ab6f291..6e4048d66d28 100644 --- a/litellm/caching/_internal_lru_cache.py +++ b/litellm/caching/_internal_lru_cache.py @@ -1,11 +1,7 @@ from functools import lru_cache from typing import Any, Callable, TypeVar, cast -try: - from typing import ParamSpec # Try to import from typing first -except ImportError: - from typing_extensions import ParamSpec # Fall back to typing_extensions - +from typing_extensions import ParamSpec P = ParamSpec("P") T = TypeVar("T", bound=Callable[..., Any]) From 29bc18b2933268a6c33f1de959c2ba29d1cc8c47 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 11 Jan 2025 16:02:13 -0800 Subject: [PATCH 09/11] fixes on lru cache --- litellm/caching/_internal_lru_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/caching/_internal_lru_cache.py b/litellm/caching/_internal_lru_cache.py index 6e4048d66d28..e132e5fda9bc 100644 --- a/litellm/caching/_internal_lru_cache.py +++ b/litellm/caching/_internal_lru_cache.py @@ -17,7 +17,7 @@ def typed_lru_cache(maxsize: int) -> Callable[[T], T]: """ def decorator(f: T) -> T: - cached_f = lru_cache(maxsize=maxsize)(f) + cached_f = lru_cache(maxsize=maxsize, typed=True)(f) def wrapper(*args, **kwargs): try: From e2225c970410c2c2cd952b1fba5b4775572b80a4 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 11 Jan 2025 16:39:40 -0800 Subject: [PATCH 10/11] use lru cache on get llm provider --- litellm/caching/_internal_lru_cache.py | 32 ++++--------------- .../get_llm_provider_logic.py | 27 ++++++++++++++-- litellm/types/router.py | 4 +-- 3 files changed, 34 insertions(+), 29 deletions(-) diff --git a/litellm/caching/_internal_lru_cache.py b/litellm/caching/_internal_lru_cache.py index e132e5fda9bc..78df493fc879 100644 --- a/litellm/caching/_internal_lru_cache.py +++ b/litellm/caching/_internal_lru_cache.py @@ -1,30 +1,12 @@ -from functools import lru_cache -from typing import Any, Callable, 
From e2225c970410c2c2cd952b1fba5b4775572b80a4 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 11 Jan 2025 16:39:40 -0800
Subject: [PATCH 10/11] use lru cache on get llm provider

---
 litellm/caching/_internal_lru_cache.py | 32 ++++---------------
 .../get_llm_provider_logic.py           | 27 ++++++++++++++--
 litellm/types/router.py                 |  4 +--
 3 files changed, 34 insertions(+), 29 deletions(-)

diff --git a/litellm/caching/_internal_lru_cache.py b/litellm/caching/_internal_lru_cache.py
index e132e5fda9bc..78df493fc879 100644
--- a/litellm/caching/_internal_lru_cache.py
+++ b/litellm/caching/_internal_lru_cache.py
@@ -1,30 +1,12 @@
-from functools import lru_cache
-from typing import Any, Callable, TypeVar, cast
+from functools import lru_cache, wraps
+from typing import Callable, TypeVar, cast
 
-from typing_extensions import ParamSpec
+RT = TypeVar("RT")  # Return type
 
-P = ParamSpec("P")
-T = TypeVar("T", bound=Callable[..., Any])
 
-
-def typed_lru_cache(maxsize: int) -> Callable[[T], T]:
-    """
-    Decorator to cache the result of a function with a configurable maximum size.
-    Skips caching if any arguments are not hashable.
-
-    Args:
-        maxsize (int): Maximum size of the cache. Defaults to 128.
-    """
-
-    def decorator(f: T) -> T:
-        cached_f = lru_cache(maxsize=maxsize, typed=True)(f)
-
-        def wrapper(*args, **kwargs):
-            try:
-                return cached_f(*args, **kwargs)
-            except TypeError:
-                return f(*args, **kwargs)
-
-        return cast(T, wrapper)
+def typed_lru_cache(maxsize: int = 128) -> Callable:
+    def decorator(func: Callable[..., RT]) -> Callable[..., RT]:
+        wrapped = lru_cache(maxsize=maxsize)(func)
+        return cast(Callable[..., RT], wraps(func)(wrapped))
 
     return decorator
diff --git a/litellm/litellm_core_utils/get_llm_provider_logic.py b/litellm/litellm_core_utils/get_llm_provider_logic.py
index 2d5766834edf..6c339d7bc8b7 100644
--- a/litellm/litellm_core_utils/get_llm_provider_logic.py
+++ b/litellm/litellm_core_utils/get_llm_provider_logic.py
@@ -1,4 +1,5 @@
-from typing import Optional, Tuple
+from functools import lru_cache
+from typing import Optional, Tuple, overload
 
 import httpx
 
@@ -85,7 +86,29 @@ def handle_anthropic_text_model_custom_llm_provider(
     return model, custom_llm_provider
 
 
-@typed_lru_cache(maxsize=16)
+@overload
+def get_llm_provider(
+    model: str,
+    custom_llm_provider: str,
+    api_base: str,
+    api_key: str,
+    litellm_params: LiteLLM_Params,
+) -> Tuple[str, str, str, str]:
+    pass
+
+
+@overload
+def get_llm_provider(
+    model: str,
+    custom_llm_provider: Optional[str] = None,
+    api_base: Optional[str] = None,
+    api_key: Optional[str] = None,
+    litellm_params: Optional[LiteLLM_Params] = None,
+) -> Tuple[str, str, Optional[str], Optional[str]]:
+    pass
+
+
+@lru_cache(maxsize=16)
 def get_llm_provider(  # noqa: PLR0915
     model: str,
     custom_llm_provider: Optional[str] = None,
diff --git a/litellm/types/router.py b/litellm/types/router.py
index 8c671fe52c8c..7bf54126f704 100644
--- a/litellm/types/router.py
+++ b/litellm/types/router.py
@@ -177,7 +177,7 @@ class GenericLiteLLMParams(BaseModel):
     max_budget: Optional[float] = None
     budget_duration: Optional[str] = None
 
-    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
+    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True, frozen=True)
 
     def __init__(
         self,
@@ -249,7 +249,7 @@ class LiteLLM_Params(GenericLiteLLMParams):
     """
 
     model: str
-    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
+    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True, frozen=True)
 
     def __init__(
         self,
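Note: two things in patch 10 are worth spelling out. First, the @overload stubs exist only for the type checker; at runtime the single @lru_cache-decorated definition handles every call, and the rewritten typed_lru_cache helper also drops the TypeError fallback from patch 06, making hashable arguments a hard requirement. Second, that requirement is why LiteLLM_Params and GenericLiteLLMParams gain frozen=True: a frozen pydantic model is immutable and therefore hashable. A sketch of that mechanism, assuming pydantic v2 and simplified fields:

    from typing import Optional

    from pydantic import BaseModel, ConfigDict

    class Params(BaseModel):
        model_config = ConfigDict(frozen=True)  # immutable, so pydantic generates __hash__
        model: str
        api_base: Optional[str] = None

    p = Params(model="gpt-4o")
    print(hash(p) == hash(Params(model="gpt-4o")))  # True: usable as an lru_cache key
    # p.model = "other"  # would raise: frozen instances reject mutation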
From 76dae76d5f761c2c0fd7aa02c81c761207ce80d8 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 11 Jan 2025 17:07:01 -0800
Subject: [PATCH 11/11] fix get_supported_openai_params

---
 litellm/litellm_core_utils/get_supported_openai_params.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/litellm/litellm_core_utils/get_supported_openai_params.py b/litellm/litellm_core_utils/get_supported_openai_params.py
index d9e010618ad3..62f84e0ebd5a 100644
--- a/litellm/litellm_core_utils/get_supported_openai_params.py
+++ b/litellm/litellm_core_utils/get_supported_openai_params.py
@@ -6,7 +6,6 @@
 from litellm.exceptions import BadRequestError
 
 
-@typed_lru_cache(maxsize=16)
 def get_supported_openai_params(  # noqa: PLR0915
     model: str,
     custom_llm_provider: Optional[str] = None,
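Note on the end state of the series: get_llm_provider keeps a module-level lru_cache, while this final patch removes the cache from get_supported_openai_params again (the typed_lru_cache import added in patch 01 is left behind). One testing implication: a module-level cache persists across test cases, so tests that vary environment or provider configuration may need to reset it between runs. functools exposes cache_clear() and cache_info() on cached functions for exactly this; a sketch of a hypothetical test teardown:

    from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider

    get_llm_provider.cache_clear()        # drop all cached provider resolutions
    print(get_llm_provider.cache_info())  # CacheInfo(hits=0, misses=0, maxsize=16, currsize=0)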