diff --git a/CHANGELOG.md b/CHANGELOG.md
index f4e39b4..a73b0dd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+- Added support for LiteLLM.
+
 ## [0.2.0] - 2023-09-19
 
 ### Added
diff --git a/llm_app/model_wrappers/__init__.py b/llm_app/model_wrappers/__init__.py
index 5017783..f34fc5f 100644
--- a/llm_app/model_wrappers/__init__.py
+++ b/llm_app/model_wrappers/__init__.py
@@ -6,6 +6,10 @@
     HFFeatureExtractionTask,
     HFTextGenerationTask,
 )
+from llm_app.model_wrappers.litellm_wrapper.api_models import (
+    LiteLLMChatModel,
+    LiteLLMEmbeddingModel,
+)
 from llm_app.model_wrappers.openai_wrapper.api_models import (
     OpenAIChatGPTModel,
     OpenAIEmbeddingModel,
@@ -19,6 +23,8 @@
     "HFApiTextGenerationTask",
     "HFFeatureExtractionTask",
     "HFTextGenerationTask",
+    "LiteLLMChatModel",
+    "LiteLLMEmbeddingModel",
     "OpenAIChatGPTModel",
     "OpenAIEmbeddingModel",
     "SentenceTransformerTask",
diff --git a/llm_app/model_wrappers/api_clients/clients.py b/llm_app/model_wrappers/api_clients/clients.py
index 46a907d..cdaf460 100644
--- a/llm_app/model_wrappers/api_clients/clients.py
+++ b/llm_app/model_wrappers/api_clients/clients.py
@@ -61,3 +61,44 @@ def make_request(self, **kwargs):
         url = f"{self.api_url_prefix}/{endpoint}"
         response = requests.post(url, headers=self.headers, json=kwargs)
         return response.json()
+
+
+class LiteLLMClient(APIClient):
+    """
+    A wrapper for LiteLLM.
+
+    Attributes:
+        task_fn (Callable): Function reference for the specified task.
+
+    Args:
+        task (str, optional): Type of task to be executed. Defaults to "completion".
+            Supported tasks are:
+            - "completion"
+            - "embedding"
+
+    Raises:
+        ValueError: If the provided task is not supported.
+    """
+
+    def __init__(self, task: str = "completion") -> None:
+        """
+        Initializes the client with the specified task type.
+
+        Args:
+            task (str, optional): Type of task. Defaults to "completion".
+                Supported are 'completion' and 'embedding'.
+        """
+        from litellm import completion, embedding
+
+        if task == "completion":
+            self.task_fn = completion
+        elif task == "embedding":
+            self.task_fn = embedding
+        else:
+            raise ValueError("Supported tasks are (completion, embedding).")
+
+    def make_request(self, **kwargs):
+        """
+        Makes a request to the LLM service using the specified task function.
+        """
+        return self.task_fn(**kwargs)
diff --git a/llm_app/model_wrappers/litellm_wrapper/__init__.py b/llm_app/model_wrappers/litellm_wrapper/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/llm_app/model_wrappers/litellm_wrapper/api_models.py b/llm_app/model_wrappers/litellm_wrapper/api_models.py
new file mode 100644
index 0000000..f25b110
--- /dev/null
+++ b/llm_app/model_wrappers/litellm_wrapper/api_models.py
@@ -0,0 +1,158 @@
+import pathway as pw
+
+from llm_app.model_wrappers.api_clients.clients import LiteLLMClient
+from llm_app.model_wrappers.base import BaseModel
+
+
+class LiteLLMChatModel(BaseModel):
+    def __init__(self):
+        super().__init__()
+        self.api_client = self.get_client()
+
+    def get_client(
+        self,
+    ) -> LiteLLMClient:
+        return LiteLLMClient(task="completion")
+
+    def __call__(self, text: str, locator="gpt-3.5-turbo", **kwargs) -> str:
+        """
+        Example:
+
+        # >>> os.environ["OPENAI_API_KEY"] = ""
+        # >>> model = LiteLLMChatModel()
+        # >>> model(
+        # ...     locator='gpt-4-0613',
+        # ...     text="Tell me a joke about jokes",
+        # ...     temperature=1.1
+        # ... )
+        """
+
+        messages = [
+            dict(role="system", content="You are a helpful assistant"),
+            dict(role="user", content=text),
+        ]
+        response = self.api_client.make_request(
+            messages=messages, model=locator, **kwargs
+        )
+        return response.choices[0].message.content
+
+    def apply(
+        self,
+        *args,
+        **kwargs,
+    ) -> pw.ColumnExpression:
+        """
+        Applies the specified API model in `locator` to the provided text.
+
+        Parameters
+        ----------
+        text : Union[pw.ColumnExpression, str]
+            The input text on which the model will be applied. It can be a column expression or a string.
+        locator : Union[pw.ColumnExpression, str, None]
+            The model locator to use for applying the model.
+            If provided, it should be a column expression or a string.
+            Otherwise, the default chat completion model `gpt-3.5-turbo` is applied.
+            Please visit https://docs.litellm.ai/docs/ to see the available models.
+        **kwargs : dict
+            Additional keyword arguments that will be used for the model application.
+            These could include settings such as `temperature`, `max_tokens`, etc.
+
+        Returns
+        -------
+        pw.ColumnExpression
+            The result of the model application as a column expression or str.
+            Please note that the output is `chat_completion.choices[0].message.content`,
+            where `chat_completion` is the API response.
+
+        Example:
+
+        # >>> os.environ["OPENAI_API_KEY"] = ""
+        # >>> os.environ["COHERE_API_KEY"] = ""
+        # >>> model = LiteLLMChatModel()
+        # >>> table = pw.debug.table_from_pandas(
+        # ...     pd.DataFrame.from_records([
+        # ...         {"text": "How to use pathway to process a kafka stream ?"},
+        # ...         {"text": "How to apply a function to a pathway table ?"}
+        # ...     ])
+        # ... )
+        # >>> table += table.select(
+        # ...     openai_response = model.apply(
+        # ...         pw.this.text,
+        # ...         locator='gpt-4',
+        # ...         temperature=1.5,
+        # ...         max_tokens=1000
+        # ...     )
+        # ... )
+        # >>> table += table.select(
+        # ...     cohere_response = model.apply(
+        # ...         pw.this.text,
+        # ...         locator='command-nightly',
+        # ...         temperature=1.5,
+        # ...         max_tokens=1000
+        # ...     )
+        # ... )
+        """
+        return super().apply(*args, **kwargs)
+
+
+class LiteLLMEmbeddingModel(BaseModel):
+    def __init__(self):
+        super().__init__()
+        self.api_client = self.get_client()
+
+    def get_client(self) -> LiteLLMClient:
+        return LiteLLMClient(task="embedding")
+
+    def __call__(self, text: str, locator="text-embedding-ada-002", **kwargs):
+        """
+        Example:
+
+        # >>> os.environ["OPENAI_API_KEY"] = ""
+        # >>> embedder = LiteLLMEmbeddingModel()
+        # >>> embedder(
+        # ...     text='Some random text',
+        # ...     locator='text-embedding-ada-002'
+        # ... )
+        """
+
+        response = self.api_client.make_request(input=[text], model=locator, **kwargs)
+        return response["data"][0]["embedding"]
+
+    def apply(
+        self,
+        *args,
+        **kwargs,
+    ) -> pw.ColumnExpression:
+        """
+        Applies the specified API model in `locator` to the provided text.
+
+        Parameters
+        ----------
+        text : Union[pw.ColumnExpression, str]
+            The input text on which the model will be applied. It can be a column expression or a constant value.
+        locator : Union[pw.ColumnExpression, str, None]
+            The model locator to use for applying the model.
+            If provided, it should be a column expression or a constant value.
+            Otherwise, the default embedding model `text-embedding-ada-002` is applied.
+            Please visit https://docs.litellm.ai/docs/embedding/supported_embedding
+            to see the available models.
+        **kwargs : dict
+            Additional keyword arguments that will be used for the model application.
+            These could include provider-specific settings forwarded to the underlying embedding call.
+
+        Returns
+        -------
+        pw.ColumnExpression
+            The result of the model application as a column expression or constant of type list.
+            Please note that the output is `response["data"][0]["embedding"]`,
+            where `response` is the API response.
+
+        Example:
+
+        # >>> os.environ["OPENAI_API_KEY"] = ""
+        # >>> embedder = LiteLLMEmbeddingModel()
+        # >>> table = pw.debug.table_from_pandas(
+        # ...     pd.DataFrame.from_records([
+        # ...         {"text": "How to use pathway to process a kafka stream ?"},
+        # ...         {"text": "How to apply a function to a pathway table ?"}
+        # ...     ])
+        # ... )
+        # >>> table += table.select(
+        # ...     embedding = embedder.apply(
+        # ...         pw.this.text,
+        # ...         locator='text-embedding-ada-002'
+        # ...     )
+        # ... )
+        """
+        return super().apply(*args, **kwargs)
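
Taken together, a minimal end-to-end sketch of the two new wrappers, assuming `litellm`, `pathway`, and `pandas` are installed; the input row and the `OPENAI_API_KEY` value are illustrative placeholders, not part of this change:

import os

import pandas as pd
import pathway as pw

from llm_app.model_wrappers import LiteLLMChatModel, LiteLLMEmbeddingModel

# Placeholder credentials; LiteLLM reads provider keys from the environment.
os.environ["OPENAI_API_KEY"] = "sk-..."

chat = LiteLLMChatModel()            # routes through litellm.completion
embedder = LiteLLMEmbeddingModel()   # routes through litellm.embedding

table = pw.debug.table_from_pandas(
    pd.DataFrame.from_records([{"text": "How to apply a function to a pathway table ?"}])
)

# `locator` selects the underlying provider model; extra kwargs are forwarded as-is.
table += table.select(
    response=chat.apply(pw.this.text, locator="gpt-3.5-turbo", temperature=0.7),
    embedding=embedder.apply(pw.this.text, locator="text-embedding-ada-002"),
)

pw.debug.compute_and_print(table)

Swapping providers is a matter of changing `locator` (e.g. 'command-nightly' for Cohere) and setting the corresponding API key, since both wrappers defer model routing entirely to LiteLLM.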