diff --git a/config/anyscale.yaml b/config/anyscale.yaml
index 12eaf9d5..df529fbc 100644
--- a/config/anyscale.yaml
+++ b/config/anyscale.yaml
@@ -19,16 +19,16 @@ chat_engine:
   params:
     max_prompt_tokens: 2048  # The maximum number of tokens to use for input prompt to the LLM.
   llm: &llm
-    type: AnyscaleLLM  # Options: [OpenAILLM, AnyscaleLLM]
+    type: AnyscaleLLM
     params:
       model_name: meta-llama/Llama-2-7b-chat-hf  # The name of the model to use.
 
   query_builder:
-    # --------------------------------------------------------------------
-    # Configuration for the QueryBuilder subcomponent of the chat engine.
-    # Since Anyscale's LLM endpoint currently doesn't support function calling, we will use the InstructionQueryGenerator
-    # --------------------------------------------------------------------
-    type: InstructionQueryGenerator  # Options: [InstructionQueryGenerator, LastMessageQueryGenerator]
+    type: FunctionCallingQueryGenerator  # Options: [FunctionCallingQueryGenerator, LastMessageQueryGenerator, InstructionQueryGenerator]
+    llm:
+      type: AnyscaleLLM
+      params:
+        model_name: mistralai/Mistral-7B-Instruct-v0.1
 
   context_engine:
     # -------------------------------------------------------------------------------------------------------------
@@ -43,7 +43,7 @@ chat_engine:
        # Configuration for the RecordEncoder subcomponent of the knowledge base.
        # Use Anyscale's Embedding endpoint for dense encoding
        # --------------------------------------------------------------------------
-       type: AnyscaleRecordEncoder  # Options: [OpenAIRecordEncoder, AnyscaleRecordEncoder]
+       type: AnyscaleRecordEncoder
        params:
          model_name:  # The name of the model to use for encoding
            thenlper/gte-large
diff --git a/src/canopy/chat_engine/query_generator/function_calling.py b/src/canopy/chat_engine/query_generator/function_calling.py
index 356f3ee1..d53e821d 100644
--- a/src/canopy/chat_engine/query_generator/function_calling.py
+++ b/src/canopy/chat_engine/query_generator/function_calling.py
@@ -43,16 +43,13 @@ def generate(self,
                 chat_history=messages,
                 function=self._function
             )
-        except RuntimeError as e:
-            if "function calling" in str(e):
-                raise RuntimeError(
-                    "FunctionCallingQueryGenerator requires an LLM that supports "
-                    "function calling. Please provide a different LLM, "
-                    "or alternatively select a different QueryGenerator class. "
-                    f"Received the following error from LLM:\n{e}"
-                ) from e
-
-            raise
+        except NotImplementedError as e:
+            raise RuntimeError(
+                "FunctionCallingQueryGenerator requires an LLM that supports "
+                "function calling. Please provide a different LLM, "
+                "or alternatively select a different QueryGenerator class. "
+                f"Received the following error from LLM:\n{e}"
+            ) from e
 
         return [Query(text=q) for q in arguments["queries"]]
diff --git a/src/canopy/llm/anyscale.py b/src/canopy/llm/anyscale.py
index 546c97d4..b98ba4fb 100644
--- a/src/canopy/llm/anyscale.py
+++ b/src/canopy/llm/anyscale.py
@@ -5,6 +5,10 @@ from canopy.models.data_models import Messages
 
 ANYSCALE_BASE_URL = "https://api.endpoints.anyscale.com/v1"
+FUNCTION_MODEL_LIST = [
+    "mistralai/Mistral-7B-Instruct-v0.1",
+    "mistralai/Mixtral-8x7B-Instruct-v0.1",
+]
 
 
 class AnyscaleLLM(OpenAILLM):
@@ -42,7 +46,21 @@ def enforced_function_call(
                                max_tokens: Optional[int] = None,
                                model_params: Optional[dict] = None,
                                ) -> dict:
-        raise NotImplementedError()
+        model = self.model_name
+        if model_params and "model" in model_params:
+            model = model_params["model"]
+        if model not in FUNCTION_MODEL_LIST:
+            raise NotImplementedError(
+                f"Model {model} doesn't support function calling. "
+                "To use the function calling capability, please select a different model.\n"
+                "Please check the following link for details: "
+                "https://docs.endpoints.anyscale.com/guides/function-calling"
+            )
+        else:
+            return super().enforced_function_call(
+                system_prompt, chat_history, function,
+                max_tokens=max_tokens, model_params=model_params
+            )
 
     def aenforced_function_call(self,
                                 system_prompt: str,
diff --git a/src/canopy/llm/azure_openai_llm.py b/src/canopy/llm/azure_openai_llm.py
index 29f831ad..391efd87 100644
--- a/src/canopy/llm/azure_openai_llm.py
+++ b/src/canopy/llm/azure_openai_llm.py
@@ -72,34 +72,34 @@ def available_models(self):
             "Azure OpenAI LLM does not support listing available models"
         )
 
-    def _handle_chat_error(self, e):
+    def _handle_chat_error(self, e, is_function_call=False):
         if isinstance(e, openai.AuthenticationError):
             raise RuntimeError(
                 "Failed to connect to Azure OpenAI, please make sure that the "
                 "AZURE_OPENAI_API_KEY environment variable is set correctly. "
                 f"Underlying Error:\n{self._format_openai_error(e)}"
-            )
+            ) from e
         elif isinstance(e, openai.APIConnectionError):
             raise RuntimeError(
                 f"Failed to connect to your Azure OpenAI endpoint, please make sure "
                 f"that the provided endpoint {os.getenv('AZURE_OPENAI_ENDPOINT')} "
                 f"is correct. Underlying Error:\n{self._format_openai_error(e)}"
-            )
+            ) from e
         elif isinstance(e, openai.NotFoundError):
-            if e.type and 'invalid' in e.type:
-                raise RuntimeError(
+            if e.type and 'invalid' in e.type and is_function_call:
+                raise NotImplementedError(
                     f"It seems that you are trying to use OpenAI's `function calling` "
                     f"or `tools` features. Please note that Azure OpenAI only supports "
                     f"function calling for specific models and API versions. More "
                     f"details in: https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/function-calling. "  # noqa: E501
                     f"Underlying Error:\n{self._format_openai_error(e)}"
-                )
+                ) from e
             else:
                 raise RuntimeError(
                     f"Failed to connect to your Azure OpenAI. Please make sure that "
                     f"you have provided the correct deployment name: {self.model_name} "
                     f"and API version: {self._client._api_version}. "
                     f"Underlying Error:\n{self._format_openai_error(e)}"
-                )
+                ) from e
         else:
             super()._handle_chat_error(e)
diff --git a/src/canopy/llm/models.py b/src/canopy/llm/models.py
index 2cd40736..ed0c33a2 100644
--- a/src/canopy/llm/models.py
+++ b/src/canopy/llm/models.py
@@ -20,6 +20,7 @@ class FunctionArrayProperty(BaseModel):
     def dict(self, *args, **kwargs):
         super_dict = super().dict(*args, **kwargs)
         if "items_type" in super_dict:
+            super_dict["type"] = "array"
             super_dict["items"] = {"type": super_dict.pop("items_type")}
         return super_dict
diff --git a/src/canopy/llm/openai.py b/src/canopy/llm/openai.py
index 55079f98..3e73248d 100644
--- a/src/canopy/llm/openai.py
+++ b/src/canopy/llm/openai.py
@@ -221,7 +221,7 @@ def enforced_function_call(self,
                 **model_params_dict
             )
         except openai.OpenAIError as e:
-            self._handle_chat_error(e)
+            self._handle_chat_error(e, is_function_call=True)
 
         result = chat_completion.choices[0].message.tool_calls[0].function.arguments
         arguments = json.loads(result)
@@ -262,7 +262,15 @@ def _format_openai_error(e):
         except Exception:
             return str(e)
 
-    def _handle_chat_error(self, e):
+    def _handle_chat_error(self, e, is_function_call=False):
+        if isinstance(e, openai.NotFoundError) and is_function_call:
+            if e.type and 'invalid' in e.type:
+                raise NotImplementedError(
+                    f"The selected model ({self.model_name}) does not support "
+                    f"function calling. "
+                    f"Underlying Error:\n{self._format_openai_error(e)}"
+                ) from e
+
         provider_name = self.__class__.__name__.replace("LLM", "")
         raise RuntimeError(
             f"Failed to use {provider_name}'s {self.model_name} model for chat "
diff --git a/tests/system/llm/conftest.py b/tests/system/llm/conftest.py
index 9b903b6b..bc7eec92 100644
--- a/tests/system/llm/conftest.py
+++ b/tests/system/llm/conftest.py
@@ -8,5 +8,6 @@ def messages():
     # Create a list of MessageBase objects
     return [
         UserMessage(content="Hello, assistant."),
-        AssistantMessage(content="Hello, user. How can I assist you?")
+        AssistantMessage(content="Hello, user. How can I assist you?"),
+        UserMessage(content="Just checking in. Be concise."),
     ]
diff --git a/tests/system/llm/test_anyscale.py b/tests/system/llm/test_anyscale.py
deleted file mode 100644
index 636ae717..00000000
--- a/tests/system/llm/test_anyscale.py
+++ /dev/null
@@ -1,164 +0,0 @@
-from unittest.mock import MagicMock
-
-import pytest
-
-
-from canopy.models.data_models import Role, MessageBase, Context, StringContextContent  # noqa
-from canopy.models.api_models import ChatResponse, StreamingChatChunk  # noqa
-from canopy.llm.anyscale import AnyscaleLLM  # noqa
-from openai import BadRequestError  # noqa
-
-
-SYSTEM_PROMPT = "You are a helpful assistant."
-
-
-def assert_chat_completion(response, num_choices=1):
-    assert len(response.choices) == num_choices
-    for choice in response.choices:
-        assert isinstance(choice.message, MessageBase)
-        assert isinstance(choice.message.content, str)
-        assert len(choice.message.content) > 0
-        assert isinstance(choice.message.role, Role)
-
-
-def assert_function_call_format(result):
-    assert isinstance(result, dict)
-    assert "queries" in result
-    assert isinstance(result["queries"], list)
-    assert len(result["queries"]) > 0
-    assert isinstance(result["queries"][0], str)
-    assert len(result["queries"][0]) > 0
-
-
-@pytest.fixture
-def model_name():
-    return "meta-llama/Llama-2-7b-chat-hf"
-
-
-@pytest.fixture
-def messages():
-    # Create a list of MessageBase objects
-    return [
-        MessageBase(role=Role.USER, content="Hello, assistant."),
-        MessageBase(
-            role=Role.ASSISTANT, content="Hello, user. How can I assist you?"
-        ),
-    ]
-
-
-@pytest.fixture
-def model_params_high_temperature():
-    # `n` parameter is not supported yet. set to 1 always
-    return {"temperature": 0.9, "top_p": 0.95, "n": 1}
-
-
-@pytest.fixture
-def model_params_low_temperature():
-    return {"temperature": 0.2, "top_p": 0.5, "n": 1}
-
-
-@pytest.fixture
-def anyscale_llm(model_name):
-    return AnyscaleLLM(model_name=model_name)
-
-
-def test_init_with_custom_params(anyscale_llm):
-    llm = AnyscaleLLM(
-        model_name="test_model_name",
-        api_key="test_api_key",
-        temperature=0.9,
-        top_p=0.95,
-        n=3,
-    )
-
-    assert llm.model_name == "test_model_name"
-    assert llm.default_model_params["temperature"] == 0.9
-    assert llm.default_model_params["top_p"] == 0.95
-    assert llm.default_model_params["n"] == 3
-    assert llm._client.api_key == "test_api_key"
-
-
-def test_chat_completion(anyscale_llm, messages):
-    response = anyscale_llm.chat_completion(system_prompt=SYSTEM_PROMPT,
-                                            chat_history=messages)
-    assert_chat_completion(response)
-
-
-def test_chat_completion_with_context(anyscale_llm, messages):
-    response = anyscale_llm.chat_completion(system_prompt=SYSTEM_PROMPT,
-                                            chat_history=messages,
-                                            context=Context(
-                                                content=StringContextContent(
-                                                    __root__="context from kb"
-                                                ),
-                                                num_tokens=5)
-                                            )
-    assert_chat_completion(response)
-
-
-def test_chat_completion_high_temperature(
-    anyscale_llm, messages, model_params_high_temperature
-):
-    response = anyscale_llm.chat_completion(
-        system_prompt=SYSTEM_PROMPT,
-        chat_history=messages,
-        model_params=model_params_high_temperature
-    )
-    assert_chat_completion(response, num_choices=model_params_high_temperature["n"])
-
-
-def test_chat_completion_low_temperature(
-    anyscale_llm, messages, model_params_low_temperature
-):
-    response = anyscale_llm.chat_completion(system_prompt=SYSTEM_PROMPT,
-                                            chat_history=messages,
-                                            model_params=model_params_low_temperature)
-    assert_chat_completion(response, num_choices=model_params_low_temperature["n"])
-
-
-def test_chat_streaming(anyscale_llm, messages):
-    stream = True
-    response = anyscale_llm.chat_completion(system_prompt=SYSTEM_PROMPT,
-                                            chat_history=messages,
-                                            stream=stream)
-    messages_received = [message for message in response]
-    assert len(messages_received) > 0
-    for message in messages_received:
-        assert isinstance(message, StreamingChatChunk)
-
-
-def test_max_tokens(anyscale_llm, messages):
-    max_tokens = 2
-    response = anyscale_llm.chat_completion(
-        system_prompt=SYSTEM_PROMPT,
-        chat_history=messages,
-        max_tokens=max_tokens
-    )
-    assert isinstance(response, ChatResponse)
-    assert len(response.choices[0].message.content.split()) <= max_tokens
-
-
-def test_negative_max_tokens(anyscale_llm, messages):
-    with pytest.raises(RuntimeError):
-        anyscale_llm.chat_completion(system_prompt=SYSTEM_PROMPT,
-                                     chat_history=messages,
-                                     max_tokens=-5)
-
-
-def test_chat_complete_api_failure_populates(anyscale_llm, messages):
-    anyscale_llm._client = MagicMock()
-    anyscale_llm._client.chat.completions.create.side_effect = Exception(
-        "API call failed"
-    )
-
-    with pytest.raises(Exception, match="API call failed"):
-        anyscale_llm.chat_completion(system_prompt=SYSTEM_PROMPT,
-                                     chat_history=messages)
-
-
-def test_available_models(anyscale_llm):
-    models = anyscale_llm.available_models
-    assert isinstance(models, list)
-    assert len(models) > 0
-    assert isinstance(models[0], str)
-    assert anyscale_llm.model_name in models
diff --git a/tests/system/llm/test_openai.py b/tests/system/llm/test_openai.py
index 7e34984a..b0d0c21f 100644
--- a/tests/system/llm/test_openai.py
+++ b/tests/system/llm/test_openai.py
@@ -4,7 +4,7 @@
 import jsonschema
 import pytest
 
-from canopy.llm import AzureOpenAILLM
+from canopy.llm import AzureOpenAILLM, AnyscaleLLM
 from canopy.models.data_models import Role, MessageBase, Context, StringContextContent  # noqa
 from canopy.models.api_models import ChatResponse, StreamingChatChunk  # noqa
 from canopy.llm.openai import OpenAILLM  # noqa
@@ -33,11 +33,6 @@ def assert_function_call_format(result):
     assert len(result["queries"][0]) > 0
 
 
-@pytest.fixture
-def model_name():
-    return "gpt-3.5-turbo-0613"
-
-
 @pytest.fixture
 def function_query_knowledgebase():
     return Function(
@@ -65,8 +60,8 @@ def model_params_low_temperature():
     return {"temperature": 0.2, "top_p": 0.5, "n": 1}
 
 
-@pytest.fixture(params=[OpenAILLM, AzureOpenAILLM])
-def openai_llm(request, model_name):
+@pytest.fixture(params=[OpenAILLM, AzureOpenAILLM, AnyscaleLLM])
+def openai_llm(request):
     llm_class = request.param
     if llm_class == AzureOpenAILLM:
         model_name = os.getenv("AZURE_DEPLOYMENT_NAME")
@@ -74,6 +69,13 @@ def openai_llm(request):
             pytest.skip(
                 "Couldn't find Azure deployment name. Skipping Azure OpenAI tests."
             )
+    elif llm_class == AnyscaleLLM:
+        if os.getenv("ANYSCALE_API_KEY") is None:
Skipping Anyscale tests.") + model_name = "mistralai/Mistral-7B-Instruct-v0.1" + else: + model_name = "gpt-3.5-turbo-0613" + return llm_class(model_name=model_name) @@ -129,6 +131,9 @@ def test_enforced_function_call(openai_llm, def test_chat_completion_high_temperature(openai_llm, messages, model_params_high_temperature): + if isinstance(openai_llm, AnyscaleLLM): + pytest.skip("Anyscale don't support n>1 for the moment.") + response = openai_llm.chat_completion( system_prompt=SYSTEM_PROMPT, chat_history=messages, @@ -152,6 +157,9 @@ def test_enforced_function_call_high_temperature(openai_llm, messages, function_query_knowledgebase, model_params_high_temperature): + if isinstance(openai_llm, AnyscaleLLM): + pytest.skip("Anyscale don't support n>1 for the moment.") + result = openai_llm.enforced_function_call( system_prompt=SYSTEM_PROMPT, chat_history=messages, @@ -165,11 +173,15 @@ def test_enforced_function_call_low_temperature(openai_llm, messages, function_query_knowledgebase, model_params_low_temperature): + model_params = model_params_low_temperature.copy() + if isinstance(openai_llm, AnyscaleLLM): + model_params["top_p"] = 1.0 + result = openai_llm.enforced_function_call( system_prompt=SYSTEM_PROMPT, chat_history=messages, function=function_query_knowledgebase, - model_params=model_params_low_temperature + model_params=model_params ) assert_function_call_format(result) @@ -177,8 +189,11 @@ def test_enforced_function_call_low_temperature(openai_llm, def test_chat_completion_with_model_name(openai_llm, messages): if isinstance(openai_llm, AzureOpenAILLM): pytest.skip("In Azure the model name has to be a valid deployment") + elif isinstance(openai_llm, AnyscaleLLM): + new_model_name = "meta-llama/Llama-2-7b-chat-hf" + else: + new_model_name = "gpt-3.5-turbo-1106" - new_model_name = "gpt-3.5-turbo-1106" assert new_model_name != openai_llm.model_name, ( "The new model name should be different from the default one. Please change it." ) @@ -265,6 +280,25 @@ def test_enforce_function_wrong_output_schema(openai_llm, "retry did not happen as expected" +def test_enforce_function_unsupported_model(openai_llm, + messages, + function_query_knowledgebase): + if isinstance(openai_llm, AzureOpenAILLM): + pytest.skip("Currently not tested in Azure") + elif isinstance(openai_llm, AnyscaleLLM): + new_model_name = "meta-llama/Llama-2-7b-chat-hf" + else: + new_model_name = "gpt-3.5-turbo-0301" + + with pytest.raises(NotImplementedError): + openai_llm.enforced_function_call( + system_prompt=SYSTEM_PROMPT, + chat_history=messages, + function=function_query_knowledgebase, + model_params={"model": new_model_name} + ) + + def test_available_models(openai_llm): if isinstance(openai_llm, AzureOpenAILLM): pytest.skip("Azure does not support listing models")