From da5fca6069521852fa0c39e92f201e0484737f0f Mon Sep 17 00:00:00 2001
From: Engel Nyst
Date: Fri, 15 Nov 2024 15:49:35 +0100
Subject: [PATCH 1/6] fix non-function calls

---
 openhands/core/message.py          |  8 ++------
 openhands/llm/fn_call_converter.py | 10 ++++++----
 openhands/llm/llm.py               | 21 +++++++++++----------
 3 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/openhands/core/message.py b/openhands/core/message.py
index a707ea3881ea..aa2ab89fc492 100644
--- a/openhands/core/message.py
+++ b/openhands/core/message.py
@@ -55,6 +55,7 @@ class Message(BaseModel):
     content: list[TextContent | ImageContent] = Field(default_factory=list)
     cache_enabled: bool = False
     vision_enabled: bool = False
+    function_calling_enabled: bool = False
     # function calling
     # - tool calls (from LLM)
     tool_calls: list[ChatCompletionMessageToolCall] | None = None
@@ -72,12 +73,7 @@ def serialize_model(self) -> dict:
         # - into a single string: for providers that don't support list of content items (e.g. no vision, no tool calls)
         # - into a list of content items: the new APIs of providers with vision/prompt caching/tool calls
         # NOTE: remove this when litellm or providers support the new API
-        if (
-            self.cache_enabled
-            or self.vision_enabled
-            or self.tool_call_id is not None
-            or self.tool_calls is not None
-        ):
+        if self.cache_enabled or self.vision_enabled or self.function_calling_enabled:
             return self._list_serializer()
         return self._string_serializer()

diff --git a/openhands/llm/fn_call_converter.py b/openhands/llm/fn_call_converter.py
index da595ec4f364..05a7d70ec71e 100644
--- a/openhands/llm/fn_call_converter.py
+++ b/openhands/llm/fn_call_converter.py
@@ -319,9 +319,8 @@ def convert_fncall_messages_to_non_fncall_messages(
     converted_messages = []
     first_user_message_encountered = False
     for message in messages:
-        role, content = message['role'], message['content']
-        if content is None:
-            content = ''
+        role = message['role']
+        content = message.get('content', '')

         # 1. SYSTEM MESSAGES
         # append system prompt suffix to content
@@ -338,6 +337,7 @@ def convert_fncall_messages_to_non_fncall_messages(
                     f'Unexpected content type {type(content)}. Expected str or list. Content: {content}'
                 )
             converted_messages.append({'role': 'system', 'content': content})
+
         # 2. USER MESSAGES (no change)
         elif role == 'user':
             # Add in-context learning example for the first user message
@@ -446,10 +446,12 @@ def convert_fncall_messages_to_non_fncall_messages(
                     f'Unexpected content type {type(content)}. Expected str or list. Content: {content}'
                 )
             converted_messages.append({'role': 'assistant', 'content': content})
+
         # 4. TOOL MESSAGES (tool outputs)
         elif role == 'tool':
             # Convert tool result as assistant message
-            prefix = f'EXECUTION RESULT of [{message["name"]}]:\n'
+            tool_name = message.get('name', 'function')
+            prefix = f'EXECUTION RESULT of [{tool_name}]:\n'
             # and omit "tool_call_id" AND "name"
             if isinstance(content, str):
                 content = prefix + content

diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 0590945995c1..629594e7453c 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -122,6 +122,9 @@ def __init__(
             drop_params=self.config.drop_params,
         )

+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore')
+            self.init_model_info()
         if self.vision_is_active():
             logger.debug('LLM: model has vision enabled')
         if self.is_caching_prompt_active():
@@ -143,16 +146,6 @@ def __init__(
             drop_params=self.config.drop_params,
         )

-        with warnings.catch_warnings():
-            warnings.simplefilter('ignore')
-            self.init_model_info()
-            if self.vision_is_active():
-                logger.debug('LLM: model has vision enabled')
-            if self.is_caching_prompt_active():
-                logger.debug('LLM: caching prompt enabled')
-            if self.is_function_calling_active():
-                logger.debug('LLM: model supports function calling')
-
         self._completion_unwrapped = self._completion

         @self.retry_decorator(
@@ -343,6 +336,13 @@ def init_model_info(self):
                 pass
         logger.debug(f'Model info: {self.model_info}')

+        if self.config.model.startswith('huggingface'):
+            # HF doesn't support the OpenAI default value for top_p (1)
+            logger.debug(
+                f'Setting top_p to 0.9 for Hugging Face model: {self.config.model}'
+            )
+            self.config.top_p = 0.9 if self.config.top_p == 1 else self.config.top_p
+
         # Set the max tokens in an LM-specific way if not set
         if self.config.max_input_tokens is None:
             if (
@@ -567,6 +567,7 @@ def format_messages_for_llm(self, messages: Message | list[Message]) -> list[dict]:
         for message in messages:
             message.cache_enabled = self.is_caching_prompt_active()
             message.vision_enabled = self.vision_is_active()
+            message.function_calling_enabled = self.is_function_calling_active()

         # let pydantic handle the serialization
         return [message.model_dump() for message in messages]
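A minimal sketch of the serializer switch patch 1 introduces (not part of the patch; the class below is a simplified stand-in for openhands.core.message.Message): with no capability flag set, a message collapses to a plain string for providers without native function calling, while any enabled flag switches to the list-of-content-items form.

```python
from pydantic import BaseModel


class MessageSketch(BaseModel):
    # simplified stand-in for openhands.core.message.Message
    role: str
    text: str
    cache_enabled: bool = False
    vision_enabled: bool = False
    function_calling_enabled: bool = False

    def serialize(self) -> dict:
        # list-of-content-items form: providers with vision/caching/tool APIs
        if self.cache_enabled or self.vision_enabled or self.function_calling_enabled:
            return {'role': self.role, 'content': [{'type': 'text', 'text': self.text}]}
        # single-string form: providers that only accept plain content
        return {'role': self.role, 'content': self.text}


print(MessageSketch(role='user', text='hi').serialize())
# {'role': 'user', 'content': 'hi'}
print(MessageSketch(role='user', text='hi', function_calling_enabled=True).serialize())
# {'role': 'user', 'content': [{'type': 'text', 'text': 'hi'}]}
```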
From ba1791a2e447250dee73cf8070ae804bee08b9f8 Mon Sep 17 00:00:00 2001
From: Engel Nyst
Date: Fri, 15 Nov 2024 18:18:19 +0100
Subject: [PATCH 2/6] Update openhands/core/message.py

Co-authored-by: Xingyao Wang
---
 openhands/core/message.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/openhands/core/message.py b/openhands/core/message.py
index aa2ab89fc492..f63d9ac9248d 100644
--- a/openhands/core/message.py
+++ b/openhands/core/message.py
@@ -73,7 +73,12 @@ def serialize_model(self) -> dict:
         # - into a single string: for providers that don't support list of content items (e.g. no vision, no tool calls)
         # - into a list of content items: the new APIs of providers with vision/prompt caching/tool calls
         # NOTE: remove this when litellm or providers support the new API
-        if self.cache_enabled or self.vision_enabled or self.function_calling_enabled:
+        if (
+            self.cache_enabled
+            or self.vision_enabled
+            or self.tool_call_id is not None
+            or self.tool_calls is not None
+        ):
             return self._list_serializer()
         return self._string_serializer()
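Why the suggested condition above matters, as a standalone check (illustrative only, not part of the patch; the 'call_1' id is made up): at this point in the series the tool keys are only attached by the list serializer, so a message carrying a tool_call_id must take the list path even when every capability flag is off.

```python
def use_list_serializer(
    cache_enabled: bool,
    vision_enabled: bool,
    tool_call_id: str | None,
    tool_calls: list | None,
) -> bool:
    # mirrors the restored condition in serialize_model above
    return (
        cache_enabled
        or vision_enabled
        or tool_call_id is not None
        or tool_calls is not None
    )


# a tool response with no capability flags still needs the list form
print(use_list_serializer(False, False, 'call_1', None))  # True
# a plain text message falls through to the single-string form
print(use_list_serializer(False, False, None, None))      # False
```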
From 4a31c6fc13b9014e89f66d6962bf6dbed5d8b5eb Mon Sep 17 00:00:00 2001
From: Engel Nyst
Date: Fri, 15 Nov 2024 18:20:24 +0100
Subject: [PATCH 3/6] Update openhands/llm/llm.py

Co-authored-by: Xingyao Wang
---
 openhands/llm/llm.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 629594e7453c..f57c2b79a6d4 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -567,7 +567,6 @@ def format_messages_for_llm(self, messages: Message | list[Message]) -> list[dict]:
         for message in messages:
             message.cache_enabled = self.is_caching_prompt_active()
             message.vision_enabled = self.vision_is_active()
-            message.function_calling_enabled = self.is_function_calling_active()

         # let pydantic handle the serialization
         return [message.model_dump() for message in messages]

From 2ea1297c3e483f0fad6aebb9c3d24168ec0964d1 Mon Sep 17 00:00:00 2001
From: Engel Nyst
Date: Fri, 15 Nov 2024 18:20:32 +0100
Subject: [PATCH 4/6] Update openhands/core/message.py

Co-authored-by: Xingyao Wang
---
 openhands/core/message.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/openhands/core/message.py b/openhands/core/message.py
index f63d9ac9248d..a707ea3881ea 100644
--- a/openhands/core/message.py
+++ b/openhands/core/message.py
@@ -55,7 +55,6 @@ class Message(BaseModel):
     content: list[TextContent | ImageContent] = Field(default_factory=list)
     cache_enabled: bool = False
     vision_enabled: bool = False
-    function_calling_enabled: bool = False
     # function calling
     # - tool calls (from LLM)
     tool_calls: list[ChatCompletionMessageToolCall] | None = None
From ffca2b43bb6265ffc6e61051369ff5ba212012df Mon Sep 17 00:00:00 2001
From: Engel Nyst
Date: Fri, 15 Nov 2024 19:29:45 +0100
Subject: [PATCH 5/6] fix tool calls in non-native function calling messages

---
 openhands/core/message.py          | 52 ++++++++++++++++++++----------
 openhands/llm/fn_call_converter.py |  2 +-
 openhands/llm/llm.py               |  1 +
 3 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/openhands/core/message.py b/openhands/core/message.py
index a707ea3881ea..b3530eb4317e 100644
--- a/openhands/core/message.py
+++ b/openhands/core/message.py
@@ -56,6 +56,7 @@ class Message(BaseModel):
     cache_enabled: bool = False
     vision_enabled: bool = False
     # function calling
+    function_calling_enabled: bool = False
     # - tool calls (from LLM)
     tool_calls: list[ChatCompletionMessageToolCall] | None = None
     # - tool execution result (to LLM)
@@ -72,22 +73,21 @@ def serialize_model(self) -> dict:
         # - into a single string: for providers that don't support list of content items (e.g. no vision, no tool calls)
         # - into a list of content items: the new APIs of providers with vision/prompt caching/tool calls
         # NOTE: remove this when litellm or providers support the new API
-        if (
-            self.cache_enabled
-            or self.vision_enabled
-            or self.tool_call_id is not None
-            or self.tool_calls is not None
-        ):
+        if self.cache_enabled or self.vision_enabled or self.function_calling_enabled:
             return self._list_serializer()
         return self._string_serializer()

-    def _string_serializer(self):
+    def _string_serializer(self) -> dict:
+        # convert content to a single string
         content = '\n'.join(
             item.text for item in self.content if isinstance(item, TextContent)
         )
-        return {'content': content, 'role': self.role}
+        message_dict: dict = {'content': content, 'role': self.role}
+
+        # add tool call keys if we have a tool call or response
+        return self._add_tool_call_keys(message_dict)

-    def _list_serializer(self):
+    def _list_serializer(self) -> dict:
         content: list[dict] = []
         role_tool_with_prompt_caching = False
         for item in self.content:
@@ -102,24 +102,42 @@ def _list_serializer(self):
             elif isinstance(item, ImageContent) and self.vision_enabled:
                 content.extend(d)

-        ret: dict = {'content': content, 'role': self.role}
+        message_dict: dict = {'content': content, 'role': self.role}

+        # pop content if it's empty
         if not content or (
             len(content) == 1
             and content[0]['type'] == 'text'
             and content[0]['text'] == ''
         ):
-            ret.pop('content')
+            message_dict.pop('content')
+
+        # some providers, like HF and Groq/llama, don't support a list here, but a single string
+        # if not self.function_calling_enabled:
+        #     content_str = '\n'.join([item['text'] for item in content])
+        #     message_dict['content'] = [{'type': 'text', 'text': content_str}]

         if role_tool_with_prompt_caching:
-            ret['cache_control'] = {'type': 'ephemeral'}
+            message_dict['cache_control'] = {'type': 'ephemeral'}
+
+        # add tool call keys if we have a tool call or response
+        return self._add_tool_call_keys(message_dict)

+    def _add_tool_call_keys(self, message_dict: dict) -> dict:
+        """Add tool call keys if we have a tool call or response.
+
+        NOTE: this is necessary for both native and non-native tool calling"""
+
+        # an assistant message calling a tool
+        if self.tool_calls is not None:
+            message_dict['tool_calls'] = self.tool_calls
+
+        # an observation message with tool response
         if self.tool_call_id is not None:
             assert (
                 self.name is not None
             ), 'name is required when tool_call_id is not None'
-            ret['tool_call_id'] = self.tool_call_id
-            ret['name'] = self.name
-        if self.tool_calls:
-            ret['tool_calls'] = self.tool_calls
-        return ret
+            message_dict['tool_call_id'] = self.tool_call_id
+            message_dict['name'] = self.name
+
+        return message_dict

diff --git a/openhands/llm/fn_call_converter.py b/openhands/llm/fn_call_converter.py
index 05a7d70ec71e..134324de084f 100644
--- a/openhands/llm/fn_call_converter.py
+++ b/openhands/llm/fn_call_converter.py
@@ -449,7 +449,7 @@ def convert_fncall_messages_to_non_fncall_messages(
         # 4. TOOL MESSAGES (tool outputs)
         elif role == 'tool':
-            # Convert tool result as assistant message
+            # Convert tool result as user message
             tool_name = message.get('name', 'function')
             prefix = f'EXECUTION RESULT of [{tool_name}]:\n'
             # and omit "tool_call_id" AND "name"

diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index f57c2b79a6d4..629594e7453c 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -567,6 +567,7 @@ def format_messages_for_llm(self, messages: Message | list[Message]) -> list[dict]:
         for message in messages:
             message.cache_enabled = self.is_caching_prompt_active()
             message.vision_enabled = self.vision_is_active()
+            message.function_calling_enabled = self.is_function_calling_active()

         # let pydantic handle the serialization
         return [message.model_dump() for message in messages]
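The net effect of patch 5 is that both serializers now funnel through one helper, so tool-call keys survive the single-string path too. A standalone sketch of that helper's behavior (the function below mirrors _add_tool_call_keys; the 'call_1' and 'execute_bash' values are made-up examples):

```python
def add_tool_call_keys(
    message_dict: dict,
    tool_calls: list | None = None,
    tool_call_id: str | None = None,
    name: str | None = None,
) -> dict:
    # an assistant message calling a tool
    if tool_calls is not None:
        message_dict['tool_calls'] = tool_calls
    # an observation message carrying a tool response
    if tool_call_id is not None:
        assert name is not None, 'name is required when tool_call_id is not None'
        message_dict['tool_call_id'] = tool_call_id
        message_dict['name'] = name
    return message_dict


# a tool result keeps its keys even when content was flattened to a string
print(add_tool_call_keys({'role': 'tool', 'content': 'done'},
                         tool_call_id='call_1', name='execute_bash'))
# {'role': 'tool', 'content': 'done', 'tool_call_id': 'call_1', 'name': 'execute_bash'}
```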
From f6e70f5b8b8cc318333db80e4dc35ad526b5c1f7 Mon Sep 17 00:00:00 2001
From: Engel Nyst
Date: Thu, 21 Nov 2024 01:40:32 +0100
Subject: [PATCH 6/6] add comment

---
 openhands/core/message.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/openhands/core/message.py b/openhands/core/message.py
index b3530eb4317e..a5b67917eaee 100644
--- a/openhands/core/message.py
+++ b/openhands/core/message.py
@@ -75,6 +75,7 @@ def serialize_model(self) -> dict:
         # NOTE: remove this when litellm or providers support the new API
         if self.cache_enabled or self.vision_enabled or self.function_calling_enabled:
             return self._list_serializer()
+        # some providers, like HF and Groq/llama, don't support a list here, but a single string
         return self._string_serializer()

     def _string_serializer(self) -> dict:
@@ -112,11 +113,6 @@ def _list_serializer(self) -> dict:
         ):
             message_dict.pop('content')

-        # some providers, like HF and Groq/llama, don't support a list here, but a single string
-        # if not self.function_calling_enabled:
-        #     content_str = '\n'.join([item['text'] for item in content])
-        #     message_dict['content'] = [{'type': 'text', 'text': content_str}]
-
         if role_tool_with_prompt_caching:
             message_dict['cache_control'] = {'type': 'ephemeral'}
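Taken together, the converter-side changes in patches 1 and 5 make tool messages robust to missing keys and re-emit them for the non-native path. A hedged sketch of that tool branch (assuming, per the corrected comment in patch 5, that the converted result is sent back as a user message; the real converter appends it to converted_messages):

```python
def convert_tool_message(message: dict) -> dict:
    content = message.get('content', '')         # tolerate a missing 'content' key
    tool_name = message.get('name', 'function')  # tolerate a missing tool name
    prefix = f'EXECUTION RESULT of [{tool_name}]:\n'
    if isinstance(content, str):
        content = prefix + content
    # 'tool_call_id' and 'name' are omitted from the converted message
    return {'role': 'user', 'content': content}


print(convert_tool_message({'role': 'tool', 'content': 'ok'}))
# {'role': 'user', 'content': 'EXECUTION RESULT of [function]:\nok'}
```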