From eb8d1600c3198bfe2ef51a24f228ccd10a70d6be Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Sat, 1 Feb 2025 18:14:08 +0100 Subject: [PATCH] Chore: clean up LLM (prompt caching, supports fn calling), leftover renames (#6095) --- .github/workflows/ghcr-build.yml | 8 +- .../current/usage/architecture/runtime.md | 4 +- .../current/usage/configuration-options.md | 2 +- .../usage/how-to/evaluation-harness.md | 2 +- .../current/usage/architecture/runtime.md | 4 +- .../current/usage/configuration-options.md | 2 +- .../usage/how-to/evaluation-harness.md | 2 +- docs/modules/usage/architecture/runtime.md | 4 +- docs/modules/usage/configuration-options.md | 2 +- .../usage/how-to/evaluation-harness.md | 2 +- openhands/core/exceptions.py | 6 - openhands/llm/llm.py | 194 +++++++++--------- openhands/llm/retry_mixin.py | 6 +- tests/runtime/test_bash.py | 2 +- tests/runtime/test_browsing.py | 2 +- tests/runtime/test_edit.py | 2 +- tests/runtime/test_env_vars.py | 2 +- tests/runtime/test_images.py | 2 +- tests/runtime/test_stress_remote_runtime.py | 2 +- tests/unit/test_llm.py | 21 -- tests/unit/test_prompt_caching.py | 35 ---- 21 files changed, 119 insertions(+), 187 deletions(-) diff --git a/.github/workflows/ghcr-build.yml b/.github/workflows/ghcr-build.yml index 6ec93e78d1db..acdc89f0f495 100644 --- a/.github/workflows/ghcr-build.yml +++ b/.github/workflows/ghcr-build.yml @@ -219,7 +219,7 @@ jobs: exit 1 fi - # Run unit tests with the EventStream runtime Docker images as root + # Run unit tests with the Docker runtime Docker images as root test_runtime_root: name: RT Unit Tests (Root) needs: [ghcr_build_runtime] @@ -286,7 +286,7 @@ jobs: image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image }} image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]') - TEST_RUNTIME=eventstream \ + TEST_RUNTIME=docker \ SANDBOX_USER_ID=$(id -u) \ SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \ TEST_IN_CI=true \ @@ -297,7 +297,7 @@ jobs: env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - # Run unit tests with the EventStream runtime Docker images as openhands user + # Run unit tests with the Docker runtime Docker images as openhands user test_runtime_oh: name: RT Unit Tests (openhands) runs-on: ubuntu-latest @@ -363,7 +363,7 @@ jobs: image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image }} image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]') - TEST_RUNTIME=eventstream \ + TEST_RUNTIME=docker \ SANDBOX_USER_ID=$(id -u) \ SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \ TEST_IN_CI=true \ diff --git a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/architecture/runtime.md b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/architecture/runtime.md index 42e1dae5d824..71e121d45d62 100644 --- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/architecture/runtime.md +++ b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/architecture/runtime.md @@ -1,8 +1,8 @@ -# 📦 Runtime EventStream +# 📦 Runtime Docker -Le Runtime EventStream d'OpenHands est le composant principal qui permet l'exécution sécurisée et flexible des actions des agents d'IA. +Le Runtime Docker d'OpenHands est le composant principal qui permet l'exécution sécurisée et flexible des actions des agents d'IA. Il crée un environnement en bac à sable (sandbox) en utilisant Docker, où du code arbitraire peut être exécuté en toute sécurité sans risquer le système hôte. ## Pourquoi avons-nous besoin d'un runtime en bac à sable ? 
diff --git a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/configuration-options.md b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/configuration-options.md index 0f22d218b817..7115c85b1e1f 100644 --- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/configuration-options.md +++ b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/configuration-options.md @@ -163,7 +163,7 @@ Les options de configuration de base sont définies dans la section `[core]` du - `runtime` - Type : `str` - - Valeur par défaut : `"eventstream"` + - Valeur par défaut : `"docker"` - Description : Environnement d'exécution - `default_agent` diff --git a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/how-to/evaluation-harness.md b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/how-to/evaluation-harness.md index 1e15665e79fa..b0aee6764acf 100644 --- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/how-to/evaluation-harness.md +++ b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/how-to/evaluation-harness.md @@ -114,7 +114,7 @@ Pour créer un workflow d'évaluation pour votre benchmark, suivez ces étapes : def get_config(instance: pd.Series, metadata: EvalMetadata) -> AppConfig: config = AppConfig( default_agent=metadata.agent_class, - runtime='eventstream', + runtime='docker', max_iterations=metadata.max_iterations, sandbox=SandboxConfig( base_container_image='your_container_image', diff --git a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/architecture/runtime.md b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/architecture/runtime.md index fe00e9399540..5e01f62da5d8 100644 --- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/architecture/runtime.md +++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/architecture/runtime.md @@ -1,8 +1,8 @@ 以下是翻译后的内容: -# 📦 EventStream 运行时 +# 📦 Docker 运行时 -OpenHands EventStream 运行时是实现 AI 代理操作安全灵活执行的核心组件。 +OpenHands Docker 运行时是实现 AI 代理操作安全灵活执行的核心组件。 它使用 Docker 创建一个沙盒环境,可以安全地运行任意代码而不会危及主机系统。 ## 为什么我们需要沙盒运行时? 
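The evaluation-harness hunks in this patch (the French one above, the Chinese and English ones below) all make the same one-line change: `runtime='eventstream'` becomes `runtime='docker'` in the benchmark's `get_config` helper. For quick reference, a minimal sketch of the updated helper is shown here; the import paths and the placeholder container image are assumptions for illustration, while the field names and values themselves are taken from the diff.

```python
import pandas as pd

# Assumed import locations; only AppConfig, SandboxConfig, EvalMetadata and the
# runtime/base_container_image fields are taken from the diff itself.
from openhands.core.config import AppConfig, SandboxConfig
from evaluation.utils.shared import EvalMetadata  # assumed location


def get_config(instance: pd.Series, metadata: EvalMetadata) -> AppConfig:
    return AppConfig(
        default_agent=metadata.agent_class,
        runtime='docker',  # formerly 'eventstream'; selects the Docker-based sandbox runtime
        max_iterations=metadata.max_iterations,
        sandbox=SandboxConfig(
            base_container_image='your_container_image',  # placeholder, as in the docs
        ),
    )
```

For benchmark authors nothing changes beyond the string value: the implementation is the same Docker-based sandbox runtime, and this patch only finishes the rename from `eventstream` to `docker` in docs, CI, and examples.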
diff --git a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/configuration-options.md b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/configuration-options.md index b79a65073acc..4676cfd23822 100644 --- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/configuration-options.md +++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/configuration-options.md @@ -162,7 +162,7 @@ - `runtime` - 类型: `str` - - 默认值: `"eventstream"` + - 默认值: `"docker"` - 描述: 运行时环境 - `default_agent` diff --git a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/how-to/evaluation-harness.md b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/how-to/evaluation-harness.md index 1e3fff538ffb..9872034bd1a3 100644 --- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/how-to/evaluation-harness.md +++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/how-to/evaluation-harness.md @@ -112,7 +112,7 @@ OpenHands 的主要入口点在 `openhands/core/main.py` 中。以下是它的 def get_config(instance: pd.Series, metadata: EvalMetadata) -> AppConfig: config = AppConfig( default_agent=metadata.agent_class, - runtime='eventstream', + runtime='docker', max_iterations=metadata.max_iterations, sandbox=SandboxConfig( base_container_image='your_container_image', diff --git a/docs/modules/usage/architecture/runtime.md b/docs/modules/usage/architecture/runtime.md index 3aa05056fff2..b08a1ed99bbf 100644 --- a/docs/modules/usage/architecture/runtime.md +++ b/docs/modules/usage/architecture/runtime.md @@ -1,6 +1,6 @@ -# 📦 EventStream Runtime +# 📦 Docker Runtime -The OpenHands EventStream Runtime is the core component that enables secure and flexible execution of AI agent's action. +The OpenHands Docker Runtime is the core component that enables secure and flexible execution of AI agent's action. It creates a sandboxed environment using Docker, where arbitrary code can be run safely without risking the host system. ## Why do we need a sandboxed runtime? 
diff --git a/docs/modules/usage/configuration-options.md b/docs/modules/usage/configuration-options.md index ff0aa5674cc8..90050765d6ba 100644 --- a/docs/modules/usage/configuration-options.md +++ b/docs/modules/usage/configuration-options.md @@ -126,7 +126,7 @@ The core configuration options are defined in the `[core]` section of the `confi - `runtime` - Type: `str` - - Default: `"eventstream"` + - Default: `"docker"` - Description: Runtime environment - `default_agent` diff --git a/docs/modules/usage/how-to/evaluation-harness.md b/docs/modules/usage/how-to/evaluation-harness.md index 339783ea8d7e..79ecf7fe371b 100644 --- a/docs/modules/usage/how-to/evaluation-harness.md +++ b/docs/modules/usage/how-to/evaluation-harness.md @@ -112,7 +112,7 @@ To create an evaluation workflow for your benchmark, follow these steps: def get_config(instance: pd.Series, metadata: EvalMetadata) -> AppConfig: config = AppConfig( default_agent=metadata.agent_class, - runtime='eventstream', + runtime='docker', max_iterations=metadata.max_iterations, sandbox=SandboxConfig( base_container_image='your_container_image', diff --git a/openhands/core/exceptions.py b/openhands/core/exceptions.py index 532f8becbf66..db8b9afa8838 100644 --- a/openhands/core/exceptions.py +++ b/openhands/core/exceptions.py @@ -98,12 +98,6 @@ def __init__(self, message='Operation was cancelled'): super().__init__(message) -class CloudFlareBlockageError(Exception): - """Exception raised when a request is blocked by CloudFlare.""" - - pass - - # ============================================ # LLM function calling Exceptions # ============================================ diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py index af25baded4c4..5b656387ecc8 100644 --- a/openhands/llm/llm.py +++ b/openhands/llm/llm.py @@ -27,7 +27,6 @@ from litellm.types.utils import CostPerToken, ModelResponse, Usage from litellm.utils import create_pretrained_tokenizer -from openhands.core.exceptions import CloudFlareBlockageError from openhands.core.logger import openhands_logger as logger from openhands.core.message import Message from openhands.llm.debug_mixin import DebugMixin @@ -218,99 +217,86 @@ def wrapper(*args, **kwargs): # log the entire LLM prompt self.log_prompt(messages) - if self.is_caching_prompt_active(): - # Anthropic-specific prompt caching - if 'claude-3' in self.config.model: - kwargs['extra_headers'] = { - 'anthropic-beta': 'prompt-caching-2024-07-31', - } - # set litellm modify_params to the configured value # True by default to allow litellm to do transformations like adding a default message, when a message is empty # NOTE: this setting is global; unlike drop_params, it cannot be overridden in the litellm completion partial litellm.modify_params = self.config.modify_params - try: - # Record start time for latency measurement - start_time = time.time() - # we don't support streaming here, thus we get a ModelResponse - resp: ModelResponse = self._completion_unwrapped(*args, **kwargs) + # Record start time for latency measurement + start_time = time.time() - # Calculate and record latency - latency = time.time() - start_time - response_id = resp.get('id', 'unknown') - self.metrics.add_response_latency(latency, response_id) + # we don't support streaming here, thus we get a ModelResponse + resp: ModelResponse = self._completion_unwrapped(*args, **kwargs) - non_fncall_response = copy.deepcopy(resp) - if mock_function_calling: - assert len(resp.choices) == 1 - assert mock_fncall_tools is not None - non_fncall_response_message = 
resp.choices[0].message - fn_call_messages_with_response = ( - convert_non_fncall_messages_to_fncall_messages( - messages + [non_fncall_response_message], mock_fncall_tools - ) + # Calculate and record latency + latency = time.time() - start_time + response_id = resp.get('id', 'unknown') + self.metrics.add_response_latency(latency, response_id) + + non_fncall_response = copy.deepcopy(resp) + if mock_function_calling: + assert len(resp.choices) == 1 + assert mock_fncall_tools is not None + non_fncall_response_message = resp.choices[0].message + fn_call_messages_with_response = ( + convert_non_fncall_messages_to_fncall_messages( + messages + [non_fncall_response_message], mock_fncall_tools ) - fn_call_response_message = fn_call_messages_with_response[-1] - if not isinstance(fn_call_response_message, LiteLLMMessage): - fn_call_response_message = LiteLLMMessage( - **fn_call_response_message - ) - resp.choices[0].message = fn_call_response_message - - message_back: str = resp['choices'][0]['message']['content'] or '' - tool_calls: list[ChatCompletionMessageToolCall] = resp['choices'][0][ - 'message' - ].get('tool_calls', []) - if tool_calls: - for tool_call in tool_calls: - fn_name = tool_call.function.name - fn_args = tool_call.function.arguments - message_back += f'\nFunction call: {fn_name}({fn_args})' - - # log the LLM response - self.log_response(message_back) - - # post-process the response first to calculate cost - cost = self._post_completion(resp) - - # log for evals or other scripts that need the raw completion - if self.config.log_completions: - assert self.config.log_completions_folder is not None - log_file = os.path.join( - self.config.log_completions_folder, - # use the metric model name (for draft editor) - f'{self.metrics.model_name.replace("/", "__")}-{time.time()}.json', + ) + fn_call_response_message = fn_call_messages_with_response[-1] + if not isinstance(fn_call_response_message, LiteLLMMessage): + fn_call_response_message = LiteLLMMessage( + **fn_call_response_message ) + resp.choices[0].message = fn_call_response_message + + message_back: str = resp['choices'][0]['message']['content'] or '' + tool_calls: list[ChatCompletionMessageToolCall] = resp['choices'][0][ + 'message' + ].get('tool_calls', []) + if tool_calls: + for tool_call in tool_calls: + fn_name = tool_call.function.name + fn_args = tool_call.function.arguments + message_back += f'\nFunction call: {fn_name}({fn_args})' + + # log the LLM response + self.log_response(message_back) + + # post-process the response first to calculate cost + cost = self._post_completion(resp) + + # log for evals or other scripts that need the raw completion + if self.config.log_completions: + assert self.config.log_completions_folder is not None + log_file = os.path.join( + self.config.log_completions_folder, + # use the metric model name (for draft editor) + f'{self.metrics.model_name.replace("/", "__")}-{time.time()}.json', + ) + + # set up the dict to be logged + _d = { + 'messages': messages, + 'response': resp, + 'args': args, + 'kwargs': {k: v for k, v in kwargs.items() if k != 'messages'}, + 'timestamp': time.time(), + 'cost': cost, + } + + # if non-native function calling, save messages/response separately + if mock_function_calling: + # Overwrite response as non-fncall to be consistent with messages + _d['response'] = non_fncall_response + + # Save fncall_messages/response separately + _d['fncall_messages'] = original_fncall_messages + _d['fncall_response'] = resp + with open(log_file, 'w') as f: + f.write(json.dumps(_d)) - 
# set up the dict to be logged - _d = { - 'messages': messages, - 'response': resp, - 'args': args, - 'kwargs': {k: v for k, v in kwargs.items() if k != 'messages'}, - 'timestamp': time.time(), - 'cost': cost, - } - - # if non-native function calling, save messages/response separately - if mock_function_calling: - # Overwrite response as non-fncall to be consistent with messages - _d['response'] = non_fncall_response - - # Save fncall_messages/response separately - _d['fncall_messages'] = original_fncall_messages - _d['fncall_response'] = resp - with open(log_file, 'w') as f: - f.write(json.dumps(_d)) - - return resp - except APIError as e: - if 'Attention Required! | Cloudflare' in str(e): - raise CloudFlareBlockageError( - 'Request blocked by CloudFlare' - ) from e - raise + return resp self._completion = wrapper @@ -414,6 +400,25 @@ def init_model_info(self): ): self.config.max_output_tokens = self.model_info['max_tokens'] + # Initialize function calling capability + # Check if model name is in our supported list + model_name_supported = ( + self.config.model in FUNCTION_CALLING_SUPPORTED_MODELS + or self.config.model.split('/')[-1] in FUNCTION_CALLING_SUPPORTED_MODELS + or any(m in self.config.model for m in FUNCTION_CALLING_SUPPORTED_MODELS) + ) + + # Handle native_tool_calling user-defined configuration + if self.config.native_tool_calling is None: + self._function_calling_active = model_name_supported + elif self.config.native_tool_calling is False: + self._function_calling_active = False + else: + # try to enable native tool calling if supported by the model + self._function_calling_active = litellm.supports_function_calling( + model=self.config.model + ) + def vision_is_active(self) -> bool: with warnings.catch_warnings(): warnings.simplefilter('ignore') @@ -455,24 +460,11 @@ def is_caching_prompt_active(self) -> bool: ) def is_function_calling_active(self) -> bool: - # Check if model name is in our supported list - model_name_supported = ( - self.config.model in FUNCTION_CALLING_SUPPORTED_MODELS - or self.config.model.split('/')[-1] in FUNCTION_CALLING_SUPPORTED_MODELS - or any(m in self.config.model for m in FUNCTION_CALLING_SUPPORTED_MODELS) - ) + """Returns whether function calling is supported and enabled for this LLM instance. - # Handle native_tool_calling user-defined configuration - if self.config.native_tool_calling is None: - return model_name_supported - elif self.config.native_tool_calling is False: - return False - else: - # try to enable native tool calling if supported by the model - supports_fn_call = litellm.supports_function_calling( - model=self.config.model - ) - return supports_fn_call + The result is cached during initialization for performance. + """ + return self._function_calling_active def _post_completion(self, response: ModelResponse) -> float: """Post-process the completion response. diff --git a/openhands/llm/retry_mixin.py b/openhands/llm/retry_mixin.py index 714153e4c1a1..08a8add63939 100644 --- a/openhands/llm/retry_mixin.py +++ b/openhands/llm/retry_mixin.py @@ -24,7 +24,7 @@ def retry_decorator(self, **kwargs): A retry decorator with the parameters customizable in configuration. 
""" num_retries = kwargs.get('num_retries') - retry_exceptions = kwargs.get('retry_exceptions') + retry_exceptions: tuple = kwargs.get('retry_exceptions', ()) retry_min_wait = kwargs.get('retry_min_wait') retry_max_wait = kwargs.get('retry_max_wait') retry_multiplier = kwargs.get('retry_multiplier') @@ -39,7 +39,9 @@ def before_sleep(retry_state): before_sleep=before_sleep, stop=stop_after_attempt(num_retries) | stop_if_should_exit(), reraise=True, - retry=(retry_if_exception_type(retry_exceptions)), + retry=( + retry_if_exception_type(retry_exceptions) + ), # retry only for these types wait=wait_exponential( multiplier=retry_multiplier, min=retry_min_wait, diff --git a/tests/runtime/test_bash.py b/tests/runtime/test_bash.py index 4af28d9065b0..d107cc9569c8 100644 --- a/tests/runtime/test_bash.py +++ b/tests/runtime/test_bash.py @@ -1,4 +1,4 @@ -"""Bash-related tests for the EventStreamRuntime, which connects to the ActionExecutor running in the sandbox.""" +"""Bash-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox.""" import os import time diff --git a/tests/runtime/test_browsing.py b/tests/runtime/test_browsing.py index 6097c891907b..0dee3750953f 100644 --- a/tests/runtime/test_browsing.py +++ b/tests/runtime/test_browsing.py @@ -1,4 +1,4 @@ -"""Browsing-related tests for the EventStreamRuntime, which connects to the ActionExecutor running in the sandbox.""" +"""Browsing-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox.""" from conftest import _close_test_runtime, _load_runtime diff --git a/tests/runtime/test_edit.py b/tests/runtime/test_edit.py index 99a7ce113b18..c507166a840d 100644 --- a/tests/runtime/test_edit.py +++ b/tests/runtime/test_edit.py @@ -1,4 +1,4 @@ -"""Edit-related tests for the EventStreamRuntime.""" +"""Edit-related tests for the DockerRuntime.""" import os diff --git a/tests/runtime/test_env_vars.py b/tests/runtime/test_env_vars.py index de65bf8101ed..898003ff66c7 100644 --- a/tests/runtime/test_env_vars.py +++ b/tests/runtime/test_env_vars.py @@ -1,4 +1,4 @@ -"""Env vars related tests for the EventStreamRuntime, which connects to the ActionExecutor running in the sandbox.""" +"""Env vars related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox.""" import os from unittest.mock import patch diff --git a/tests/runtime/test_images.py b/tests/runtime/test_images.py index 1dd7e295c415..b7ab82b54b3c 100644 --- a/tests/runtime/test_images.py +++ b/tests/runtime/test_images.py @@ -1,4 +1,4 @@ -"""Image-related tests for the EventStreamRuntime, which connects to the ActionExecutor running in the sandbox.""" +"""Image-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox.""" import pytest from conftest import _close_test_runtime, _load_runtime diff --git a/tests/runtime/test_stress_remote_runtime.py b/tests/runtime/test_stress_remote_runtime.py index a2f6c7d2082b..5c201af8b726 100644 --- a/tests/runtime/test_stress_remote_runtime.py +++ b/tests/runtime/test_stress_remote_runtime.py @@ -1,4 +1,4 @@ -"""Bash-related tests for the EventStreamRuntime, which connects to the ActionExecutor running in the sandbox.""" +"""Bash-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox.""" import asyncio import os diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py index 227b0006b020..98783c050d0a 100644 --- a/tests/unit/test_llm.py +++ b/tests/unit/test_llm.py @@ 
-389,27 +389,6 @@ def test_completion_with_two_positional_args(mock_litellm_completion, default_co ) # No positional args should be passed to litellm_completion here -@patch('openhands.llm.llm.litellm_completion') -def test_llm_cloudflare_blockage(mock_litellm_completion, default_config): - from litellm.exceptions import APIError - - from openhands.core.exceptions import CloudFlareBlockageError - - llm = LLM(default_config) - mock_litellm_completion.side_effect = APIError( - message='Attention Required! | Cloudflare', - llm_provider='test_provider', - model='test_model', - status_code=403, - ) - - with pytest.raises(CloudFlareBlockageError, match='Request blocked by CloudFlare'): - llm.completion(messages=[{'role': 'user', 'content': 'Hello'}]) - - # Ensure the completion was called - mock_litellm_completion.assert_called_once() - - @patch('openhands.llm.llm.litellm.token_counter') def test_get_token_count_with_dict_messages(mock_token_counter, default_config): mock_token_counter.return_value = 42 diff --git a/tests/unit/test_prompt_caching.py b/tests/unit/test_prompt_caching.py index ea4eeb59935d..3258fa486a9f 100644 --- a/tests/unit/test_prompt_caching.py +++ b/tests/unit/test_prompt_caching.py @@ -128,38 +128,3 @@ def test_get_messages_prompt_caching(codeact_agent: CodeActAgent): assert cached_user_messages[0].content[0].text.startswith('You are OpenHands agent') assert cached_user_messages[2].content[0].text.startswith('User message 1') assert cached_user_messages[3].content[0].text.startswith('User message 1') - - -def test_prompt_caching_headers(codeact_agent: CodeActAgent): - history = list() - # Setup - msg1 = MessageAction('Hello, agent!') - msg1._source = 'user' - history.append(msg1) - msg2 = MessageAction('Hello, user!') - msg2._source = 'agent' - history.append(msg2) - - mock_state = Mock() - mock_state.history = history - mock_state.max_iterations = 5 - mock_state.iteration = 0 - mock_state.extra_data = {} - - codeact_agent.reset() - - # Create a mock for litellm_completion - def check_headers(**kwargs): - assert 'extra_headers' in kwargs - assert 'anthropic-beta' in kwargs['extra_headers'] - assert kwargs['extra_headers']['anthropic-beta'] == 'prompt-caching-2024-07-31' - return ModelResponse( - choices=[{'message': {'content': 'Hello! How can I assist you today?'}}] - ) - - codeact_agent.llm._completion_unwrapped = check_headers - result = codeact_agent.step(mock_state) - - # Assert - assert isinstance(result, MessageAction) - assert result.content == 'Hello! How can I assist you today?'
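Taken together, the `openhands/llm/llm.py` changes replace per-call work with state computed once at initialization: the CloudFlare-specific `APIError` handling and the manual `anthropic-beta: prompt-caching-2024-07-31` header are dropped (the beta header is presumably no longer needed now that Anthropic's prompt caching is handled through litellm without it), and the function-calling support check moves from `is_function_calling_active()` into `init_model_info()`, where its result is cached in `_function_calling_active`. The following is a stripped-down sketch of that caching pattern, not the real class: the model list is trimmed and everything unrelated to the check is omitted.

```python
import litellm

# Trimmed for illustration; the real list lives in openhands/llm/llm.py.
FUNCTION_CALLING_SUPPORTED_MODELS = ['claude-3-5-sonnet-20241022', 'gpt-4o']


class LLMSketch:
    def __init__(self, model: str, native_tool_calling: bool | None = None):
        self.model = model
        self.native_tool_calling = native_tool_calling
        self._function_calling_active = False
        self._init_model_info()

    def _init_model_info(self) -> None:
        # Decide once, at construction time, whether native tool calling is used.
        model_name_supported = (
            self.model in FUNCTION_CALLING_SUPPORTED_MODELS
            or self.model.split('/')[-1] in FUNCTION_CALLING_SUPPORTED_MODELS
            or any(m in self.model for m in FUNCTION_CALLING_SUPPORTED_MODELS)
        )
        if self.native_tool_calling is None:
            self._function_calling_active = model_name_supported
        elif self.native_tool_calling is False:
            self._function_calling_active = False
        else:
            # User forced it on: defer to litellm's own capability check.
            self._function_calling_active = litellm.supports_function_calling(
                model=self.model
            )

    def is_function_calling_active(self) -> bool:
        # Cheap accessor; callers no longer trigger the check on every call.
        return self._function_calling_active
```

The design choice is straightforward: the supported-model matching and the litellm capability lookup only depend on configuration that is fixed for the lifetime of the instance, so computing them once in `init_model_info()` avoids repeating the work on every `is_function_calling_active()` call and keeps the answer stable. A related small hardening in `retry_mixin.py` gives `retry_exceptions` a default of an empty tuple, so `retry_if_exception_type(())` simply retries nothing when no exception types are configured instead of failing on a `None` argument.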