diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index b40f11ca8396..497578181fc1 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -20,6 +20,7 @@ from litellm.exceptions import (
     RateLimitError,
 )
+from litellm.llms.custom_httpx.http_handler import HTTPHandler
 from litellm.types.utils import CostPerToken, ModelResponse, Usage
 from litellm.utils import create_pretrained_tokenizer
 
 
@@ -231,8 +232,17 @@ def wrapper(*args, **kwargs):
             # Record start time for latency measurement
             start_time = time.time()
 
-            # we don't support streaming here, thus we get a ModelResponse
-            resp: ModelResponse = self._completion_unwrapped(*args, **kwargs)
+            # LiteLLM currently has an issue where HTTPHandlers are created but not
+            # closed. We have submitted a PR to them (https://github.com/BerriAI/litellm/pull/8711),
+            # and their dev team says they are in the process of a refactor that will fix this.
+            # In the meantime, we manage the lifecycle of the HTTPHandler manually.
+            handler = HTTPHandler(timeout=self.config.timeout)
+            kwargs['client'] = handler
+            try:
+                # we don't support streaming here, thus we get a ModelResponse
+                resp: ModelResponse = self._completion_unwrapped(*args, **kwargs)
+            finally:
+                handler.close()
 
             # Calculate and record latency
             latency = time.time() - start_time
@@ -287,7 +297,11 @@ def wrapper(*args, **kwargs):
                 'messages': messages,
                 'response': resp,
                 'args': args,
-                'kwargs': {k: v for k, v in kwargs.items() if k != 'messages'},
+                'kwargs': {
+                    k: v
+                    for k, v in kwargs.items()
+                    if k not in ('messages', 'client')
+                },
                 'timestamp': time.time(),
                 'cost': cost,
             }
diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py
index 1bfee8550698..9c112106f12a 100644
--- a/tests/unit/test_llm.py
+++ b/tests/unit/test_llm.py
@@ -1,4 +1,6 @@
 import copy
+import tempfile
+from pathlib import Path
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -429,3 +431,27 @@ def test_get_token_count_error_handling(
     mock_logger.error.assert_called_once_with(
         'Error getting token count for\n model gpt-4o\nToken counting failed'
     )
+
+
+@patch('openhands.llm.llm.litellm_completion')
+def test_completion_with_log_completions(mock_litellm_completion, default_config):
+    with tempfile.TemporaryDirectory() as temp_dir:
+        default_config.log_completions = True
+        default_config.log_completions_folder = temp_dir
+        mock_response = {
+            'choices': [{'message': {'content': 'This is a mocked response.'}}]
+        }
+        mock_litellm_completion.return_value = mock_response
+
+        test_llm = LLM(config=default_config)
+        response = test_llm.completion(
+            messages=[{'role': 'user', 'content': 'Hello!'}],
+            stream=False,
+            drop_params=True,
+        )
+        assert (
+            response['choices'][0]['message']['content'] == 'This is a mocked response.'
+        )
+        files = list(Path(temp_dir).iterdir())
+        # Expect a log to be generated
+        assert len(files) == 1
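
For reference, below is a minimal standalone sketch of the lifecycle pattern the patch applies inside wrapper: create one HTTPHandler per completion call and close it explicitly so the underlying httpx client is not leaked while the upstream litellm refactor is pending. The helper name call_with_managed_client and the 60.0 timeout default are illustrative and not part of the patch; only HTTPHandler(timeout=...), the client kwarg, and handler.close() come from the diff above.

import litellm
from litellm.llms.custom_httpx.http_handler import HTTPHandler


def call_with_managed_client(timeout: float = 60.0, **kwargs):
    # Hypothetical helper: one handler per call, as the patch does with
    # self.config.timeout; 60.0 here is just a placeholder default.
    handler = HTTPHandler(timeout=timeout)
    # Forward the handler the same way the patch does via kwargs['client'].
    kwargs['client'] = handler
    try:
        return litellm.completion(**kwargs)
    finally:
        # Close the handler even if the call raises, so sockets are released.
        handler.close()

Usage would look like call_with_managed_client(model='gpt-4o', messages=[{'role': 'user', 'content': 'Hello!'}]); behavior is otherwise the same as calling litellm.completion directly, which is why the log-serialization code above also strips the non-serializable 'client' entry from kwargs.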