
Commit

Merge branch 'fix-file-descriptor-leak-2' into fix-disconnected-runtime-stop-agent-loop
tofarr committed Feb 23, 2025
2 parents 028d33a + 8b41359 commit db47541
Showing 2 changed files with 43 additions and 3 deletions.
20 changes: 17 additions & 3 deletions openhands/llm/llm.py
@@ -20,6 +20,7 @@
 from litellm.exceptions import (
     RateLimitError,
 )
+from litellm.llms.custom_httpx.http_handler import HTTPHandler
 from litellm.types.utils import CostPerToken, ModelResponse, Usage
 from litellm.utils import create_pretrained_tokenizer

@@ -231,8 +232,17 @@ def wrapper(*args, **kwargs):
             # Record start time for latency measurement
             start_time = time.time()

-            # we don't support streaming here, thus we get a ModelResponse
-            resp: ModelResponse = self._completion_unwrapped(*args, **kwargs)
+            # LiteLLM currently has an issue where HTTPHandlers are being created but not closed.
+            # We have submitted a PR to them (https://github.com/BerriAI/litellm/pull/8711),
+            # and their dev team says they are in the process of a refactor that will fix this.
+            # In the meantime, we manage the lifecycle of the HTTPHandler manually.
+            handler = HTTPHandler(timeout=self.config.timeout)
+            kwargs['client'] = handler
+            try:
+                # we don't support streaming here, thus we get a ModelResponse
+                resp: ModelResponse = self._completion_unwrapped(*args, **kwargs)
+            finally:
+                handler.close()

             # Calculate and record latency
             latency = time.time() - start_time
@@ -287,7 +297,11 @@ def wrapper(*args, **kwargs):
                     'messages': messages,
                     'response': resp,
                     'args': args,
-                    'kwargs': {k: v for k, v in kwargs.items() if k != 'messages'},
+                    'kwargs': {
+                        k: v
+                        for k, v in kwargs.items()
+                        if k not in ('messages', 'client')
+                    },
                     'timestamp': time.time(),
                     'cost': cost,
                 }
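
For context: the wrapper now creates its own HTTPHandler, hands it to litellm via the client kwarg, and closes it in a finally block; the logged kwargs also drop the client entry, since the handler itself is not worth persisting in the completion log. Below is a minimal standalone sketch of the same pattern — the completion_with_managed_client helper is hypothetical (not part of this commit) and assumes litellm.completion accepts a client argument, as the diff above relies on.

# Illustrative sketch only, not part of the commit: manage the HTTPHandler's
# lifecycle manually so the underlying connections are released after each call.
import litellm
from litellm.llms.custom_httpx.http_handler import HTTPHandler

def completion_with_managed_client(timeout: float = 60.0, **kwargs):
    # Create the handler ourselves instead of letting litellm allocate one per call.
    handler = HTTPHandler(timeout=timeout)
    kwargs['client'] = handler
    try:
        # No streaming here, so a single ModelResponse comes back.
        return litellm.completion(**kwargs)
    finally:
        # Close the handler even if the completion call raises.
        handler.close()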
26 changes: 26 additions & 0 deletions tests/unit/test_llm.py
@@ -1,4 +1,6 @@
 import copy
+import tempfile
+from pathlib import Path
 from unittest.mock import MagicMock, patch

 import pytest
@@ -429,3 +431,27 @@ def test_get_token_count_error_handling(
     mock_logger.error.assert_called_once_with(
         'Error getting token count for\n model gpt-4o\nToken counting failed'
     )
+
+
+@patch('openhands.llm.llm.litellm_completion')
+def test_completion_with_log_completions(mock_litellm_completion, default_config):
+    with tempfile.TemporaryDirectory() as temp_dir:
+        default_config.log_completions = True
+        default_config.log_completions_folder = temp_dir
+        mock_response = {
+            'choices': [{'message': {'content': 'This is a mocked response.'}}]
+        }
+        mock_litellm_completion.return_value = mock_response
+
+        test_llm = LLM(config=default_config)
+        response = test_llm.completion(
+            messages=[{'role': 'user', 'content': 'Hello!'}],
+            stream=False,
+            drop_params=True,
+        )
+        assert (
+            response['choices'][0]['message']['content'] == 'This is a mocked response.'
+        )
+        files = list(Path(temp_dir).iterdir())
+        # Expect a log to be generated
+        assert len(files) == 1
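
The new test can be run on its own with pytest's -k filter, e.g. pytest tests/unit/test_llm.py -k test_completion_with_log_completions (assuming the repository's standard dev dependencies are installed); it asserts only that exactly one completion log file is written to the temporary folder.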
