
Commit

Merge branch 'fix-file-descriptor-leak-2' into fix-disconnected-runtime-stop-agent-loop
tofarr committed Feb 23, 2025
2 parents 028d33a + 8b41359 commit db47541
Showing 2 changed files with 43 additions and 3 deletions.
20 changes: 17 additions & 3 deletions openhands/llm/llm.py
@@ -20,6 +20,7 @@
 from litellm.exceptions import (
     RateLimitError,
 )
+from litellm.llms.custom_httpx.http_handler import HTTPHandler
 from litellm.types.utils import CostPerToken, ModelResponse, Usage
 from litellm.utils import create_pretrained_tokenizer

@@ -231,8 +232,17 @@ def wrapper(*args, **kwargs):
             # Record start time for latency measurement
             start_time = time.time()

-            # we don't support streaming here, thus we get a ModelResponse
-            resp: ModelResponse = self._completion_unwrapped(*args, **kwargs)
+            # LiteLLM currently has an issue where HTTPHandlers are being created but not closed.
+            # We have submitted a PR to them (https://github.com/BerriAI/litellm/pull/8711),
+            # and their dev team says they are in the process of a refactor that will fix this.
+            # In the meantime, we manage the lifecycle of the HTTPHandler manually.
+            handler = HTTPHandler(timeout=self.config.timeout)
+            kwargs['client'] = handler
+            try:
+                # we don't support streaming here, thus we get a ModelResponse
+                resp: ModelResponse = self._completion_unwrapped(*args, **kwargs)
+            finally:
+                handler.close()

             # Calculate and record latency
             latency = time.time() - start_time
@@ -287,7 +297,11 @@ def wrapper(*args, **kwargs):
                     'messages': messages,
                     'response': resp,
                     'args': args,
-                    'kwargs': {k: v for k, v in kwargs.items() if k != 'messages'},
+                    'kwargs': {
+                        k: v
+                        for k, v in kwargs.items()
+                        if k not in ('messages', 'client')
+                    },
                     'timestamp': time.time(),
                     'cost': cost,
                 }
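
For context: the wrapper now creates its own HTTPHandler, hands it to litellm via the client kwarg, and closes it in a finally block; the logged kwargs also drop the client entry, since the handler itself is not worth persisting in the completion log. Below is a minimal standalone sketch of the same pattern — the completion_with_managed_client helper is hypothetical (not part of this commit) and assumes litellm.completion accepts a client argument, as the diff above relies on.

# Illustrative sketch only, not part of the commit: manage the HTTPHandler's
# lifecycle manually so the underlying connections are released after each call.
import litellm
from litellm.llms.custom_httpx.http_handler import HTTPHandler

def completion_with_managed_client(timeout: float = 60.0, **kwargs):
    # Create the handler ourselves instead of letting litellm allocate one per call.
    handler = HTTPHandler(timeout=timeout)
    kwargs['client'] = handler
    try:
        # No streaming here, so a single ModelResponse comes back.
        return litellm.completion(**kwargs)
    finally:
        # Close the handler even if the completion call raises.
        handler.close()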
26 changes: 26 additions & 0 deletions tests/unit/test_llm.py
@@ -1,4 +1,6 @@
 import copy
+import tempfile
+from pathlib import Path
 from unittest.mock import MagicMock, patch

 import pytest
@@ -429,3 +431,27 @@ def test_get_token_count_error_handling(
     mock_logger.error.assert_called_once_with(
         'Error getting token count for\n model gpt-4o\nToken counting failed'
     )
+
+
+@patch('openhands.llm.llm.litellm_completion')
+def test_completion_with_log_completions(mock_litellm_completion, default_config):
+    with tempfile.TemporaryDirectory() as temp_dir:
+        default_config.log_completions = True
+        default_config.log_completions_folder = temp_dir
+        mock_response = {
+            'choices': [{'message': {'content': 'This is a mocked response.'}}]
+        }
+        mock_litellm_completion.return_value = mock_response
+
+        test_llm = LLM(config=default_config)
+        response = test_llm.completion(
+            messages=[{'role': 'user', 'content': 'Hello!'}],
+            stream=False,
+            drop_params=True,
+        )
+        assert (
+            response['choices'][0]['message']['content'] == 'This is a mocked response.'
+        )
+        files = list(Path(temp_dir).iterdir())
+        # Expect a log to be generated
+        assert len(files) == 1
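
The new test can be run on its own with pytest's -k filter, e.g. pytest tests/unit/test_llm.py -k test_completion_with_log_completions (assuming the repository's standard dev dependencies are installed); it asserts only that exactly one completion log file is written to the temporary folder.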
