
Fix memory leak in JSON encoder #6620

Merged · 3 commits · Feb 5, 2025
openhands/core/utils/json.py (21 additions, 13 deletions)

@@ -11,24 +11,32 @@
 from openhands.llm.metrics import Metrics
 
 
-def my_default_encoder(obj):
+class OpenHandsJSONEncoder(json.JSONEncoder):
     """Custom JSON encoder that handles datetime and event objects"""
-    if isinstance(obj, datetime):
-        return obj.isoformat()
-    if isinstance(obj, Event):
-        return event_to_dict(obj)
-    if isinstance(obj, Metrics):
-        return obj.get()
-    if isinstance(obj, ModelResponse):
-        return obj.model_dump()
-    if isinstance(obj, CmdOutputMetadata):
-        return obj.model_dump()
-    return json.JSONEncoder().default(obj)
> Review comment from a Contributor on the deleted line above (see the sketch after this diff): was it actually a leak that never got garbage collected over time, or was it just excessive memory usage from having to construct json.JSONEncoder() over and over again for every object being serialized?


+
+    def default(self, obj):
+        if isinstance(obj, datetime):
+            return obj.isoformat()
+        if isinstance(obj, Event):
+            return event_to_dict(obj)
+        if isinstance(obj, Metrics):
+            return obj.get()
+        if isinstance(obj, ModelResponse):
+            return obj.model_dump()
+        if isinstance(obj, CmdOutputMetadata):
+            return obj.model_dump()
+        return super().default(obj)
+
+
+# Create a single reusable encoder instance
+_json_encoder = OpenHandsJSONEncoder()
 
 
 def dumps(obj, **kwargs):
     """Serialize an object to str format"""
-    return json.dumps(obj, default=my_default_encoder, **kwargs)
+    if not kwargs:
+        return _json_encoder.encode(obj)
+    return json.dumps(obj, cls=OpenHandsJSONEncoder, **kwargs)


 def loads(json_str, **kwargs):
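
On the question raised in the review comment above: the old code allocated a fresh json.JSONEncoder() inside the fallback for every object it could not serialize natively, which suggests per-call allocation churn rather than a true never-collected leak. A minimal standalone sketch (illustrative data and names, not part of this PR) that makes the cost of repeated construction visible:

```python
# Sketch: compare allocating a fresh encoder per call (old behavior)
# against reusing a single shared instance (new behavior).
import json
import timeit

data = {'numbers': list(range(100)), 'text': 'hello'}


def encode_fresh():
    # A new JSONEncoder is constructed on every call, then discarded
    return json.JSONEncoder().encode(data)


shared = json.JSONEncoder()


def encode_shared():
    # One encoder instance is reused across all calls
    return shared.encode(data)


print('fresh :', timeit.timeit(encode_fresh, number=10_000))
print('shared:', timeit.timeit(encode_shared, number=10_000))
```

Either way, after the merged change, dumps(obj) with no kwargs hits the reused _json_encoder.encode() fast path, while calls with kwargs such as dumps(obj, indent=2) still construct a fresh encoder via cls=OpenHandsJSONEncoder.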
tests/unit/test_json_encoder.py (56 additions, 0 deletions)

@@ -0,0 +1,56 @@
import gc
from datetime import datetime

import psutil

from openhands.core.utils.json import dumps


def get_memory_usage():
"""Get current memory usage of the process"""
process = psutil.Process()
return process.memory_info().rss


def test_json_encoder_memory_leak():
# Force garbage collection before test
gc.collect()
initial_memory = get_memory_usage()

# Create a large dataset that will need encoding
large_data = {
'datetime': datetime.now(),
'nested': [{'timestamp': datetime.now()} for _ in range(1000)],
}

# Track memory usage over multiple iterations
memory_samples = []
for i in range(10):
# Perform multiple serializations in each iteration
for _ in range(100):
dumps(large_data)
dumps(large_data, indent=2) # Test with kwargs too

# Force garbage collection
gc.collect()
memory_samples.append(get_memory_usage())

# Check if memory usage is stable (not continuously growing)
# We expect some fluctuation but not a steady increase
max_memory = max(memory_samples)
min_memory = min(memory_samples)
memory_variation = max_memory - min_memory

# Allow for some memory variation (2MB) due to Python's memory management
assert (
memory_variation < 2 * 1024 * 1024
), f'Memory usage unstable: {memory_variation} bytes variation'

# Also check total memory increase from start
final_memory = memory_samples[-1]
memory_increase = final_memory - initial_memory

# Allow for some memory increase (2MB) as some objects may be cached
assert (
memory_increase < 2 * 1024 * 1024
), f'Memory leak detected: {memory_increase} bytes increase'
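
As a possible complement to the psutil-based RSS check above, here is a hedged sketch using the standard-library tracemalloc module (the test name and the 1 MB threshold are illustrative, not from this PR). It tracks Python-level allocations directly, which is less sensitive to allocator and OS noise than process RSS:

```python
import tracemalloc
from datetime import datetime

from openhands.core.utils.json import dumps


def test_json_encoder_allocation_stability():
    data = {'timestamps': [datetime.now() for _ in range(1000)]}

    # Warm up so import-time and cache allocations don't count as growth
    for _ in range(100):
        dumps(data)

    tracemalloc.start()
    before, _ = tracemalloc.get_traced_memory()
    for _ in range(1000):
        dumps(data)
    after, _ = tracemalloc.get_traced_memory()
    tracemalloc.stop()

    # A real leak would grow roughly linearly with the iteration count;
    # the 1 MB allowance absorbs normal interpreter bookkeeping.
    assert after - before < 1024 * 1024, f'{after - before} bytes retained'
```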