Skip to content

Commit

Permalink
Fix memory leak in JSON encoder
Browse files Browse the repository at this point in the history
- Created OpenHandsJSONEncoder class to properly handle custom types
- Reuse single encoder instance for common case
- Added memory leak test to verify fix
- Improved memory usage by avoiding repeated encoder creation
  • Loading branch information
openhands-agent committed Feb 5, 2025
1 parent e47aaba commit d393208
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 13 deletions.
34 changes: 21 additions & 13 deletions openhands/core/utils/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,32 @@
from openhands.llm.metrics import Metrics


def my_default_encoder(obj):
class OpenHandsJSONEncoder(json.JSONEncoder):
"""Custom JSON encoder that handles datetime and event objects"""
if isinstance(obj, datetime):
return obj.isoformat()
if isinstance(obj, Event):
return event_to_dict(obj)
if isinstance(obj, Metrics):
return obj.get()
if isinstance(obj, ModelResponse):
return obj.model_dump()
if isinstance(obj, CmdOutputMetadata):
return obj.model_dump()
return json.JSONEncoder().default(obj)

def default(self, obj):
if isinstance(obj, datetime):
return obj.isoformat()
if isinstance(obj, Event):
return event_to_dict(obj)
if isinstance(obj, Metrics):
return obj.get()
if isinstance(obj, ModelResponse):
return obj.model_dump()
if isinstance(obj, CmdOutputMetadata):
return obj.model_dump()
return super().default(obj)


# Create a single reusable encoder instance
_json_encoder = OpenHandsJSONEncoder()


def dumps(obj, **kwargs):
"""Serialize an object to str format"""
return json.dumps(obj, default=my_default_encoder, **kwargs)
if not kwargs:
return _json_encoder.encode(obj)
return json.dumps(obj, cls=OpenHandsJSONEncoder, **kwargs)


def loads(json_str, **kwargs):
Expand Down
56 changes: 56 additions & 0 deletions tests/unit/test_json_encoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import gc
from datetime import datetime

import psutil

from openhands.core.utils.json import dumps


def get_memory_usage():
"""Get current memory usage of the process"""
process = psutil.Process()
return process.memory_info().rss


def test_json_encoder_memory_leak():
# Force garbage collection before test
gc.collect()
initial_memory = get_memory_usage()

# Create a large dataset that will need encoding
large_data = {
'datetime': datetime.now(),
'nested': [{'timestamp': datetime.now()} for _ in range(1000)],
}

# Track memory usage over multiple iterations
memory_samples = []
for i in range(10):
# Perform multiple serializations in each iteration
for _ in range(100):
dumps(large_data)
dumps(large_data, indent=2) # Test with kwargs too

# Force garbage collection
gc.collect()
memory_samples.append(get_memory_usage())

# Check if memory usage is stable (not continuously growing)
# We expect some fluctuation but not a steady increase
max_memory = max(memory_samples)
min_memory = min(memory_samples)
memory_variation = max_memory - min_memory

# Allow for some memory variation (2MB) due to Python's memory management
assert (
memory_variation < 2 * 1024 * 1024
), f'Memory usage unstable: {memory_variation} bytes variation'

# Also check total memory increase from start
final_memory = memory_samples[-1]
memory_increase = final_memory - initial_memory

# Allow for some memory increase (2MB) as some objects may be cached
assert (
memory_increase < 2 * 1024 * 1024
), f'Memory leak detected: {memory_increase} bytes increase'

0 comments on commit d393208

Please sign in to comment.