-
Notifications
You must be signed in to change notification settings - Fork 181
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Memory Leak when importing hazm inside of a package #284
Comments
سلام. |
I am having the same problem with Hazm v0.10.0. I wrote the code below to measure how long each module takes to import:
import importlib
import sys
import time
import traceback
from collections import defaultdict
class ImportProfiler:
    """Measure how long imports take by temporarily replacing ``builtins.__import__``.

    Call ``start_profiling()`` to install the hook, run the imports to be
    measured, call ``stop_profiling()`` to restore the previous import
    function, then ``print_report()`` to list the slow ones.
    """

    def __init__(self):
        # ``__builtins__`` is the builtins module only inside ``__main__``; in
        # an imported module it is a plain dict, so ``__builtins__.__import__``
        # raises AttributeError there. The ``builtins`` module works everywhere.
        # Imported here, before the hook is installed, so that patching and
        # unpatching never trigger a tracked import themselves.
        import builtins

        self._builtins = builtins
        # Accumulated seconds per imported name, as passed to ``__import__``.
        self.import_times = defaultdict(float)
        # Names of the imports currently in progress, outermost first.
        self.import_stack = []
        # Maps an in-progress import name to the names imported while it ran.
        self.import_hierarchy = defaultdict(list)
        # The import function that was active when the profiler was created.
        self.original_import = builtins.__import__
        # Snapshot of the modules that were already loaded at creation time.
        self.original_modules = set(sys.modules.keys())

    def _custom_import(self, name, globals=None, locals=None, fromlist=(), level=0):
        """Replacement for ``__import__`` that records timing and nesting.

        The parameters mirror the ``__import__`` signature and are forwarded
        unchanged to the original import function, whose result is returned.
        The elapsed time covers the whole delegated call, including any nested
        imports it triggers, and is recorded only when the import succeeds.
        Exceptions from the original import propagate to the caller.
        """
        start_time = time.perf_counter()

        # Attribute this import to whichever import is currently executing.
        if self.import_stack:
            self.import_hierarchy[self.import_stack[-1]].append(name)
        self.import_stack.append(name)

        try:
            result = self.original_import(name, globals, locals, fromlist, level)
            self.import_times[name] += time.perf_counter() - start_time
            return result
        finally:
            # Keep the stack balanced even when the import raises.
            self.import_stack.pop()

    def clear_module_cache(self, target_module):
        """Evict modules from ``sys.modules`` so the next import runs for real.

        Removes every module loaded since the profiler was created, plus
        ``target_module`` and its submodules even when they were loaded
        earlier. Without the latter, an already-cached target would be served
        from the cache and nothing would be measured.

        Args:
            target_module: Dotted name of the module about to be profiled.

        Returns:
            The set of removed module names that are ``target_module`` itself
            or one of its submodules (e.g. ``pandas.core`` for ``pandas``).
        """
        submodule_prefix = target_module + "."
        current_modules = set(sys.modules.keys())

        related_modules = {
            name
            for name in current_modules
            if name == target_module or name.startswith(submodule_prefix)
        }
        modules_to_remove = (current_modules - self.original_modules) | related_modules

        for module_name in modules_to_remove:
            sys.modules.pop(module_name, None)

        return related_modules

    def start_profiling(self):
        """Install the timing hook as ``builtins.__import__``."""
        self._builtins.__import__ = self._custom_import

    def stop_profiling(self):
        """Restore the import function that was active at creation time."""
        self._builtins.__import__ = self.original_import

    def print_report(self, threshold_ms=100):
        """Print the recorded imports that took at least ``threshold_ms``.

        Rows are sorted from slowest to fastest. Only the timings are
        reported; the collected ``import_hierarchy`` is not printed.

        Args:
            threshold_ms: Minimum accumulated time, in milliseconds, for an
                import to be listed.
        """
        separator = "-" * 60

        print("\n=== Import Profiling Report ===")
        print(f"\nSlow Imports (above {threshold_ms}ms):")
        print(separator)
        print(f"{'Module':<40} {'Time (ms)':<10} {'Time (s)':<10}")
        print(separator)

        sorted_imports = sorted(
            self.import_times.items(), key=lambda item: item[1], reverse=True
        )
        for module, time_taken in sorted_imports:
            if time_taken * 1000 >= threshold_ms:
                print(f"{module:<40} {time_taken * 1000:>8.2f}ms {time_taken:>8.4f}s")

        print(separator)
def profile_imports(target_module):
    """Import ``target_module`` under an ``ImportProfiler`` and print the report.

    Any ``Exception`` raised by the import is caught and its traceback
    printed; the import hook is always removed and the report always printed.

    Args:
        target_module: Dotted name of the module to import and profile.
    """
    profiler = ImportProfiler()

    try:
        print(f"\nProfiling imports for: {target_module}")
        print("=" * 60)

        # Start profiling before clearing cache to catch all imports
        profiler.start_profiling()

        # Clear the module and its dependencies from cache
        removed_modules = profiler.clear_module_cache(target_module)
        print(f"Cleared {len(removed_modules)} modules from cache")

        # Go through ``__import__`` rather than ``importlib.import_module``:
        # the latter does not call ``builtins.__import__``, so the target's own
        # total import time would never be recorded by the profiler's hook.
        __import__(target_module)

    except Exception:
        print(f"\nError while importing {target_module}:")
        traceback.print_exc()

    finally:
        profiler.stop_profiling()
        profiler.print_report()
profile_imports("hazm")
I ran this code to see which libraries take a long time to import, and I found |
Hello.
When we import Hazm inside a package in a Django/Faust application (even though we do not use anything from that package), a memory leak occurs and the Docker container's memory usage exceeds its memory limit.
We do not have that problem when using Hazm v0.7.0.
The text was updated successfully, but these errors were encountered: