From 2faf8ee9910302074ead649701c9abfbb5c4475e Mon Sep 17 00:00:00 2001 From: Matthieu Dartiailh Date: Thu, 4 Jul 2024 08:07:54 +0200 Subject: [PATCH] Fix memory leak in pickle creation (#213) * fix a memory leak related to pickling Atom subclasses The reason for the changes around __slotnames__ are not clear to me but are necessary * cis: fix ruff invocation * tests: update test so that it does show an improvement over main --- .github/workflows/ci.yml | 2 +- atom/src/catom.cpp | 34 ++++++++----- docs/source/examples/example_doc_generator.py | 2 +- tests/test_atom_from_annotations.py | 6 +-- tests/test_mem.py | 42 ++++++++++++++- tests/type_checking/test_subclass.yml | 51 ++++++++++--------- 6 files changed, 93 insertions(+), 44 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 46e98ffb..c0942971 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,7 +47,7 @@ jobs: - name: Linting if: always() run: | - ruff atom examples tests + ruff check atom examples tests - name: Typing if: always() run: | diff --git a/atom/src/catom.cpp b/atom/src/catom.cpp index 138de377..881a2f46 100644 --- a/atom/src/catom.cpp +++ b/atom/src/catom.cpp @@ -375,8 +375,9 @@ PyObject* CAtom_getstate( CAtom* self ) { cppy::ptr stateptr = PyDict_New(); - if ( !stateptr ) + if ( !stateptr ) { return PyErr_NoMemory(); // LCOV_EXCL_LINE + } cppy::ptr selfptr(pyobject_cast(self), true); @@ -391,26 +392,33 @@ CAtom_getstate( CAtom* self ) // Copy __slots__ if present. This assumes copyreg._slotnames was called // during AtomMeta's initialization { - cppy::ptr typeptr = PyObject_Type(selfptr.get()); - if (!typeptr) - return 0; - cppy::ptr slotnamesptr = typeptr.getattr("__slotnames__"); - if (!slotnamesptr.get()) + PyObject* typedict = Py_TYPE(selfptr.get())->tp_dict; + cppy::ptr slotnamesptr(PyDict_GetItemString(typedict, "__slotnames__"), true); + if ( !slotnamesptr ) { return 0; - if (!PyList_CheckExact(slotnamesptr.get())) + } + if ( !PyList_CheckExact(slotnamesptr.get()) ) { return cppy::system_error( "slot names" ); + } for ( Py_ssize_t i=0; i < PyList_GET_SIZE(slotnamesptr.get()); i++ ) { PyObject *name = PyList_GET_ITEM(slotnamesptr.get(), i); cppy::ptr value = selfptr.getattr(name); - if (!value || PyDict_SetItem(stateptr.get(), name, value.get()) ) + if (!value ) { + // Following CPython impl it is not an error if the attribute is + // not present. + continue; + } + else if ( PyDict_SetItem(stateptr.get(), name, value.get()) ) { return 0; + } } } cppy::ptr membersptr = selfptr.getattr(atom_members); - if ( !membersptr || !PyDict_CheckExact( membersptr.get() ) ) + if ( !membersptr || !PyDict_CheckExact( membersptr.get() ) ) { return cppy::system_error( "atom members" ); + } PyObject *name, *member; Py_ssize_t pos = 0; @@ -421,9 +429,8 @@ CAtom_getstate( CAtom* self ) } int test = PyObject_IsTrue( should_gs.get() ); if ( test == 1) { - PyObject *value = member_cast( member )->getattr( self ); - if (!value || PyDict_SetItem( stateptr.get(), name, value ) ) { - Py_XDECREF( value ); + cppy::ptr value = member_cast( member )->getattr( self ); + if (!value || PyDict_SetItem( stateptr.get(), name, value.get() ) ) { return 0; } } @@ -433,8 +440,9 @@ CAtom_getstate( CAtom* self ) } // Frozen state - if ( self->is_frozen() && PyDict_SetItem(stateptr.get(), atom_flags, Py_None) ) + if ( self->is_frozen() && PyDict_SetItem(stateptr.get(), atom_flags, Py_None) ) { return 0; + } return stateptr.release(); } diff --git a/docs/source/examples/example_doc_generator.py b/docs/source/examples/example_doc_generator.py index f3cbb2a4..d414eba4 100644 --- a/docs/source/examples/example_doc_generator.py +++ b/docs/source/examples/example_doc_generator.py @@ -87,7 +87,7 @@ def generate_example_doc(docs_path, script_path): # Add the script to the Python Path old_python_path = sys.path - sys.path = sys.path + [os.path.dirname(script_path)] + sys.path = [*sys.path, os.path.dirname(script_path)] # Restore Python path. sys.path = old_python_path diff --git a/tests/test_atom_from_annotations.py b/tests/test_atom_from_annotations.py index 31f9773e..f9e00114 100644 --- a/tests/test_atom_from_annotations.py +++ b/tests/test_atom_from_annotations.py @@ -168,7 +168,7 @@ class A(Atom, use_annotations=True): assert A.a.validate_mode[1] == (annotation.__origin__,) elif member is Subclass: if isinstance(annotation.__args__[0], TypeVar): - assert A.a.validate_mode[1] == object + assert A.a.validate_mode[1] is object else: assert A.a.validate_mode[1] == annotation.__args__[0] else: @@ -254,9 +254,9 @@ class A(Atom, use_annotations=True, type_containers=depth): assert type(v) is type(mv) assert f(A()) == mf(A()) else: - assert type(A.a.item) is type(member.item) # noqa: E721 + assert type(A.a.item) is type(member.item) if isinstance(member.item, List): - assert type(A.a.item.item) is type(member.item.item) # noqa: E721 + assert type(A.a.item.item) is type(member.item.item) @pytest.mark.parametrize( diff --git a/tests/test_mem.py b/tests/test_mem.py index 61db9068..c8895840 100644 --- a/tests/test_mem.py +++ b/tests/test_mem.py @@ -1,5 +1,5 @@ # -------------------------------------------------------------------------------------- -# Copyright (c) 2023, Nucleic Development Team. +# Copyright (c) 2023-2024, Nucleic Development Team. # # Distributed under the terms of the Modified BSD License. # @@ -7,8 +7,10 @@ # -------------------------------------------------------------------------------------- import gc import os +import pickle import sys import time +import tracemalloc from multiprocessing import Process import pytest @@ -53,6 +55,13 @@ class RefObj(Atom): "atomref": RefObj, } +PICKLE_MEM_TESTS = { + "dict": DictObj, + "defaultdict": DefaultDictObj, + "list": ListObj, + "set": SetObj, +} + def memtest(cls): # Create object in a loop @@ -105,3 +114,34 @@ def test_mem_usage(label): finally: p.kill() p.join() + + +@pytest.mark.parametrize("label", PICKLE_MEM_TESTS.keys()) +def test_pickle_mem_usage(label): + TestClass = PICKLE_MEM_TESTS[label] + + obj = TestClass() + + for _ in range(100): + pickle.loads(pickle.dumps(obj)) + + gc.collect() + tracemalloc.start() + for i in range(10000): + pck = pickle.dumps(obj) + pickle.loads(pck) + del pck + gc.collect() + for stat in ( + tracemalloc.take_snapshot() + .filter_traces( + [ + tracemalloc.Filter(True, "*/atom/*"), + tracemalloc.Filter(False, "*/tests/*"), + ] + ) + .statistics("lineno") + ): + # not sure why I sometimes see a 2 here but the original buggy version + # reported values > 50 + assert stat.count < 5 diff --git a/tests/type_checking/test_subclass.yml b/tests/type_checking/test_subclass.yml index 768d55ba..9f48ee74 100644 --- a/tests/type_checking/test_subclass.yml +++ b/tests/type_checking/test_subclass.yml @@ -1,60 +1,61 @@ #------------------------------------------------------------------------------ -# Copyright (c) 2021, Nucleic Development Team. +# Copyright (c) 2021-2024, Nucleic Development Team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE, distributed with this software. #------------------------------------------------------------------------------ - case: subclass + skip: sys.version_info < (3, 9) # 3.8 uses Type[] while 3.9+ uses type[] parametrized: - member: Subclass member_instance: Subclass(A) - member_type: atom.subclass.Subclass[Type[main.A]] - member_value_type: Type[main.A] + member_type: atom.subclass.Subclass[type[main.A]] + member_value_type: type[main.A] - member: Subclass member_instance: Subclass(A, B) - member_type: atom.subclass.Subclass[Type[main.A]] - member_value_type: Type[main.A] + member_type: atom.subclass.Subclass[type[main.A]] + member_value_type: type[main.A] - member: Subclass member_instance: Subclass((A,)) - member_type: atom.subclass.Subclass[Type[main.A]] - member_value_type: Type[main.A] + member_type: atom.subclass.Subclass[type[main.A]] + member_value_type: type[main.A] - member: Subclass member_instance: Subclass((A,), B) - member_type: atom.subclass.Subclass[Type[main.A]] - member_value_type: Type[main.A] + member_type: atom.subclass.Subclass[type[main.A]] + member_value_type: type[main.A] - member: Subclass member_instance: Subclass((int, A)) - member_type: atom.subclass.Subclass[Union[Type[builtins.int], Type[main.A]]] - member_value_type: Union[Type[builtins.int], Type[main.A]] + member_type: atom.subclass.Subclass[Union[type[builtins.int], type[main.A]]] + member_value_type: Union[type[builtins.int], type[main.A]] - member: Subclass member_instance: Subclass((int, A), B) - member_type: atom.subclass.Subclass[Union[Type[builtins.int], Type[main.A]]] - member_value_type: Union[Type[builtins.int], Type[main.A]] + member_type: atom.subclass.Subclass[Union[type[builtins.int], type[main.A]]] + member_value_type: Union[type[builtins.int], type[main.A]] - member: Subclass member_instance: Subclass((int, A, str)) - member_type: atom.subclass.Subclass[Union[Type[builtins.int], Type[main.A], Type[builtins.str]]] - member_value_type: Union[Type[builtins.int], Type[main.A], Type[builtins.str]] + member_type: atom.subclass.Subclass[Union[type[builtins.int], type[main.A], type[builtins.str]]] + member_value_type: Union[type[builtins.int], type[main.A], type[builtins.str]] - member: Subclass member_instance: Subclass((int, A, str), B) - member_type: atom.subclass.Subclass[Union[Type[builtins.int], Type[main.A], Type[builtins.str]]] - member_value_type: Union[Type[builtins.int], Type[main.A], Type[builtins.str]] + member_type: atom.subclass.Subclass[Union[type[builtins.int], type[main.A], type[builtins.str]]] + member_value_type: Union[type[builtins.int], type[main.A], type[builtins.str]] - member: ForwardSubclass member_instance: ForwardSubclass(resolve1) - member_type: atom.subclass.ForwardSubclass[Type[main.A]] - member_value_type: Type[main.A] + member_type: atom.subclass.ForwardSubclass[type[main.A]] + member_value_type: type[main.A] - member: ForwardSubclass member_instance: ForwardSubclass(resolve2) - member_type: atom.subclass.ForwardSubclass[Type[main.A]] - member_value_type: Type[main.A] + member_type: atom.subclass.ForwardSubclass[type[main.A]] + member_value_type: type[main.A] - member: ForwardSubclass member_instance: ForwardSubclass(resolve3) - member_type: atom.subclass.ForwardSubclass[Union[Type[builtins.int], Type[main.A]]] - member_value_type: Union[Type[builtins.int], Type[main.A]] + member_type: atom.subclass.ForwardSubclass[Union[type[builtins.int], type[main.A]]] + member_value_type: Union[type[builtins.int], type[main.A]] - member: ForwardSubclass member_instance: ForwardSubclass(resolve4) - member_type: atom.subclass.ForwardSubclass[Union[Type[builtins.int], Type[main.A], Type[builtins.str]]] - member_value_type: Union[Type[builtins.int], Type[main.A], Type[builtins.str]] + member_type: atom.subclass.ForwardSubclass[Union[type[builtins.int], type[main.A], type[builtins.str]]] + member_value_type: Union[type[builtins.int], type[main.A], type[builtins.str]] main: | import io from typing import Tuple, Type