From 27faa82890635a25e6ae053f4285bf7560d394bb Mon Sep 17 00:00:00 2001 From: Peter Fackeldey Date: Wed, 18 Dec 2024 09:11:48 -0500 Subject: [PATCH 1/3] fix: control attrs better as described in issue #3277 (#3344) * control attrs better as described in issue #3277 * break cyclic ref with weakref * ensure transients are strings * style: pre-commit fixes * fix doc string Co-authored-by: Angus Hollands --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Angus Hollands --- src/awkward/_attrs.py | 42 ++++++++++++++- src/awkward/highlevel.py | 30 +++++------ tests/test_2757_attrs_metadata.py | 52 +++++++++---------- ...ize_and_deserialize_behaviour_for_numba.py | 2 +- tests/test_2806_attrs_typetracer.py | 4 +- tests/test_2837_ufunc_attrs_behavior.py | 6 +-- tests/test_2866_getitem_attrs.py | 18 +++---- ...est_3277_attrs_behavior_on_array_copies.py | 17 ++++++ 8 files changed, 114 insertions(+), 57 deletions(-) create mode 100644 tests/test_3277_attrs_behavior_on_array_copies.py diff --git a/src/awkward/_attrs.py b/src/awkward/_attrs.py index 14a42549d2..cf7a84f9fc 100644 --- a/src/awkward/_attrs.py +++ b/src/awkward/_attrs.py @@ -1,7 +1,9 @@ # BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE from __future__ import annotations +import weakref from collections.abc import Mapping +from types import MappingProxyType from awkward._typing import Any, JSONMapping @@ -41,4 +43,42 @@ def attrs_of(*arrays, attrs: Mapping | None = None) -> Mapping: def without_transient_attrs(attrs: dict[str, Any]) -> JSONMapping: - return {k: v for k, v in attrs.items() if not k.startswith("@")} + return { + k: v for k, v in attrs.items() if not (isinstance(k, str) and k.startswith("@")) + } + + +class Attrs(Mapping): + def __init__(self, ref, data: Mapping[str, Any]): + self._ref = weakref.ref(ref) + self._data = _freeze_attrs(data) + + def __getitem__(self, key: str): + return self._data[key] + + 
def __setitem__(self, key: str, value: Any): + ref = self._ref() + if ref is None: + msg = "The reference array has been deleted. If you still need to set attributes, convert this 'Attrs' instance to a dict with '.to_dict()'." + raise ValueError(msg) + ref._attrs = _unfreeze_attrs(self._data) | {key: value} + + def __iter__(self): + return iter(self._data) + + def __len__(self): + return len(self._data) + + def __repr__(self): + return f"Attrs({_unfreeze_attrs(self._data)!r})" + + def to_dict(self): + return _unfreeze_attrs(self._data) + + +def _freeze_attrs(attrs: Mapping[str, Any]) -> Mapping[str, Any]: + return MappingProxyType(attrs) + + +def _unfreeze_attrs(attrs: Mapping[str, Any]) -> dict[str, Any]: + return dict(attrs) diff --git a/src/awkward/highlevel.py b/src/awkward/highlevel.py index 6d1d6649aa..f9473bdf71 100644 --- a/src/awkward/highlevel.py +++ b/src/awkward/highlevel.py @@ -18,7 +18,7 @@ import awkward as ak import awkward._connect.hist -from awkward._attrs import attrs_of, without_transient_attrs +from awkward._attrs import Attrs, attrs_of, without_transient_attrs from awkward._backends.dispatch import register_backend_lookup_factory from awkward._backends.numpy import NumpyBackend from awkward._behavior import behavior_of, get_array_class, get_record_class @@ -42,7 +42,7 @@ unpickle_record_schema_1, ) from awkward._regularize import is_non_string_like_iterable -from awkward._typing import Any, MutableMapping, TypeVar +from awkward._typing import Any, TypeVar from awkward._util import STDOUT from awkward.prettyprint import Formatter from awkward.prettyprint import valuestr as prettyprint_valuestr @@ -337,7 +337,7 @@ def __init__( if behavior is not None and not isinstance(behavior, Mapping): raise TypeError("behavior must be None or a mapping") - if attrs is not None and not isinstance(attrs, MutableMapping): + if attrs is not None and not isinstance(attrs, Mapping): raise TypeError("attrs must be None or a mapping") if named_axis: @@ -379,9 
+379,9 @@ def _update_class(self): self.__class__ = get_array_class(self._layout, self._behavior) @property - def attrs(self) -> Mapping: + def attrs(self) -> Attrs: """ - The mutable mapping containing top-level metadata, which is serialised + The mapping containing top-level metadata, which is serialised with the array during pickling. Keys prefixed with `@` are identified as "transient" attributes @@ -390,14 +390,14 @@ def attrs(self) -> Mapping: """ if self._attrs is None: self._attrs = {} - return self._attrs + return Attrs(self, self._attrs) @attrs.setter def attrs(self, value: Mapping[str, Any]): if isinstance(value, Mapping): - self._attrs = value + self._attrs = dict(value) else: - raise TypeError("attrs must be a mapping") + raise TypeError("attrs must be a 'Attrs' mapping") @property def layout(self): @@ -1846,7 +1846,7 @@ def __init__( if behavior is not None and not isinstance(behavior, Mapping): raise TypeError("behavior must be None or mapping") - if attrs is not None and not isinstance(attrs, MutableMapping): + if attrs is not None and not isinstance(attrs, Mapping): raise TypeError("attrs must be None or a mapping") if named_axis: @@ -1883,7 +1883,7 @@ def _update_class(self): self.__class__ = get_record_class(self._layout, self._behavior) @property - def attrs(self) -> Mapping[str, Any]: + def attrs(self) -> Attrs: """ The mapping containing top-level metadata, which is serialised with the record during pickling. 
@@ -1894,12 +1894,12 @@ def attrs(self) -> Mapping[str, Any]: """ if self._attrs is None: self._attrs = {} - return self._attrs + return Attrs(self, self._attrs) @attrs.setter def attrs(self, value: Mapping[str, Any]): if isinstance(value, Mapping): - self._attrs = value + self._attrs = dict(value) else: raise TypeError("attrs must be a mapping") @@ -2672,7 +2672,7 @@ def _wrap(cls, layout, behavior=None, attrs=None): return out @property - def attrs(self) -> Mapping[str, Any]: + def attrs(self) -> Attrs: """ The mapping containing top-level metadata, which is serialised with the array during pickling. @@ -2683,12 +2683,12 @@ def attrs(self) -> Mapping[str, Any]: """ if self._attrs is None: self._attrs = {} - return self._attrs + return Attrs(self, self._attrs) @attrs.setter def attrs(self, value: Mapping[str, Any]): if isinstance(value, Mapping): - self._attrs = value + self._attrs = dict(value) else: raise TypeError("attrs must be a mapping") diff --git a/tests/test_2757_attrs_metadata.py b/tests/test_2757_attrs_metadata.py index de04074860..e4356d659f 100644 --- a/tests/test_2757_attrs_metadata.py +++ b/tests/test_2757_attrs_metadata.py @@ -25,7 +25,7 @@ def test_set_attrs(): assert array.attrs == {} array.attrs = OTHER_ATTRS - assert array.attrs is OTHER_ATTRS + assert array.attrs == OTHER_ATTRS with pytest.raises(TypeError): array.attrs = "Hello world!" 
@@ -52,7 +52,7 @@ def test_transient_metadata_persists(): attrs = {**SOME_ATTRS, "@transient_key": lambda: None} array = ak.Array([[1, 2, 3]], attrs=attrs) num = ak.num(array) - assert num.attrs is attrs + assert num.attrs == attrs @pytest.mark.parametrize( @@ -79,13 +79,13 @@ def test_single_arg_ops(func): # Carry from argument assert ( func([[1, 2, 3, 4], [5], [10]], axis=-1, highlevel=True, attrs=SOME_ATTRS).attrs - is SOME_ATTRS + == SOME_ATTRS ) # Carry from outer array array = ak.Array([[1, 2, 3, 4], [5], [10]], attrs=SOME_ATTRS) - assert func(array, axis=-1, highlevel=True).attrs is SOME_ATTRS + assert func(array, axis=-1, highlevel=True).attrs == SOME_ATTRS # Carry from argument exclusively - assert func(array, axis=-1, highlevel=True, attrs=OTHER_ATTRS).attrs is OTHER_ATTRS + assert func(array, axis=-1, highlevel=True, attrs=OTHER_ATTRS).attrs == OTHER_ATTRS @pytest.mark.parametrize( @@ -134,15 +134,15 @@ def test_string_operations_unary(func): highlevel=True, attrs=SOME_ATTRS, ).attrs - is SOME_ATTRS + == SOME_ATTRS ) # Carry from outer array array = ak.Array( [["hello", "world!"], [], ["it's a beautiful day!"]], attrs=SOME_ATTRS ) - assert func(array, highlevel=True).attrs is SOME_ATTRS + assert func(array, highlevel=True).attrs == SOME_ATTRS # Carry from argument exclusively - assert func(array, highlevel=True, attrs=OTHER_ATTRS).attrs is OTHER_ATTRS + assert func(array, highlevel=True, attrs=OTHER_ATTRS).attrs == OTHER_ATTRS @pytest.mark.parametrize( @@ -188,15 +188,15 @@ def test_string_operations_unary_with_arg(func, arg): highlevel=True, attrs=SOME_ATTRS, ).attrs - is SOME_ATTRS + == SOME_ATTRS ) # Carry from outer array array = ak.Array( [["hello", "world!"], [], ["it's a beautiful day!"]], attrs=SOME_ATTRS ) - assert func(array, arg, highlevel=True).attrs is SOME_ATTRS + assert func(array, arg, highlevel=True).attrs == SOME_ATTRS # Carry from argument exclusively - assert func(array, arg, highlevel=True, attrs=OTHER_ATTRS).attrs is OTHER_ATTRS + 
assert func(array, arg, highlevel=True, attrs=OTHER_ATTRS).attrs == OTHER_ATTRS def test_string_operations_unary_with_arg_slice(): @@ -220,16 +220,16 @@ def test_string_operations_unary_with_arg_slice(): highlevel=True, attrs=SOME_ATTRS, ).attrs - is SOME_ATTRS + == SOME_ATTRS ) # Carry from outer array array = ak.Array( [["hello", "world!"], [], ["it's a beautiful day!"]], attrs=SOME_ATTRS ) - assert ak.str.slice(array, 1, highlevel=True).attrs is SOME_ATTRS + assert ak.str.slice(array, 1, highlevel=True).attrs == SOME_ATTRS # Carry from argument exclusively assert ( - ak.str.slice(array, 1, highlevel=True, attrs=OTHER_ATTRS).attrs is OTHER_ATTRS + ak.str.slice(array, 1, highlevel=True, attrs=OTHER_ATTRS).attrs == OTHER_ATTRS ) @@ -262,13 +262,13 @@ def test_string_operations_binary(func): highlevel=True, attrs=SOME_ATTRS, ).attrs - is SOME_ATTRS + == SOME_ATTRS ) # Carry from first array array = ak.Array( [["hello", "world!"], [], ["it's a beautiful day!"]], attrs=SOME_ATTRS ) - assert func(array, ["hello"], highlevel=True).attrs is SOME_ATTRS + assert func(array, ["hello"], highlevel=True).attrs == SOME_ATTRS # Carry from second array value_array = ak.Array(["hello"], attrs=OTHER_ATTRS) @@ -278,7 +278,7 @@ def test_string_operations_binary(func): value_array, highlevel=True, ).attrs - is OTHER_ATTRS + == OTHER_ATTRS ) # Carry from both arrays assert func( @@ -289,7 +289,7 @@ def test_string_operations_binary(func): # Carry from argument assert ( - func(array, value_array, highlevel=True, attrs=OTHER_ATTRS).attrs is OTHER_ATTRS + func(array, value_array, highlevel=True, attrs=OTHER_ATTRS).attrs == OTHER_ATTRS ) @@ -298,8 +298,8 @@ def test_broadcasting_arrays(): right = ak.Array([1], attrs=OTHER_ATTRS) left_result, right_result = ak.broadcast_arrays(left, right) - assert left_result.attrs is SOME_ATTRS - assert right_result.attrs is OTHER_ATTRS + assert left_result.attrs == SOME_ATTRS + assert right_result.attrs == OTHER_ATTRS def test_broadcasting_fields(): @@ 
-307,29 +307,29 @@ def test_broadcasting_fields(): right = ak.Array([{"y": 1}, {"y": 2}], attrs=OTHER_ATTRS) left_result, right_result = ak.broadcast_fields(left, right) - assert left_result.attrs is SOME_ATTRS - assert right_result.attrs is OTHER_ATTRS + assert left_result.attrs == SOME_ATTRS + assert right_result.attrs == OTHER_ATTRS def test_numba_arraybuilder(): numba = pytest.importorskip("numba") builder = ak.ArrayBuilder(attrs=SOME_ATTRS) - assert builder.attrs is SOME_ATTRS + assert builder.attrs == SOME_ATTRS @numba.njit def func(array): return array - assert func(builder).attrs is SOME_ATTRS + assert func(builder).attrs == SOME_ATTRS def test_numba_array(): numba = pytest.importorskip("numba") array = ak.Array([1, 2, 3], attrs=SOME_ATTRS) - assert array.attrs is SOME_ATTRS + assert array.attrs == SOME_ATTRS @numba.njit def func(array): return array - assert func(array).attrs is SOME_ATTRS + assert func(array).attrs == SOME_ATTRS diff --git a/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py b/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py index f44c513cc2..028b23cb90 100644 --- a/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py +++ b/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py @@ -19,7 +19,7 @@ def test_ArrayBuilder_behavior(): SOME_ATTRS = {"FOO": "BAR"} builder = ak.ArrayBuilder(behavior=SOME_ATTRS) - assert builder.behavior is SOME_ATTRS + assert builder.behavior == SOME_ATTRS assert func(builder).behavior == SOME_ATTRS diff --git a/tests/test_2806_attrs_typetracer.py b/tests/test_2806_attrs_typetracer.py index ea466eff04..980728c076 100644 --- a/tests/test_2806_attrs_typetracer.py +++ b/tests/test_2806_attrs_typetracer.py @@ -22,7 +22,7 @@ def test_typetracer_with_report(): form = layout.form_with_key("node{id}") meta, report = typetracer_with_report(form, highlevel=True, attrs=SOME_ATTRS) - assert meta.attrs is SOME_ATTRS + assert meta.attrs == SOME_ATTRS meta, report = 
typetracer_with_report(form, highlevel=True, attrs=None) assert meta._attrs is None @@ -44,5 +44,5 @@ def test_function(function): "z": [[0.1, 0.1, 0.2], [3, 1, 2], [2, 1, 2]], } ) - assert function(array, attrs=SOME_ATTRS).attrs is SOME_ATTRS + assert function(array, attrs=SOME_ATTRS).attrs == SOME_ATTRS assert function(array)._attrs is None diff --git a/tests/test_2837_ufunc_attrs_behavior.py b/tests/test_2837_ufunc_attrs_behavior.py index 86f2dcedec..0be740fbf1 100644 --- a/tests/test_2837_ufunc_attrs_behavior.py +++ b/tests/test_2837_ufunc_attrs_behavior.py @@ -14,15 +14,15 @@ def test(): def test_unary(): x = ak.Array([1, 2, 3], behavior={"foo": "BAR"}, attrs={"hello": "world"}) y = -x - assert y.attrs is x.attrs + assert y.attrs == x.attrs assert x.behavior is y.behavior def test_two_return(): x = ak.Array([1, 2, 3], behavior={"foo": "BAR"}, attrs={"hello": "world"}) y, y_ret = divmod(x, 2) - assert y.attrs is y_ret.attrs - assert y.attrs is x.attrs + assert y.attrs == y_ret.attrs + assert y.attrs == x.attrs assert y.behavior is y_ret.behavior assert y.behavior is x.behavior diff --git a/tests/test_2866_getitem_attrs.py b/tests/test_2866_getitem_attrs.py index 727edfe214..0524d6dc0e 100644 --- a/tests/test_2866_getitem_attrs.py +++ b/tests/test_2866_getitem_attrs.py @@ -11,24 +11,24 @@ def test_array_slice(): array = ak.Array([[0, 1, 2], [4]], attrs=ATTRS) - assert array.attrs is ATTRS + assert array.attrs == ATTRS - assert array[0].attrs is ATTRS - assert array[1:].attrs is ATTRS + assert array[0].attrs == ATTRS + assert array[1:].attrs == ATTRS def test_array_field(): array = ak.Array([[{"x": 1}, {"x": 2}], [{"x": 10}]], attrs=ATTRS) - assert array.attrs is ATTRS + assert array.attrs == ATTRS - assert array.x.attrs is ATTRS - assert array.x[1:].attrs is ATTRS + assert array.x.attrs == ATTRS + assert array.x[1:].attrs == ATTRS def test_record_field(): array = ak.Array([{"x": [1, 2, 3]}], attrs=ATTRS) - assert array.attrs is ATTRS + assert array.attrs == 
ATTRS record = array[0] - assert record.attrs is ATTRS - assert record.x.attrs is ATTRS + assert record.attrs == ATTRS + assert record.x.attrs == ATTRS diff --git a/tests/test_3277_attrs_behavior_on_array_copies.py b/tests/test_3277_attrs_behavior_on_array_copies.py new file mode 100644 index 0000000000..988f976adb --- /dev/null +++ b/tests/test_3277_attrs_behavior_on_array_copies.py @@ -0,0 +1,17 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE +# ruff: noqa: E402 + +from __future__ import annotations + +import awkward as ak + + +def test(): + arr = ak.Array([1]) + arr.attrs["foo"] = "bar" + + arr2 = ak.copy(arr) + assert arr2.attrs == arr.attrs + + arr2.attrs["foo"] = "baz" + assert arr2.attrs != arr.attrs From f7812313a6d46d498364e19d386f33d34c431711 Mon Sep 17 00:00:00 2001 From: Peter Fackeldey Date: Wed, 18 Dec 2024 09:36:56 -0500 Subject: [PATCH 2/3] perf: make the ak.Array.mask weakly dependent on the array itself (#3347) * make the ak.Array.mask weakly dependent on the array itself * add test * skip test for pypy * fix more cases where the array doesn't live long enough to create a mask with ak.Array.mask * style: pre-commit fixes --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- docs/user-guide/how-to-create-missing.md | 2 +- src/awkward/highlevel.py | 13 ++- tests-cuda/test_2922a_new_cuda_kernels.py | 3 +- tests-cuda/test_3136_cuda_reducers.py | 80 +++++++-------- tests/test_2020_reduce_axis_none.py | 98 ++++++++++++++----- tests/test_2064_fill_none_record.py | 3 +- .../test_3347_weakref_mask_highlevel_array.py | 25 +++++ 7 files changed, 148 insertions(+), 76 deletions(-) create mode 100644 tests/test_3347_weakref_mask_highlevel_array.py diff --git a/docs/user-guide/how-to-create-missing.md b/docs/user-guide/how-to-create-missing.md index affedfd2a1..067f0076ed 100644 --- a/docs/user-guide/how-to-create-missing.md +++ b/docs/user-guide/how-to-create-missing.md 
@@ -265,6 +265,6 @@ def faster_example(): data, mask = faster_example() -array = ak.Array(data).mask[mask] +array = ak.mask(data, mask) array ``` diff --git a/src/awkward/highlevel.py b/src/awkward/highlevel.py index f9473bdf71..513d44206d 100644 --- a/src/awkward/highlevel.py +++ b/src/awkward/highlevel.py @@ -12,6 +12,7 @@ import keyword import pickle import re +import weakref from collections.abc import Iterable, Mapping, Sequence, Sized from awkward_cpp.lib import _ext @@ -488,13 +489,19 @@ def named_axis(self) -> AxisMapping: class Mask: def __init__(self, array): - self._array = array + self._array = weakref.ref(array) def __getitem__(self, where): + array = self._array() + if array is None: + msg = "The array to mask was deleted before it could be masked. " + msg += "If you want to construct this mask, you must either keep the array alive " + msg += "or use 'ak.mask' explicitly." + raise ValueError(msg) with ak._errors.OperationErrorContext( - "ak.Array.mask", args=[self._array, where], kwargs={} + "ak.Array.mask", args=[array, where], kwargs={} ): - return ak.operations.mask(self._array, where, valid_when=True) + return ak.operations.mask(array, where, valid_when=True) @property def mask(self): diff --git a/tests-cuda/test_2922a_new_cuda_kernels.py b/tests-cuda/test_2922a_new_cuda_kernels.py index 48fa9dc04c..a872ed4e98 100644 --- a/tests-cuda/test_2922a_new_cuda_kernels.py +++ b/tests-cuda/test_2922a_new_cuda_kernels.py @@ -1003,7 +1003,8 @@ def test_2064_fill_none_record_axis_last(): def test_2064_fill_none_record_option_outside_record(): - record = ak.zip({"x": [1, 4], "y": [2, 3]}).mask[[True, False]] + record = ak.zip({"x": [1, 4], "y": [2, 3]}) + record = record.mask[[True, False]] cuda_record = ak.to_backend(record, "cuda") diff --git a/tests-cuda/test_3136_cuda_reducers.py b/tests-cuda/test_3136_cuda_reducers.py index 06ab47117a..e0d1d8c4ff 100644 --- a/tests-cuda/test_3136_cuda_reducers.py +++ b/tests-cuda/test_3136_cuda_reducers.py @@ -267,11 
+267,11 @@ def test_2020_reduce_axis_none_sum(): ak.sum(array, axis=None, keepdims=True), ak.to_regular(ak.Array([[63.0]], backend="cuda")), ) + + arr = ak.Array([[63.0]], backend="cuda") assert ak.almost_equal( ak.sum(array, axis=None, keepdims=True, mask_identity=True), - ak.to_regular( - ak.Array([[63.0]], backend="cuda").mask[ak.Array([[True]], backend="cuda")] - ), + ak.to_regular(arr.mask[ak.Array([[True]], backend="cuda")]), ) assert ak.sum(array[2], axis=None, mask_identity=True) is None del array @@ -291,13 +291,11 @@ def test_2020_reduce_axis_none_prod(): ak.prod(array[1:], axis=None, keepdims=True), ak.to_regular(ak.Array([[4838400.0]], backend="cuda")), ) + + arr = ak.Array([[4838400.0]], backend="cuda") assert ak.almost_equal( ak.prod(array[1:], axis=None, keepdims=True, mask_identity=True), - ak.to_regular( - ak.Array([[4838400.0]], backend="cuda").mask[ - ak.Array([[True]], backend="cuda") - ] - ), + ak.to_regular(arr.mask[ak.Array([[True]], backend="cuda")]), ) assert ak.prod(array[2], axis=None, mask_identity=True) is None del array @@ -316,19 +314,17 @@ def test_2020_reduce_axis_none_min(): ak.min(array, axis=None, keepdims=True, initial=-100.0, mask_identity=False), ak.to_regular(ak.Array([[-100.0]], backend="cuda")), ) + + arr = ak.Array([[0.0]], backend="cuda") assert ak.almost_equal( ak.min(array, axis=None, keepdims=True, mask_identity=True), - ak.to_regular( - ak.Array([[0.0]], backend="cuda").mask[ak.Array([[True]], backend="cuda")] - ), + ak.to_regular(arr.mask[ak.Array([[True]], backend="cuda")]), ) + + arr = ak.Array(ak.Array([[np.inf]], backend="cuda")) assert ak.almost_equal( ak.min(array[-1:], axis=None, keepdims=True, mask_identity=True), - ak.to_regular( - ak.Array(ak.Array([[np.inf]], backend="cuda")).mask[ - ak.Array([[False]], backend="cuda") - ] - ), + ak.to_regular(arr.mask[ak.Array([[False]], backend="cuda")]), ) assert ak.min(array[2], axis=None, mask_identity=True) is None del array @@ -347,19 +343,17 @@ def 
test_2020_reduce_axis_none_max(): ak.max(array, axis=None, keepdims=True, initial=100.0, mask_identity=False), ak.to_regular(ak.Array([[100.0]], backend="cuda")), ) + + arr = ak.Array([[10.0]], backend="cuda") assert ak.almost_equal( ak.max(array, axis=None, keepdims=True, mask_identity=True), - ak.to_regular( - ak.Array([[10.0]], backend="cuda").mask[ak.Array([[True]], backend="cuda")] - ), + ak.to_regular(arr.mask[ak.Array([[True]], backend="cuda")]), ) + + arr = ak.Array(ak.Array([[np.inf]], backend="cuda")) assert ak.almost_equal( ak.max(array[-1:], axis=None, keepdims=True, mask_identity=True), - ak.to_regular( - ak.Array(ak.Array([[np.inf]], backend="cuda")).mask[ - ak.Array([[False]], backend="cuda") - ] - ), + ak.to_regular(arr.mask[ak.Array([[False]], backend="cuda")]), ) assert ak.max(array[2], axis=None, mask_identity=True) is None del array @@ -374,17 +368,17 @@ def test_2020_reduce_axis_none_count(): ak.count(array, axis=None, keepdims=True, mask_identity=False), ak.to_regular(ak.Array([[12]], backend="cuda")), ) + + arr = ak.Array([[12]], backend="cuda") assert ak.almost_equal( ak.count(array, axis=None, keepdims=True, mask_identity=True), - ak.to_regular( - ak.Array([[12]], backend="cuda").mask[ak.Array([[True]], backend="cuda")] - ), + ak.to_regular(arr.mask[ak.Array([[True]], backend="cuda")]), ) + + arr = ak.Array([[0]], backend="cuda") assert ak.almost_equal( ak.count(array[-1:], axis=None, keepdims=True, mask_identity=True), - ak.to_regular( - ak.Array([[0]], backend="cuda").mask[ak.Array([[False]], backend="cuda")] - ), + ak.to_regular(arr.mask[ak.Array([[False]], backend="cuda")]), ) assert ak.count(array[2], axis=None, mask_identity=True) is None assert ak.count(array[2], axis=None, mask_identity=False) == 0 @@ -400,17 +394,17 @@ def test_2020_reduce_axis_none_count_nonzero(): ak.count_nonzero(array, axis=None, keepdims=True, mask_identity=False), ak.to_regular(ak.Array([[11]], backend="cuda")), ) + + arr = ak.Array([[11]], backend="cuda") 
assert ak.almost_equal( ak.count_nonzero(array, axis=None, keepdims=True, mask_identity=True), - ak.to_regular( - ak.Array([[11]], backend="cuda").mask[ak.Array([[True]], backend="cuda")] - ), + ak.to_regular(arr.mask[ak.Array([[True]], backend="cuda")]), ) + + arr = ak.Array([[0]], backend="cuda") assert ak.almost_equal( ak.count_nonzero(array[-1:], axis=None, keepdims=True, mask_identity=True), - ak.to_regular( - ak.Array([[0]], backend="cuda").mask[ak.Array([[False]], backend="cuda")] - ), + ak.to_regular(arr.mask[ak.Array([[False]], backend="cuda")]), ) assert ak.count_nonzero(array[2], axis=None, mask_identity=True) is None assert ak.count_nonzero(array[2], axis=None, mask_identity=False) == 0 @@ -422,9 +416,9 @@ def test_2020_reduce_axis_none_std_no_mask_axis_none(): [[0, 2, 3.0], [4, 5, 6, 7, 8], [], [9, 8, None], [10, 1], []], backend="cuda" ) out1 = ak.std(array[-1:], axis=None, keepdims=True, mask_identity=True) - out2 = ak.to_regular( - ak.Array([[0.0]], backend="cuda").mask[ak.Array([[False]], backend="cuda")] - ) + + arr = ak.Array([[0.0]], backend="cuda") + out2 = ak.to_regular(arr.mask[ak.Array([[False]], backend="cuda")]) assert ak.almost_equal(out1, out2) out3 = ak.std(array[2], axis=None, mask_identity=True) @@ -442,13 +436,11 @@ def test_2020_reduce_axis_none_std(): ak.std(array, axis=None, keepdims=True, mask_identity=False), ak.to_regular([[3.139134700306227]]), ) + + arr = ak.Array([[3.139134700306227]], backend="cuda") cpt.assert_allclose( ak.std(array, axis=None, keepdims=True, mask_identity=True), - ak.to_regular( - ak.Array([[3.139134700306227]], backend="cuda").mask[ - ak.Array([[True]], backend="cuda") - ] - ), + ak.to_regular(arr.mask[ak.Array([[True]], backend="cuda")]), ) assert np.isnan(ak.std(array[2], axis=None, mask_identity=False)) del array diff --git a/tests/test_2020_reduce_axis_none.py b/tests/test_2020_reduce_axis_none.py index 9ecc6f30f4..b621f10024 100644 --- a/tests/test_2020_reduce_axis_none.py +++ 
b/tests/test_2020_reduce_axis_none.py @@ -15,9 +15,11 @@ def test_sum(): assert ak.almost_equal( ak.sum(array, axis=None, keepdims=True), ak.to_regular([[63.0]]) ) + + arr = ak.Array([[63.0]]) assert ak.almost_equal( ak.sum(array, axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[63.0]]).mask[[[True]]]), + ak.to_regular(arr.mask[[[True]]]), ) assert ak.sum(array[2], axis=None, mask_identity=True) is None @@ -31,9 +33,10 @@ def test_prod(): assert ak.almost_equal( ak.prod(array[1:], axis=None, keepdims=True), ak.to_regular([[4838400.0]]) ) + arr = ak.Array([[4838400.0]]) assert ak.almost_equal( ak.prod(array[1:], axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[4838400.0]]).mask[[[True]]]), + ak.to_regular(arr.mask[[[True]]]), ) assert ak.prod(array[2], axis=None, mask_identity=True) is None @@ -49,13 +52,16 @@ def test_min(): ak.to_regular([[-100.0]]), ) + arr = ak.Array([[0.0]]) assert ak.almost_equal( ak.min(array, axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[0.0]]).mask[[[True]]]), + ak.to_regular(arr.mask[[[True]]]), ) + + arr = ak.Array([[np.inf]]) assert ak.almost_equal( ak.min(array[-1:], axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[np.inf]]).mask[[[False]]]), + ak.to_regular(arr.mask[[[False]]]), ) assert ak.min(array[2], axis=None, mask_identity=True) is None @@ -70,13 +76,17 @@ def test_max(): ak.max(array, axis=None, keepdims=True, initial=100, mask_identity=False), ak.to_regular([[100.0]]), ) + + arr = ak.Array([[10.0]]) assert ak.almost_equal( ak.max(array, axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[10.0]]).mask[[[True]]]), + ak.to_regular(arr.mask[[[True]]]), ) + + arr = ak.Array([[-np.inf]]) assert ak.almost_equal( ak.max(array[-1:], axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[np.inf]]).mask[[[False]]]), + ak.to_regular(arr.mask[[[False]]]), ) assert ak.max(array[2], axis=None, 
mask_identity=True) is None @@ -87,13 +97,17 @@ def test_count(): ak.count(array, axis=None, keepdims=True, mask_identity=False), ak.to_regular([[12]]), ) + + arr = ak.Array([[12]]) assert ak.almost_equal( ak.count(array, axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[12]]).mask[[[True]]]), + ak.to_regular(arr.mask[[[True]]]), ) + + arr = ak.Array([[0]]) assert ak.almost_equal( ak.count(array[-1:], axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[0]]).mask[[[False]]]), + ak.to_regular(arr.mask[[[False]]]), ) assert ak.count(array[2], axis=None, mask_identity=True) is None assert ak.count(array[2], axis=None, mask_identity=False) == 0 @@ -105,13 +119,17 @@ def test_count_nonzero(): ak.count_nonzero(array, axis=None, keepdims=True, mask_identity=False), ak.to_regular([[11]]), ) + + arr = ak.Array([[11]]) assert ak.almost_equal( ak.count_nonzero(array, axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[11]]).mask[[[True]]]), + ak.to_regular(arr.mask[[[True]]]), ) + + arr = ak.Array([[0]]) assert ak.almost_equal( ak.count_nonzero(array[-1:], axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[0]]).mask[[[False]]]), + ak.to_regular(arr.mask[[[False]]]), ) assert ak.count_nonzero(array[2], axis=None, mask_identity=True) is None assert ak.count_nonzero(array[2], axis=None, mask_identity=False) == 0 @@ -123,17 +141,20 @@ def test_std(): ak.std(array, axis=None, keepdims=True, mask_identity=False), ak.to_regular([[3.139134700306227]]), ) + + arr = ak.Array([[3.139134700306227]]) assert ak.almost_equal( ak.std(array, axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[3.139134700306227]]).mask[[[True]]]), + ak.to_regular(arr.mask[[[True]]]), ) assert np.isnan(ak.std(array[2], axis=None, mask_identity=False)) def test_std_no_mask_axis_none(): + arr = ak.Array([[0.0]]) assert ak.almost_equal( ak.std(array[-1:], axis=None, keepdims=True, mask_identity=True), - 
ak.to_regular(ak.Array([[0.0]]).mask[[[False]]]), + ak.to_regular(arr.mask[[[False]]]), ) assert ak.std(array[2], axis=None, mask_identity=True) is None @@ -144,17 +165,20 @@ def test_var(): ak.var(array, axis=None, keepdims=True, mask_identity=False), ak.to_regular([[9.854166666666666]]), ) + + arr = ak.Array([[9.854166666666666]]) assert ak.almost_equal( ak.var(array, axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[9.854166666666666]]).mask[[[True]]]), + ak.to_regular(arr.mask[[[True]]]), ) assert np.isnan(ak.var(array[2], axis=None, mask_identity=False)) def test_var_no_mask_axis_none(): + arr = ak.Array([[0.0]]) assert ak.almost_equal( ak.var(array[-1:], axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[0.0]]).mask[[[False]]]), + ak.to_regular(arr.mask[[[False]]]), ) assert ak.var(array[2], axis=None, mask_identity=True) is None @@ -165,17 +189,20 @@ def test_mean(): ak.mean(array, axis=None, keepdims=True, mask_identity=False), ak.to_regular([[5.25]]), ) + + arr = ak.Array([[5.25]]) assert ak.almost_equal( ak.mean(array, axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[5.25]]).mask[[[True]]]), + ak.to_regular(arr.mask[[[True]]]), ) assert np.isnan(ak.mean(array[2], axis=None, mask_identity=False)) def test_mean_no_mask_axis_none(): + arr = ak.Array([[0.0]]) assert ak.almost_equal( ak.mean(array[-1:], axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[0.0]]).mask[[[False]]]), + ak.to_regular(arr.mask[[[False]]]), ) assert ak.mean(array[2], axis=None, mask_identity=True) is None @@ -186,17 +213,20 @@ def test_ptp(): ak.ptp(array, axis=None, keepdims=True, mask_identity=False), ak.to_regular([[10.0]]), ) + + arr = ak.Array([[10.0]]) assert ak.almost_equal( ak.ptp(array, axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[10.0]]).mask[[[True]]]), + ak.to_regular(arr.mask[[[True]]]), ) assert ak.ptp(array[2], axis=None, mask_identity=False) == 
pytest.approx(0.0) def test_ptp_no_mask_axis_none(): + arr = ak.Array([[0.0]]) assert ak.almost_equal( ak.ptp(array[-1:], axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[0.0]]).mask[[[False]]]), + ak.to_regular(arr.mask[[[False]]]), ) assert ak.ptp(array[2], axis=None, mask_identity=True) is None @@ -207,13 +237,17 @@ def test_argmax(): ak.argmax(array, axis=None, keepdims=True, mask_identity=False), ak.to_regular([[11]]), ) + + arr = ak.Array([[11]]) assert ak.almost_equal( ak.argmax(array, axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[11]]).mask[[[True]]]), + ak.to_regular(arr.mask[[[True]]]), ) + + arr = ak.Array([[0]]) assert ak.almost_equal( ak.argmax(array[-1:], axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[0]]).mask[[[False]]]), + ak.to_regular(arr.mask[[[False]]]), ) assert ak.argmax(array[2], axis=None, mask_identity=True) is None assert ak.argmax(array[2], axis=None, mask_identity=False) == -1 @@ -225,13 +259,17 @@ def test_argmin(): ak.argmin(array, axis=None, keepdims=True, mask_identity=False), ak.to_regular([[0]]), ) + + arr = ak.Array([[0]]) assert ak.almost_equal( ak.argmin(array, axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[0]]).mask[[[True]]]), + ak.to_regular(arr.mask[[[True]]]), ) + + arr = ak.Array([[999]]) assert ak.almost_equal( ak.argmin(array[-1:], axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[999]]).mask[[[False]]]), + ak.to_regular(arr.mask[[[False]]]), ) assert ak.argmin(array[2], axis=None, mask_identity=True) is None assert ak.argmin(array[2], axis=None, mask_identity=False) == -1 @@ -243,13 +281,17 @@ def test_any(): ak.any(array, axis=None, keepdims=True, mask_identity=False), ak.to_regular([[True]]), ) + + arr = ak.Array([[True]]) assert ak.almost_equal( ak.any(array, axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[True]]).mask[[[True]]]), + 
ak.to_regular(arr.mask[[[True]]]), ) + + arr = ak.Array([[True]]) assert ak.almost_equal( ak.any(array[-1:], axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[True]]).mask[[[False]]]), + ak.to_regular(arr.mask[[[False]]]), ) assert ak.any(array[2], axis=None, mask_identity=True) is None assert not ak.any(array[2], axis=None, mask_identity=False) @@ -261,13 +303,17 @@ def test_all(): ak.all(array, axis=None, keepdims=True, mask_identity=False), ak.to_regular([[False]]), ) + + arr = ak.Array([[False]]) assert ak.almost_equal( ak.all(array, axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[False]]).mask[[[True]]]), + ak.to_regular(arr.mask[[[True]]]), ) + + arr = ak.Array([[False]]) assert ak.almost_equal( ak.all(array[-1:], axis=None, keepdims=True, mask_identity=True), - ak.to_regular(ak.Array([[False]]).mask[[[False]]]), + ak.to_regular(arr.mask[[[False]]]), ) assert ak.all(array[2], axis=None, mask_identity=True) is None assert ak.all(array[2], axis=None, mask_identity=False) diff --git a/tests/test_2064_fill_none_record.py b/tests/test_2064_fill_none_record.py index e98541911b..169d3bb757 100644 --- a/tests/test_2064_fill_none_record.py +++ b/tests/test_2064_fill_none_record.py @@ -24,5 +24,6 @@ def test_axis_last(): def test_option_outside_record(): - record = ak.zip({"x": [1, 4], "y": [2, 3]}).mask[[True, False]] + record = ak.zip({"x": [1, 4], "y": [2, 3]}) + record = record.mask[[True, False]] assert ak.fill_none(record, 0, axis=-1).to_list() == [{"x": 1, "y": 2}, 0] diff --git a/tests/test_3347_weakref_mask_highlevel_array.py b/tests/test_3347_weakref_mask_highlevel_array.py new file mode 100644 index 0000000000..28be2781cc --- /dev/null +++ b/tests/test_3347_weakref_mask_highlevel_array.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import platform + +import pytest + +import awkward as ak + + +@pytest.mark.skipif( + platform.python_implementation() == "PyPy", + reason="PyPy has a different GC 
strategy than CPython and thus weakrefs may stay alive a little bit longer than expected, see: https://doc.pypy.org/en/latest/cpython_differences.html#differences-related-to-garbage-collection-strategies",
+)
+def test_Array_mask_weakref():
+    arr = ak.Array([1])
+    m = arr.mask
+
+    assert ak.all(m[[True]] == arr)
+
+    del arr
+    with pytest.raises(
+        ValueError,
+        match="The array to mask was deleted before it could be masked. If you want to construct this mask, you must either keep the array alive or use 'ak.mask' explicitly.",
+    ):
+        _ = m[[True]]

From c59a49c0131186634ae269ea3a176ae70c058c23 Mon Sep 17 00:00:00 2001
From: Jim Pivarski
Date: Wed, 18 Dec 2024 08:45:06 -0600
Subject: [PATCH 3/3] chore: add autodiff/eager_forward.py to studies directory

Files in the studies directory don't affect anything; I'll commit this
directly to main.
---
 studies/autodiff/eager_forward.py | 162 ++++++++++++++++++++++++++++++
 1 file changed, 162 insertions(+)
 create mode 100644 studies/autodiff/eager_forward.py

diff --git a/studies/autodiff/eager_forward.py b/studies/autodiff/eager_forward.py
new file mode 100644
index 0000000000..95bfff0101
--- /dev/null
+++ b/studies/autodiff/eager_forward.py
@@ -0,0 +1,162 @@
+# Eager, forward-mode autodiff (autograd)
+# Backpropagation will probably require collecting a DAG with a typetracer or Dask
+#
+# Presented at https://indico.cern.ch/event/1387764/
+#
+# The following are good references:
+#
+# https://www.hedonisticlearning.com/posts/complex-step-differentiation.html
+# https://researchrepository.wvu.edu/faculty_publications/426/
+
+import numpy as np
+from numpy.lib.mixins import NDArrayOperatorsMixin
+
+class diffarray(NDArrayOperatorsMixin):
+    """Array wrapper for eager, forward-mode autodiff via complex steps.
+
+    Primal values are stored in the real part of one complex array; the
+    tangents (derivatives), multiplied by `_step_scale`, are stored in its
+    imaginary part. NumPy ufuncs and functions are applied to that complex
+    array through the NEP-13 and NEP-18 overrides defined below.
+    """
+    __slots__ = ("_array",)
+
+    @classmethod
+    def _build(cls, complex_array):
+        "Manual constructor from a `complex_array`; bypasses `__init__`."
+        self = cls.__new__(cls)
+        self._array = complex_array
+        return self
+
+    def __init__(self, primal, tangent=None, *, dtype=None):
+        """Constructor for floating-point `primal` and (optional) `tangent`.
+
+        `dtype` (a NumPy scalar type or `np.dtype`) defaults to the dtype of
+        `primal`: float32 is stored as complex64, float64 as complex128, and
+        anything else raises TypeError. A missing `tangent` is taken to be 1.
+        """
+        if dtype is None:
+            dtype = primal.dtype.type
+        elif isinstance(dtype, np.dtype):
+            dtype = dtype.type
+
+        if issubclass(dtype, np.float32):
+            self._array = primal.astype(np.complex64)
+        elif issubclass(dtype, np.float64):
+            self._array = primal.astype(np.complex128)
+        else:
+            raise TypeError("only float32 or float64 arrays can be differentiated")
+
+        # seed the imaginary part with the tangent, scaled by the complex step
+        self._array += (1 if tangent is None else tangent) * 1j * self._step_scale
+
+    @property
+    def _step_scale(self):
+        "Size of the complex step; half precision of 1.0."
+        # NOTE(review): this gives complex128 the larger step (1e-4) and complex64
+        # the smaller one (1e-8), which looks swapped relative to "half precision"
+        # of float64 vs. float32 -- confirm intent before changing the values.
+        return 1e-4 if issubclass(self._array.dtype.type, np.complex128) else 1e-8
+
+    @property
+    def primal(self):
+        "Array of primary values."
+        return np.real(self._array)
+
+    @property
+    def tangent(self):
+        "Array of derivatives."
+        return np.imag(self._array) / self._step_scale
+
+    def __str__(self):
+        primal = str(self.primal).replace("\n", "\n ")
+        tangent = str(self.tangent).replace("\n", "\n ")
+        return f"primal: {primal}\ntangent: {tangent}"
+
+    def __repr__(self):
+        primal = str(self.primal).replace("\n", "\n ")
+        tangent = str(self.tangent).replace("\n", "\n ")
+        dtype = ""
+        if issubclass(self._array.dtype.type, np.complex64):
+            dtype = ",\n dtype=np.float32"
+        return f"diffarray({primal},\n {tangent}{dtype})"
+
+    def _prepare(self, args, kwargs):
+        "Used in NEP-13 and NEP-18 overrides."
+        # unwrap diffarray arguments to their complex arrays; pass others through
+        cls = type(self)
+        args = [x._array if isinstance(x, cls) else x for x in args]
+        # test each value `v`: the list comprehension's `x` is not in scope here
+        kwargs = {k: v._array if isinstance(v, cls) else v for k, v in kwargs.items()}
+        return cls, args, kwargs
+
+    def __array_ufunc__(self, ufunc, method, *args, **kwargs):
+        "https://numpy.org/neps/nep-0013-ufunc-overrides.html"
+        if ufunc.__name__ == "absolute":
+            # interpret `absolute` only on the primal
+            if len(kwargs) != 0:
+                raise NotImplementedError("kwargs in np.absolute")
+            arg = args[0]._array
+            out = arg.copy()
+            out[arg.real < 0] *= -1
+            return type(self)._build(out)
+
+        if ufunc.__name__ in (
+            "less", "less_equal", "equal", "not_equal", "greater", "greater_equal"
+        ):
+            # do comparisons only on the primal
+            cls = type(self)
+            args = [x._array.real if isinstance(x, cls) else x for x in args]
+            return getattr(ufunc, method)(*args, **kwargs)
+
+        cls, prepared_args, prepared_kwargs = self._prepare(args, kwargs)
+        out = getattr(ufunc, method)(*prepared_args, **prepared_kwargs)
+        if issubclass(out.dtype.type, np.complexfloating):
+            return cls._build(out)
+        else:
+            return out
+
+    def __array_function__(self, func, types, args, kwargs):
+        "https://numpy.org/neps/nep-0018-array-function-protocol.html"
+        if func.__name__ == "real":
+            # interpret `real` only on the primal
+            return type(self)._build(args[0]._array)
+        if func.__name__ == "imag":
+            # interpret `imag` only on the primal
+            return type(self)._build(args[0]._array * 0)
+
+        cls, prepared_args, prepared_kwargs = self._prepare(args, kwargs)
+        out = func(*prepared_args, **prepared_kwargs)
+        if issubclass(out.dtype.type, np.complexfloating):
+            return cls._build(out)
+        else:
+            return out
+
+    def __getitem__(self, where):
+        "Index the underlying complex array; complex scalars are re-wrapped."
+        out = self._array[where]
+        if isinstance(out, np.complexfloating):
+            # NumPy returns a scalar; CuPy and Array API return an array
+            # we return an array to keep derivatives
+            return type(self)._build(np.asarray(out))
+        # NOTE(review): other results (e.g. slices) come back as the raw complex array
+        return out
+
+# >>> x = np.linspace(-20, 20, 10000)
+# >>> da_x = diffarray(x)
+# >>> da_y = np.sin(da_x) / da_x
+# >>> da_x
+# diffarray([-20. -19.9959996 -19.9919992 ... 19.9919992 19.9959996
+# 20. ],
+# [1. 1. 1. ... 1. 1. 1.])
+# >>> da_y
+# diffarray([0.04564726 0.04557439 0.04550076 ... 0.04550076 0.04557439 0.04564726],
+# [-0.01812174 -0.01831149 -0.01850102 ... 0.01850102 0.01831149
+# 0.01812174])
+# >>> abs(da_y.tangent - ((x*np.cos(x) - np.sin(x)) / x**2)).max()
+# 3.9683650809863025e-10
+# >>> import matplotlib.pyplot as plt
+# >>> plt.plot(x, da_y.tangent)
+# >>> plt.plot(x, (x*np.cos(x) - np.sin(x)) / x**2, ls="--")
+#
+# See https://gist.github.com/jpivarski/8dc48a87bae7a856848f87e36b9d244d for the plot