Skip to content

Commit

Permalink
Merge branch 'main' into pfackeldey/add_bytes_repr
Browse files Browse the repository at this point in the history
  • Loading branch information
pfackeldey committed Dec 18, 2024
2 parents 65135dc + c59a49c commit 90f2d6c
Show file tree
Hide file tree
Showing 15 changed files with 404 additions and 133 deletions.
2 changes: 1 addition & 1 deletion docs/user-guide/how-to-create-missing.md
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,6 @@ def faster_example():
data, mask = faster_example()
array = ak.Array(data).mask[mask]
array = ak.mask(data, mask)
array
```
42 changes: 41 additions & 1 deletion src/awkward/_attrs.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
from __future__ import annotations

import weakref
from collections.abc import Mapping
from types import MappingProxyType

from awkward._typing import Any, JSONMapping

Expand Down Expand Up @@ -41,4 +43,42 @@ def attrs_of(*arrays, attrs: Mapping | None = None) -> Mapping:


def without_transient_attrs(attrs: dict[str, Any]) -> JSONMapping:
    """Return a copy of ``attrs`` without its transient entries.

    An entry is transient when its key is a string starting with ``"@"``.
    Keys that are not strings are never treated as transient and are kept;
    the ``isinstance`` guard also avoids calling ``startswith`` on them.

    Args:
        attrs: Mapping of attribute names to values.

    Returns:
        A new dict containing only the non-transient items of ``attrs``.
    """
    return {
        k: v for k, v in attrs.items() if not (isinstance(k, str) and k.startswith("@"))
    }


class Attrs(Mapping):
    """Mapping view over the attributes of an owning object.

    The view holds a weak reference to the owner (``ref``) and a frozen
    (read-only) wrapper around the attribute data. Item assignment does not
    mutate the wrapped data; it builds a new dict and stores it on the
    owner's ``_attrs``.
    """

    def __init__(self, ref, data: Mapping[str, Any]):
        """Create a view of ``data`` that is tied to the owner ``ref``.

        Args:
            ref: Object whose ``_attrs`` is replaced on item assignment; it
                must support weak references.
            data: Attribute mapping to expose.
        """
        self._ref = weakref.ref(ref)
        self._data = _freeze_attrs(data)

    def __getitem__(self, key: str):
        return self._data[key]

    def __setitem__(self, key: str, value: Any):
        """Store ``key: value`` in a fresh attribute dict on the owner.

        Raises:
            ValueError: If the owner has already been garbage collected.
        """
        ref = self._ref()
        if ref is None:
            msg = "The reference array has been deleted. If you still need to set attributes, convert this 'Attrs' instance to a dict with '.to_dict()'."
            raise ValueError(msg)
        updated = _unfreeze_attrs(self._data) | {key: value}
        ref._attrs = updated
        # Re-point this view at the dict that was just stored. Otherwise the
        # view keeps showing the pre-assignment data, and a second assignment
        # through the same view would rebuild from that stale snapshot and
        # silently drop the first one.
        self._data = _freeze_attrs(updated)

    def __iter__(self):
        return iter(self._data)

    def __len__(self):
        return len(self._data)

    def __repr__(self):
        return f"Attrs({_unfreeze_attrs(self._data)!r})"

    def to_dict(self):
        """Return the attributes as a new, independent ``dict``."""
        return _unfreeze_attrs(self._data)


def _freeze_attrs(attrs: Mapping[str, Any]) -> Mapping[str, Any]:
    """Wrap ``attrs`` in a read-only ``MappingProxyType``.

    No copy is made: the proxy reflects later changes to ``attrs`` but
    rejects item assignment through the proxy itself.
    """
    read_only_view = MappingProxyType(attrs)
    return read_only_view


def _unfreeze_attrs(attrs: Mapping[str, Any]) -> dict[str, Any]:
    """Return a new, mutable ``dict`` with the items of ``attrs``.

    The copy is shallow: values are shared with ``attrs``.
    """
    thawed: dict[str, Any] = {}
    thawed.update(attrs)
    return thawed
43 changes: 25 additions & 18 deletions src/awkward/highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@
import keyword
import pickle
import re
import weakref
from collections.abc import Iterable, Mapping, Sequence, Sized

from awkward_cpp.lib import _ext

import awkward as ak
import awkward._connect.hist
from awkward._attrs import attrs_of, without_transient_attrs
from awkward._attrs import Attrs, attrs_of, without_transient_attrs
from awkward._backends.dispatch import register_backend_lookup_factory
from awkward._backends.numpy import NumpyBackend
from awkward._behavior import behavior_of, get_array_class, get_record_class
Expand All @@ -42,7 +43,7 @@
unpickle_record_schema_1,
)
from awkward._regularize import is_non_string_like_iterable
from awkward._typing import Any, MutableMapping, TypeVar
from awkward._typing import Any, TypeVar
from awkward._util import STDOUT
from awkward.prettyprint import Formatter, bytes_repr, highlevel_array_show_rows
from awkward.prettyprint import valuestr as prettyprint_valuestr
Expand Down Expand Up @@ -337,7 +338,7 @@ def __init__(
if behavior is not None and not isinstance(behavior, Mapping):
raise TypeError("behavior must be None or a mapping")

if attrs is not None and not isinstance(attrs, MutableMapping):
if attrs is not None and not isinstance(attrs, Mapping):
raise TypeError("attrs must be None or a mapping")

if named_axis:
Expand Down Expand Up @@ -379,9 +380,9 @@ def _update_class(self):
self.__class__ = get_array_class(self._layout, self._behavior)

@property
def attrs(self) -> Mapping:
def attrs(self) -> Attrs:
"""
The mutable mapping containing top-level metadata, which is serialised
The mapping containing top-level metadata, which is serialised
with the array during pickling.
Keys prefixed with `@` are identified as "transient" attributes
Expand All @@ -390,14 +391,14 @@ def attrs(self) -> Mapping:
"""
if self._attrs is None:
self._attrs = {}
return self._attrs
return Attrs(self, self._attrs)

@attrs.setter
def attrs(self, value: Mapping[str, Any]):
if isinstance(value, Mapping):
self._attrs = value
self._attrs = dict(value)
else:
raise TypeError("attrs must be a mapping")
raise TypeError("attrs must be a 'Attrs' mapping")

@property
def layout(self):
Expand Down Expand Up @@ -488,13 +489,19 @@ def named_axis(self) -> AxisMapping:

class Mask:
def __init__(self, array):
self._array = array
self._array = weakref.ref(array)

def __getitem__(self, where):
array = self._array()
if array is None:
msg = "The array to mask was deleted before it could be masked. "
msg += "If you want to construct this mask, you must either keep the array alive "
msg += "or use 'ak.mask' explicitly."
raise ValueError(msg)
with ak._errors.OperationErrorContext(
"ak.Array.mask", args=[self._array, where], kwargs={}
"ak.Array.mask", args=[array, where], kwargs={}
):
return ak.operations.mask(self._array, where, valid_when=True)
return ak.operations.mask(array, where, valid_when=True)

@property
def mask(self):
Expand Down Expand Up @@ -1862,7 +1869,7 @@ def __init__(
if behavior is not None and not isinstance(behavior, Mapping):
raise TypeError("behavior must be None or mapping")

if attrs is not None and not isinstance(attrs, MutableMapping):
if attrs is not None and not isinstance(attrs, Mapping):
raise TypeError("attrs must be None or a mapping")

if named_axis:
Expand Down Expand Up @@ -1899,7 +1906,7 @@ def _update_class(self):
self.__class__ = get_record_class(self._layout, self._behavior)

@property
def attrs(self) -> Mapping[str, Any]:
def attrs(self) -> Attrs:
"""
The mapping containing top-level metadata, which is serialised
with the record during pickling.
Expand All @@ -1910,12 +1917,12 @@ def attrs(self) -> Mapping[str, Any]:
"""
if self._attrs is None:
self._attrs = {}
return self._attrs
return Attrs(self, self._attrs)

@attrs.setter
def attrs(self, value: Mapping[str, Any]):
if isinstance(value, Mapping):
self._attrs = value
self._attrs = dict(value)
else:
raise TypeError("attrs must be a mapping")

Expand Down Expand Up @@ -2706,7 +2713,7 @@ def _wrap(cls, layout, behavior=None, attrs=None):
return out

@property
def attrs(self) -> Mapping[str, Any]:
def attrs(self) -> Attrs:
"""
The mapping containing top-level metadata, which is serialised
with the array during pickling.
Expand All @@ -2717,12 +2724,12 @@ def attrs(self) -> Mapping[str, Any]:
"""
if self._attrs is None:
self._attrs = {}
return self._attrs
return Attrs(self, self._attrs)

@attrs.setter
def attrs(self, value: Mapping[str, Any]):
if isinstance(value, Mapping):
self._attrs = value
self._attrs = dict(value)
else:
raise TypeError("attrs must be a mapping")

Expand Down
142 changes: 142 additions & 0 deletions studies/autodiff/eager_forward.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
# Eager, forward-mode autodiff (autograd)
# Backpropagation will probably require collecting a DAG with a typetracer or Dask
#
# Presented at https://indico.cern.ch/event/1387764/
#
# The following are good references:
#
# https://www.hedonisticlearning.com/posts/complex-step-differentiation.html
# https://researchrepository.wvu.edu/faculty_publications/426/

import numpy as np
from numpy.lib.mixins import NDArrayOperatorsMixin

class diffarray(NDArrayOperatorsMixin):
    """Array wrapper for eager, forward-mode differentiation via a complex step.

    Primal values are stored in the real part of a complex array; tangents
    (derivatives), multiplied by a small step, are stored in the imaginary
    part. NumPy ufuncs and functions are applied to that complex array
    through the NEP-13 and NEP-18 override protocols.
    """

    __slots__ = ("_array",)

    @classmethod
    def _build(cls, complex_array):
        """Manual constructor from a `complex_array`, bypassing `__init__`."""
        self = cls.__new__(cls)
        self._array = complex_array
        return self

    def __init__(self, primal, tangent=None, *, dtype=None):
        """Constructor for floating-point `primal` and (optional) `tangent`.

        Args:
            primal: Array of values; must provide `.dtype` and `.astype`.
            tangent: Seed derivative(s); 1 is used when omitted.
            dtype: Float type to differentiate in (a scalar type or an
                `np.dtype`); taken from `primal` when omitted.

        Raises:
            TypeError: If the float type is neither float32 nor float64.
        """
        if dtype is None:
            dtype = primal.dtype.type
        elif isinstance(dtype, np.dtype):
            dtype = dtype.type

        if issubclass(dtype, np.float32):
            self._array = primal.astype(np.complex64)
        elif issubclass(dtype, np.float64):
            self._array = primal.astype(np.complex128)
        else:
            raise TypeError("only float32 or float64 arrays can be differentiated")

        # seed the imaginary part with the scaled tangent
        self._array += (1 if tangent is None else tangent) * 1j * self._step_scale

    @property
    def _step_scale(self):
        """Size of the complex step; half precision of 1.0."""
        # sqrt(machine epsilon) is ~1e-4 for float32 (complex64) and ~1e-8
        # for float64 (complex128), so that the squared step is at or below
        # the rounding error of the primal.
        return 1e-4 if issubclass(self._array.dtype.type, np.complex64) else 1e-8

    @property
    def primal(self):
        """Array of primary values."""
        return np.real(self._array)

    @property
    def tangent(self):
        """Array of derivatives."""
        return np.imag(self._array) / self._step_scale

    def __str__(self):
        primal = str(self.primal).replace("\n", "\n ")
        tangent = str(self.tangent).replace("\n", "\n ")
        return f"primal: {primal}\ntangent: {tangent}"

    def __repr__(self):
        primal = str(self.primal).replace("\n", "\n ")
        tangent = str(self.tangent).replace("\n", "\n ")
        dtype = ""
        if issubclass(self._array.dtype.type, np.complex64):
            dtype = ",\n dtype=np.float32"
        return f"diffarray({primal},\n {tangent}{dtype})"

    def _prepare(self, args, kwargs):
        """Used in NEP-13 and NEP-18 overrides.

        Replaces every `diffarray` among the positional and keyword
        arguments by its underlying complex array.

        Returns:
            A `(cls, args, kwargs)` tuple, where `cls` is `type(self)`.
        """
        cls = type(self)
        args = [x._array if isinstance(x, cls) else x for x in args]
        # test each keyword *value*; the comprehension variable of the list
        # above is not visible here
        kwargs = {k: v._array if isinstance(v, cls) else v for k, v in kwargs.items()}
        return cls, args, kwargs

    def __array_ufunc__(self, ufunc, method, *args, **kwargs):
        """https://numpy.org/neps/nep-0013-ufunc-overrides.html"""
        if ufunc.__name__ == "absolute":
            # interpret `absolute` only on the primal
            if len(kwargs) != 0:
                raise NotImplementedError("kwargs in np.absolute")
            arg = args[0]._array
            out = arg.copy()
            # negate primal and tangent together wherever the primal is negative
            out[arg.real < 0] *= -1
            return type(self)._build(out)

        if ufunc.__name__ in (
            "less", "less_equal", "equal", "not_equal", "greater", "greater_equal"
        ):
            # do comparisons only on the primal
            cls = type(self)
            args = [x._array.real if isinstance(x, cls) else x for x in args]
            return getattr(ufunc, method)(*args, **kwargs)

        cls, prepared_args, prepared_kwargs = self._prepare(args, kwargs)
        out = getattr(ufunc, method)(*prepared_args, **prepared_kwargs)
        # only complex results carry a tangent; anything else is returned as-is
        if issubclass(out.dtype.type, np.complexfloating):
            return cls._build(out)
        else:
            return out

    def __array_function__(self, func, types, args, kwargs):
        """https://numpy.org/neps/nep-0018-array-function-protocol.html"""
        if func.__name__ == "real":
            # interpret `real` only on the primal
            return type(self)._build(args[0]._array)
        if func.__name__ == "imag":
            # interpret `imag` only on the primal
            return type(self)._build(args[0]._array * 0)

        cls, prepared_args, prepared_kwargs = self._prepare(args, kwargs)
        out = func(*prepared_args, **prepared_kwargs)
        # only complex results carry a tangent; anything else is returned as-is
        if issubclass(out.dtype.type, np.complexfloating):
            return cls._build(out)
        else:
            return out

    def __getitem__(self, where):
        out = self._array[where]
        if isinstance(out, np.complexfloating):
            # NumPy returns a scalar; CuPy and Array API return an array
            # we return an array to keep derivatives
            return type(self)._build(np.asarray(out))
        # NOTE(review): non-scalar selections are returned as the raw complex
        # array rather than a diffarray -- confirm whether that is intended.
        return out

# >>> x = np.linspace(-20, 20, 10000)
# >>> da_x = diffarray(x)
# >>> da_y = np.sin(da_x) / da_x
# >>> da_x
# diffarray([-20. -19.9959996 -19.9919992 ... 19.9919992 19.9959996
# 20. ],
# [1. 1. 1. ... 1. 1. 1.])
# >>> da_y
# diffarray([0.04564726 0.04557439 0.04550076 ... 0.04550076 0.04557439 0.04564726],
# [-0.01812174 -0.01831149 -0.01850102 ... 0.01850102 0.01831149
# 0.01812174])
# >>> abs(da_y.tangent - ((x*np.cos(x) - np.sin(x)) / x**2)).max()
# 3.9683650809863025e-10
# >>> import matplotlib.pyplot as plt
# >>> plt.plot(x, da_y.tangent)
# >>> plt.plot(x, (x*np.cos(x) - np.sin(x)) / x**2, ls="--")
#
# See https://gist.github.com/jpivarski/8dc48a87bae7a856848f87e36b9d244d for the plot
3 changes: 2 additions & 1 deletion tests-cuda/test_2922a_new_cuda_kernels.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,7 +1003,8 @@ def test_2064_fill_none_record_axis_last():


def test_2064_fill_none_record_option_outside_record():
record = ak.zip({"x": [1, 4], "y": [2, 3]}).mask[[True, False]]
record = ak.zip({"x": [1, 4], "y": [2, 3]})
record = record.mask[[True, False]]

cuda_record = ak.to_backend(record, "cuda")

Expand Down
Loading

0 comments on commit 90f2d6c

Please sign in to comment.