diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ab62acc..cd4a024 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -51,7 +51,7 @@ jobs: numpy-version: "latest" runs-on: ubuntu-latest - python-version: "3.9" - numpy-version: "1.22.0" + numpy-version: "1.24.0" runs-on: ubuntu-latest steps: @@ -82,4 +82,4 @@ jobs: pytest -ra --cov --cov-report=xml --cov-report=term --durations=20 - name: Upload coverage report - uses: codecov/codecov-action@v4.3.0 + uses: codecov/codecov-action@v4.5.0 diff --git a/pyproject.toml b/pyproject.toml index 3b7a8e8..0cf7895 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,8 @@ classifiers = [ ] dynamic = ["version"] dependencies = [ - "awkward>=2.5.0", + "awkward>=2.6.7", + "numpy>=1.24.0", ] [project.optional-dependencies] diff --git a/src/ragged/__init__.py b/src/ragged/__init__.py index 8377e92..02af4d1 100644 --- a/src/ragged/__init__.py +++ b/src/ragged/__init__.py @@ -130,7 +130,7 @@ nonzero, where, ) -from ._spec_set_functions import ( +from ._spec_set_functions import ( # pylint: disable=R0401 unique_all, unique_counts, unique_inverse, diff --git a/src/ragged/_helper_functions.py b/src/ragged/_helper_functions.py new file mode 100644 index 0000000..7688543 --- /dev/null +++ b/src/ragged/_helper_functions.py @@ -0,0 +1,20 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/ragged/blob/main/LICENSE +from __future__ import annotations + +import numpy as np + + +def regularise_to_float(t: np.dtype, /) -> np.dtype: + # For numpy < 2.1, map integer/bool dtypes to the float dtype used for ceil/floor results + if tuple(int(part) for part in np.__version__.split(".")[:2]) >= (2, 1): + # numpy >= 2.1: return the dtype unchanged (numeric compare; as strings "10.0" < "2.1") + return t + + if t in [np.int8, np.uint8, np.bool_, bool]: + return np.float16 + elif t in [np.int16, np.uint16]: + return np.float32 + elif t in [np.int32, np.uint32, np.int64, np.uint64]: + return np.float64 + else: + return t diff --git a/src/ragged/_spec_array_object.py b/src/ragged/_spec_array_object.py index 
e83fe2e..c3d098b 100644 --- a/src/ragged/_spec_array_object.py +++ b/src/ragged/_spec_array_object.py @@ -16,6 +16,7 @@ import numpy as np from awkward.contents import ( Content, + EmptyArray, ListArray, ListOffsetArray, NumpyArray, @@ -44,7 +45,8 @@ def _shape_dtype(layout: Content) -> tuple[Shape, Dtype]: else: shape = (*shape, None) node = node.content - + if isinstance(node, EmptyArray): + node = node.to_NumpyArray(dtype=np.float64) if isinstance(node, NumpyArray): shape = shape + node.data.shape[1:] return shape, node.data.dtype @@ -244,9 +246,9 @@ def __str__(self) -> str: if len(self._shape) == 0: return f"{self._impl}" elif len(self._shape) == 1: - return f"{ak._prettyprint.valuestr(self._impl, 1, 80)}" + return f"{ak.prettyprint.valuestr(self._impl, 1, 80)}" else: - prep = ak._prettyprint.valuestr(self._impl, 20, 80 - 4)[1:-1].replace( + prep = ak.prettyprint.valuestr(self._impl, 20, 80 - 4)[1:-1].replace( "\n ", "\n " ) return f"[\n {prep}\n]" @@ -259,9 +261,9 @@ def __repr__(self) -> str: if len(self._shape) == 0: return f"ragged.array({self._impl})" elif len(self._shape) == 1: - return f"ragged.array({ak._prettyprint.valuestr(self._impl, 1, 80 - 14)})" + return f"ragged.array({ak.prettyprint.valuestr(self._impl, 1, 80 - 14)})" else: - prep = ak._prettyprint.valuestr(self._impl, 20, 80 - 4)[1:-1].replace( + prep = ak.prettyprint.valuestr(self._impl, 20, 80 - 4)[1:-1].replace( "\n ", "\n " ) return f"ragged.array([\n {prep}\n])" diff --git a/src/ragged/_spec_elementwise_functions.py b/src/ragged/_spec_elementwise_functions.py index 3357c6c..46b8f2a 100644 --- a/src/ragged/_spec_elementwise_functions.py +++ b/src/ragged/_spec_elementwise_functions.py @@ -10,6 +10,7 @@ import numpy as np +from ._helper_functions import regularise_to_float from ._spec_array_object import _box, _unbox, array @@ -414,7 +415,7 @@ def ceil(x: array, /) -> array: https://data-apis.org/array-api/latest/API_specification/generated/array_api.ceil.html """ - return _box(type(x), 
np.ceil(*_unbox(x)), dtype=x.dtype) + return _box(type(x), np.ceil(*_unbox(x)), dtype=regularise_to_float(x.dtype)) def conj(x: array, /) -> array: @@ -586,7 +587,7 @@ def floor(x: array, /) -> array: https://data-apis.org/array-api/latest/API_specification/generated/array_api.floor.html """ - return _box(type(x), np.floor(*_unbox(x)), dtype=x.dtype) + return _box(type(x), np.floor(*_unbox(x)), dtype=regularise_to_float(x.dtype)) def floor_divide(x1: array, x2: array, /) -> array: diff --git a/src/ragged/_spec_set_functions.py b/src/ragged/_spec_set_functions.py index c21f885..259ccd9 100644 --- a/src/ragged/_spec_set_functions.py +++ b/src/ragged/_spec_set_functions.py @@ -8,6 +8,11 @@ from collections import namedtuple +import awkward as ak +import numpy as np + +import ragged + from ._spec_array_object import array unique_all_result = namedtuple( # pylint: disable=C0103 @@ -47,8 +52,39 @@ def unique_all(x: array, /) -> tuple[array, array, array, array]: https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_all.html """ - x # noqa: B018, pylint: disable=W0104 - raise NotImplementedError("TODO 128") # noqa: EM101 + if isinstance(x, ragged.array): + if x.ndim == 0: + return unique_all_result( + values=ragged.array(np.unique(x._impl, equal_nan=False)), # pylint: disable=W0212 + indices=ragged.array([0]), + inverse_indices=ragged.array([0]), + counts=ragged.array([1]), + ) + else: + x_flat = ak.ravel(x._impl) # pylint: disable=W0212 + if isinstance(x_flat.layout, ak.contents.EmptyArray): # pylint: disable=E1101 + return unique_all_result( + values=ragged.array(np.empty(0, x.dtype)), + indices=ragged.array(np.empty(0, np.int64)), + inverse_indices=ragged.array(np.empty(0, np.int64)), + counts=ragged.array(np.empty(0, np.int64)), + ) + values, indices, inverse_indices, counts = np.unique( + x_flat.layout.data, # pylint: disable=E1101 + return_index=True, + return_inverse=True, + return_counts=True, + equal_nan=False, + ) + return 
unique_all_result( + values=ragged.array(values), + indices=ragged.array(indices), + inverse_indices=ragged.array(inverse_indices), + counts=ragged.array(counts), + ) + else: + msg = f"Expected ragged type but got {type(x)}" # type: ignore[unreachable] + raise TypeError(msg) unique_counts_result = namedtuple( # pylint: disable=C0103 @@ -77,9 +113,30 @@ def unique_counts(x: array, /) -> tuple[array, array]: https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_counts.html """ - - x # noqa: B018, pylint: disable=W0104 - raise NotImplementedError("TODO 129") # noqa: EM101 + if isinstance(x, ragged.array): + if x.ndim == 0: + return unique_counts_result( + values=ragged.array(np.unique(x._impl, equal_nan=False)), # pylint: disable=W0212 + counts=ragged.array([1]), # pylint: disable=W0212 + ) + else: + x_flat = ak.ravel(x._impl) # pylint: disable=W0212 + if isinstance(x_flat.layout, ak.contents.EmptyArray): # pylint: disable=E1101 + return unique_counts_result( + values=ragged.array(np.empty(0, x.dtype)), + counts=ragged.array(np.empty(0, np.int64)), + ) + values, counts = np.unique( + x_flat.layout.data, # pylint: disable=E1101 + return_counts=True, + equal_nan=False, + ) + return unique_counts_result( + values=ragged.array(values), counts=ragged.array(counts) + ) + else: + msg = f"Expected ragged type but got {type(x)}" # type: ignore[unreachable] + raise TypeError(msg) unique_inverse_result = namedtuple( # pylint: disable=C0103 @@ -108,9 +165,32 @@ def unique_inverse(x: array, /) -> tuple[array, array]: https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_inverse.html """ - - x # noqa: B018, pylint: disable=W0104 - raise NotImplementedError("TODO 130") # noqa: EM101 + if isinstance(x, ragged.array): + if x.ndim == 0: + return unique_inverse_result( + values=ragged.array(np.unique(x._impl, equal_nan=False)), # pylint: disable=W0212 + inverse_indices=ragged.array([0]), + ) + else: + x_flat = ak.ravel(x._impl) 
# pylint: disable=W0212 + if isinstance(x_flat.layout, ak.contents.EmptyArray): # pylint: disable=E1101 + return unique_inverse_result( + values=ragged.array(np.empty(0, x.dtype)), + inverse_indices=ragged.array(np.empty(0, np.int64)), + ) + values, inverse_indices = np.unique( + x_flat.layout.data, # pylint: disable=E1101 + return_inverse=True, + equal_nan=False, + ) + + return unique_inverse_result( + values=ragged.array(values), + inverse_indices=ragged.array(inverse_indices), + ) + else: + msg = f"Expected ragged type but got {type(x)}" # type: ignore[unreachable] + raise TypeError(msg) def unique_values(x: array, /) -> array: @@ -128,6 +208,15 @@ def unique_values(x: array, /) -> array: https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_values.html """ - - x # noqa: B018, pylint: disable=W0104 - raise NotImplementedError("TODO 131") # noqa: EM101 + if isinstance(x, ragged.array): + if x.ndim == 0: + return ragged.array(np.unique(x._impl, equal_nan=False)) # pylint: disable=W0212 + + else: + x_flat = ak.ravel(x._impl) # pylint: disable=W0212 + if isinstance(x_flat.layout, ak.contents.EmptyArray): # pylint: disable=E1101 + return ragged.array(np.empty(0, x.dtype)) + return ragged.array(np.unique(x_flat.layout.data, equal_nan=False)) # pylint: disable=E1101 + else: + err = f"Expected ragged type but got {type(x)}" # type: ignore[unreachable] + raise TypeError(err) diff --git a/tests-cuda/test_cuda_spec_set_functions.py b/tests-cuda/test_cuda_spec_set_functions.py new file mode 100644 index 0000000..65b6643 --- /dev/null +++ b/tests-cuda/test_cuda_spec_set_functions.py @@ -0,0 +1,171 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/ragged/blob/main/LICENSE + +""" +https://data-apis.org/array-api/latest/API_specification/set_functions.html +""" + +from __future__ import annotations + +import awkward as ak +import cupy as cp + +import ragged + + +def test_existence(): + assert ragged.unique_all is not None + assert 
ragged.unique_counts is not None + assert ragged.unique_inverse is not None + assert ragged.unique_values is not None + + +# unique_values tests +def test_can_take_list(): + arr = ragged.array(cp.array([1, 2, 4, 3, 4, 5, 6, 20])) + expected_unique_values = ragged.array([1, 2, 3, 4, 5, 6, 20]) + unique_values = ragged.unique_values(arr) + assert ak.to_list(expected_unique_values) == ak.to_list(unique_values) + + +def test_can_take_empty_arr(): + arr = ragged.array(cp.array([])) + expected_unique_values = ragged.array([]) + unique_values = ragged.unique_values(arr) + assert ak.to_list(expected_unique_values) == ak.to_list(unique_values) + + +def test_can_take_moredimensions(): + arr = ragged.array(ak.Array([[1, 2, 2, 3, 4], [5, 6]], backend="cuda")) + expected_unique_values = ragged.array([1, 2, 3, 4, 5, 6]) + unique_values = ragged.unique_values(arr) + assert ak.to_list(expected_unique_values) == ak.to_list(unique_values) + + +def test_can_take_1d_array(): + arr = ragged.array(cp.array([5, 6, 7, 8, 8, 9, 1, 2, 3, 4, 10, 0, 15, 2])) + expected_unique_values = ragged.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15]) + assert ak.to_list(ragged.unique_values(arr)) == ak.to_list(expected_unique_values) + + +# unique_counts tests +def test_can_count_list(): + arr = ragged.array(cp.array([1, 2, 4, 3, 4, 5, 6, 20])) + expected_unique_values = ragged.array([1, 2, 3, 4, 5, 6, 20]) + expected_unique_counts = ragged.array([1, 1, 1, 2, 1, 1, 1]) + unique_values, unique_counts = ragged.unique_counts(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) + + +def test_can_count_empty_arr(): + arr = ragged.array(cp.array([])) + expected_unique_values = ragged.array([]) + expected_counts = ragged.array([]) + unique_values, unique_counts = ragged.unique_counts(arr) + assert ak.to_list(expected_unique_values) == ak.to_list(unique_values) + assert ak.to_list(expected_counts) == 
ak.to_list(unique_counts) + + +def test_can_count_simple_array(): + arr = ragged.array(cp.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])) + expected_unique_values = ragged.array([1, 2, 3, 4]) + expected_counts = ragged.array([1, 2, 3, 4]) + unique_values, unique_counts = ragged.unique_counts(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_counts) == ak.to_list(expected_counts) + + +def test_can_count_normal_array(): + arr = ragged.array( + ak.Array([[1, 2, 2], [3], [3, 3], [4, 4, 4], [4]], backend="cuda") + ) + expected_unique_values = ragged.array([1, 2, 3, 4]) + expected_counts = ragged.array([1, 2, 3, 4]) + unique_values, unique_counts = ragged.unique_counts(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_counts) == ak.to_list(expected_counts) + + +# unique_inverse tests +def test_can_inverse_list(): + arr = ragged.array(cp.array([1, 2, 4, 3, 4, 5, 6, 20])) + expected_unique_values = ragged.array([1, 2, 3, 4, 5, 6, 20]) + expected_inverse_indices = ragged.array([0, 1, 3, 2, 3, 4, 5, 6]) + unique_values, inverse_indices = ragged.unique_inverse(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices) + + +def test_can_inverse_empty_arr(): + arr = ragged.array(cp.array([])) + expected_unique_values = ragged.array([]) + expected_inverse_indices = ragged.array([]) + unique_values, inverse_indices = ragged.unique_inverse(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices) + + +def test_can_inverse_simple_array(): + arr = ragged.array(ak.Array([[1, 2, 2], [3, 3, 3], [4, 4, 4, 4]], backend="cuda")) + expected_unique_values = ragged.array([1, 2, 3, 4]) + expected_inverse_indices = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3]) + unique_values, inverse_indices = 
ragged.unique_inverse(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices) + + +def test_can_inverse_normal_array(): + arr = ragged.array( + ak.Array([[1, 2, 2], [3], [3, 3], [4, 4, 4], [4]], backend="cuda") + ) + expected_unique_values = ragged.array([1, 2, 3, 4]) + expected_inverse_indices = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3]) + unique_values, inverse_indices = ragged.unique_inverse(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices) + + +# unique_all tests +def test_can_all_list(): + arr = ragged.array(cp.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])) + expected_unique_values = ragged.array([1, 2, 3, 4]) + expected_unique_indices = ragged.array([0, 1, 3, 6]) + expected_unique_inverse = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3]) + expected_unique_counts = ragged.array([1, 2, 3, 4]) + unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all( + arr + ) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) + assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse) + assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) + + +def test_can_all_empty_arr(): + arr = ragged.array(cp.array([])) + expected_unique_values = ragged.array([]) + expected_unique_indices = ragged.array([]) + expected_unique_inverse = ragged.array([]) + expected_unique_counts = ragged.array([]) + unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all( + arr + ) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) + assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse) + assert ak.to_list(unique_counts) == 
ak.to_list(expected_unique_counts) + + +def test_can_all_normal_array(): + arr = ragged.array(ak.Array([[2, 2, 2], [3], [3, 5], [4, 4, 4], [4]], backend="cuda")) + expected_unique_values = ragged.array([2, 3, 4, 5]) + expected_unique_indices = ragged.array([0, 3, 6, 5]) + expected_unique_inverse = ragged.array([0, 0, 0, 1, 1, 3, 2, 2, 2, 2]) + expected_unique_counts = ragged.array([3, 2, 4, 1]) + unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all( + arr + ) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) + assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse) + assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) diff --git a/tests/test_spec_elementwise_functions.py b/tests/test_spec_elementwise_functions.py index fdac4a9..c8e03a4 100644 --- a/tests/test_spec_elementwise_functions.py +++ b/tests/test_spec_elementwise_functions.py @@ -14,13 +14,27 @@ with warnings.catch_warnings(): warnings.simplefilter("ignore") - import numpy.array_api as xp import pytest import ragged +from ragged._helper_functions import regularise_to_float + +has_complex_dtype = True +numpy_has_array_api = False devices = ["cpu"] + +try: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + import numpy.array_api as xp + + numpy_has_array_api = True + has_complex_dtype = np.dtype("complex128") in xp._dtypes._all_dtypes +except ModuleNotFoundError: + import numpy as xp # noqa: ICN001 + try: import cupy as cp @@ -374,17 +388,34 @@ def test_ceil(device, x): assert xp.ceil(first(x)).dtype == result.dtype +@pytest.mark.skipif( + not numpy_has_array_api, + reason=f"testing only in numpy version 1, but got numpy version {np.__version__}", +) @pytest.mark.parametrize("device", devices) -def test_ceil_int(device, x_int): +def test_ceil_int_1(device, x_int): result = ragged.ceil(x_int.to_device(device)) assert type(result) is type(x_int) 
assert result.shape == x_int.shape - assert xp.ceil(first(x_int)) == first(result) - assert xp.ceil(first(x_int)).dtype == result.dtype @pytest.mark.skipif( - np.dtype("complex128") not in xp._dtypes._all_dtypes, + numpy_has_array_api, + reason=f"testing only in numpy version 2, but got numpy version {np.__version__}", +) +@pytest.mark.parametrize("device", devices) +def test_ceil_int_2(device, x_int): + result = ragged.ceil(x_int.to_device(device)) + assert type(result) is type(x_int) + assert result.shape == x_int.shape + assert xp.ceil(first(x_int)) == first(result).astype( + regularise_to_float(first(result).dtype) + ) + assert xp.ceil(first(x_int)).dtype == regularise_to_float(result.dtype) + + +@pytest.mark.skipif( + not has_complex_dtype, reason=f"complex not allowed in np.array_api version {np.__version__}", ) @pytest.mark.parametrize("device", devices) @@ -487,13 +518,32 @@ def test_floor(device, x): assert xp.floor(first(x)).dtype == result.dtype +@pytest.mark.skipif( + not numpy_has_array_api, + reason=f"testing only in numpy version 1, but got numpy version {np.__version__}", +) @pytest.mark.parametrize("device", devices) -def test_floor_int(device, x_int): +def test_floor_int_1(device, x_int): + result = ragged.floor( + x_int.to_device(device) + ) # always returns float64 regardless of x_int.dtype + assert type(result) is type(x_int) + assert result.shape == x_int.shape + + +@pytest.mark.skipif( + numpy_has_array_api, + reason=f"testing only in numpy version 2, but got numpy version {np.__version__}", +) +@pytest.mark.parametrize("device", devices) +def test_floor_int_2(device, x_int): result = ragged.floor(x_int.to_device(device)) assert type(result) is type(x_int) assert result.shape == x_int.shape - assert xp.floor(first(x_int)) == first(result) - assert xp.floor(first(x_int)).dtype == result.dtype + assert xp.floor(first(x_int)) == np.asarray(first(result)).astype( + regularise_to_float(first(result).dtype) + ) + assert xp.floor(first(x_int)).dtype 
== regularise_to_float(result.dtype) @pytest.mark.parametrize("device", devices) @@ -571,7 +621,7 @@ def test_greater_equal_method(device, x, y): @pytest.mark.skipif( - np.dtype("complex128") not in xp._dtypes._all_dtypes, + not has_complex_dtype, reason=f"complex not allowed in np.array_api version {np.__version__}", ) @pytest.mark.parametrize("device", devices) @@ -838,7 +888,7 @@ def test_pow_inplace_method(device, x, y): @pytest.mark.skipif( - np.dtype("complex128") not in xp._dtypes._all_dtypes, + not has_complex_dtype, reason=f"complex not allowed in np.array_api version {np.__version__}", ) @pytest.mark.parametrize("device", devices) @@ -888,7 +938,7 @@ def test_round(device, x): @pytest.mark.skipif( - np.dtype("complex128") not in xp._dtypes._all_dtypes, + not has_complex_dtype, reason=f"complex not allowed in np.array_api version {np.__version__}", ) @pytest.mark.parametrize("device", devices) diff --git a/tests/test_spec_set_functions.py b/tests/test_spec_set_functions.py index 3c06863..3a74cbc 100644 --- a/tests/test_spec_set_functions.py +++ b/tests/test_spec_set_functions.py @@ -6,6 +6,8 @@ from __future__ import annotations +import awkward as ak + import ragged @@ -14,3 +16,231 @@ def test_existence(): assert ragged.unique_counts is not None assert ragged.unique_inverse is not None assert ragged.unique_values is not None + + +# unique_values tests +def test_can_take_list(): + arr = ragged.array([1, 2, 4, 3, 4, 5, 6, 20]) + expected_unique_values = ragged.array([1, 2, 3, 4, 5, 6, 20]) + unique_values = ragged.unique_values(arr) + assert ak.to_list(expected_unique_values) == ak.to_list(unique_values) + + +def test_can_take_empty_arr(): + arr = ragged.array([]) + expected_unique_values = ragged.array([]) + unique_values = ragged.unique_values(arr) + assert ak.to_list(expected_unique_values) == ak.to_list(unique_values) + + +def test_can_take_moredimensions(): + arr = ragged.array([[1, 2, 2, 3, 4], [5, 6]]) + expected_unique_values = ragged.array([1, 2, 
3, 4, 5, 6]) + unique_values = ragged.unique_values(arr) + assert ak.to_list(expected_unique_values) == ak.to_list(unique_values) + + +def test_can_take_1d_array(): + arr = ragged.array([5, 6, 7, 8, 8, 9, 1, 2, 3, 4, 10, 0, 15, 2]) + expected_unique_values = ragged.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15]) + assert ak.to_list(ragged.unique_values(arr)) == ak.to_list(expected_unique_values) + + +def test_can_take_scalar_int(): + arr = ragged.array(5) + expected_unique_values = ragged.array(5) + unique_values = ragged.unique_values(arr) + assert unique_values == expected_unique_values + + +def test_can_take_scalar_float(): + arr = ragged.array(4.326) + expected_unique_values = ragged.array(4.326) + unique_values = ragged.unique_values(arr) + assert unique_values == expected_unique_values + + +# unique_counts tests +def test_can_count_list(): + arr = ragged.array([1, 2, 4, 3, 4, 5, 6, 20]) + expected_unique_values = ragged.array([1, 2, 3, 4, 5, 6, 20]) + expected_unique_counts = ragged.array([1, 1, 1, 2, 1, 1, 1]) + unique_values, unique_counts = ragged.unique_counts(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) + + +def test_can_count_empty_arr(): + arr = ragged.array([]) + expected_unique_values = ragged.array([]) + expected_counts = ragged.array([]) + unique_values, unique_counts = ragged.unique_counts(arr) + assert ak.to_list(expected_unique_values) == ak.to_list(unique_values) + assert ak.to_list(expected_counts) == ak.to_list(unique_counts) + + +def test_can_count_simple_array(): + arr = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4]) + expected_unique_values = ragged.array([1, 2, 3, 4]) + expected_counts = ragged.array([1, 2, 3, 4]) + unique_values, unique_counts = ragged.unique_counts(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_counts) == ak.to_list(expected_counts) + + +def 
test_can_count_normal_array(): + arr = ragged.array([[1, 2, 2], [3], [3, 3], [4, 4, 4], [4]]) + expected_unique_values = ragged.array([1, 2, 3, 4]) + expected_counts = ragged.array([1, 2, 3, 4]) + unique_values, unique_counts = ragged.unique_counts(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_counts) == ak.to_list(expected_counts) + + +def test_can_count_scalar_int(): + arr = ragged.array(5) + expected_unique_values = ragged.array(5) + expected_counts = ragged.array([1]) + unique_values, unique_counts = ragged.unique_counts(arr) + assert unique_values == expected_unique_values + assert unique_counts == expected_counts + + +def test_can_count_scalar_float(): + arr = ragged.array(4.326) + expected_unique_values = ragged.array(4.326) + expected_counts = ragged.array([1]) + unique_values, unique_counts = ragged.unique_counts(arr) + assert unique_values == expected_unique_values + assert unique_counts == expected_counts + + +# unique_inverse tests +def test_can_inverse_list(): + arr = ragged.array([1, 2, 4, 3, 4, 5, 6, 20]) + expected_unique_values = ragged.array([1, 2, 3, 4, 5, 6, 20]) + expected_inverse_indices = ragged.array([0, 1, 3, 2, 3, 4, 5, 6]) + unique_values, inverse_indices = ragged.unique_inverse(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices) + + +def test_can_inverse_empty_arr(): + arr = ragged.array([]) + expected_unique_values = ragged.array([]) + expected_inverse_indices = ragged.array([]) + unique_values, inverse_indices = ragged.unique_inverse(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices) + + +def test_can_inverse_simple_array(): + arr = ragged.array([[1, 2, 2], [3, 3, 3], [4, 4, 4, 4]]) + expected_unique_values = ragged.array([1, 2, 3, 4]) + expected_inverse_indices = 
ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3]) + unique_values, inverse_indices = ragged.unique_inverse(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices) + + +def test_can_inverse_normal_array(): + arr = ragged.array([[1, 2, 2], [3], [3, 3], [4, 4, 4], [4]]) + expected_unique_values = ragged.array([1, 2, 3, 4]) + expected_inverse_indices = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3]) + unique_values, inverse_indices = ragged.unique_inverse(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices) + + +def test_can_inverse_scalar_int(): + arr = ragged.array(5) + expected_unique_values = ragged.array(5) + expected_inverse_indices = ragged.array([0]) + unique_values, inverse_indices = ragged.unique_inverse(arr) + assert unique_values == expected_unique_values + assert inverse_indices == expected_inverse_indices + + +def test_can_inverse_scalar_float(): + arr = ragged.array(4.326) + expected_unique_values = ragged.array(4.326) + expected_inverse_indices = ragged.array([0]) + unique_values, inverse_indices = ragged.unique_inverse(arr) + assert unique_values == expected_unique_values + assert inverse_indices == expected_inverse_indices + + +# unique_all tests +def test_can_all_list(): + arr = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4]) + expected_unique_values = ragged.array([1, 2, 3, 4]) + expected_unique_indices = ragged.array([0, 1, 3, 6]) + expected_unique_inverse = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3]) + expected_unique_counts = ragged.array([1, 2, 3, 4]) + unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all( + arr + ) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) + assert ak.to_list(unique_inverse) == 
ak.to_list(expected_unique_inverse) + assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) + + +def test_can_all_empty_arr(): + arr = ragged.array([]) + expected_unique_values = ragged.array([]) + expected_unique_indices = ragged.array([]) + expected_unique_inverse = ragged.array([]) + expected_unique_counts = ragged.array([]) + unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all( + arr + ) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) + assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse) + assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) + + +def test_can_all_normal_array(): + arr = ragged.array([[2, 2, 2], [3], [3, 5], [4, 4, 4], [4]]) + expected_unique_values = ragged.array([2, 3, 4, 5]) + expected_unique_indices = ragged.array([0, 3, 6, 5]) + expected_unique_inverse = ragged.array([0, 0, 0, 1, 1, 3, 2, 2, 2, 2]) + expected_unique_counts = ragged.array([3, 2, 4, 1]) + unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all( + arr + ) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) + assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse) + assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) + + +def test_can_all_scalar_int(): + arr = ragged.array(5) + expected_unique_values = ragged.array(5) + expected_unique_indices = ragged.array([0]) + expected_unique_inverse = ragged.array([0]) + expected_unique_counts = ragged.array([1]) + unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all( + arr + ) + assert unique_values == expected_unique_values + assert unique_indices == expected_unique_indices + assert unique_inverse == expected_unique_inverse + assert unique_counts == expected_unique_counts + 
+ +def test_can_all_scalar_float(): + arr = ragged.array(4.326) + expected_unique_values = ragged.array(4.326) + expected_unique_indices = ragged.array([0]) + expected_unique_inverse = ragged.array([0]) + expected_unique_counts = ragged.array([1]) + unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all( + arr + ) + assert unique_values == expected_unique_values + assert unique_indices == expected_unique_indices + assert unique_inverse == expected_unique_inverse + assert unique_counts == expected_unique_counts