scikit-hep · ohrechykha · Jul 30, 2024 · Jul 31, 2024 · Aug 2, 2024
diff --git a/src/ragged/_spec_set_functions.py b/src/ragged/_spec_set_functions.py
@@ -8,6 +8,11 @@
 
 from collections import namedtuple
 
+import awkward as ak
+import numpy as np
+
+import ragged
+
 from ._spec_array_object import array
 
 unique_all_result = namedtuple(  # pylint: disable=C0103
@@ -47,8 +52,23 @@ def unique_all(x: array, /) -> tuple[array, array, array, array]:
     https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_all.html
     """
 
-    x  # noqa: B018, pylint: disable=W0104
-    raise NotImplementedError("TODO 128")  # noqa: EM101
+    if not isinstance(x, ragged.array):
+        err = f"Expected ragged type but got {type(x)}"
+        raise TypeError(err)
+
+    if len(x) == 1:
+        return ragged.array(x), ragged.array([0]), ragged.array([0]), ragged.array([1])
+
+    x_flat = ak.ravel(x._impl)
+    values, indices, inverse_indices, counts = np.unique(
+        x_flat.layout.data, return_index=True, return_inverse=True, return_counts=True
+    )
+    return (
+        ragged.array(values),
+        ragged.array(indices),
+        ragged.array(inverse_indices),
+        ragged.array(counts),
+    )
 
 
 unique_counts_result = namedtuple(  # pylint: disable=C0103
@@ -77,9 +97,16 @@ def unique_counts(x: array, /) -> tuple[array, array]:
 
     https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_counts.html
     """
+    if not isinstance(x, ragged.array):
+        err = f"Expected ragged type but got {type(x)}"
+        raise TypeError(err)
+
+    if len(x) == 1:
+        return ragged.array(x), ragged.array([1])
 
-    x  # noqa: B018, pylint: disable=W0104
-    raise NotImplementedError("TODO 129")  # noqa: EM101
+    x_flat = ak.ravel(x._impl)
+    values, counts = np.unique(x_flat.layout.data, return_counts=True)
+    return ragged.array(values), ragged.array(counts)
 
 
 unique_inverse_result = namedtuple(  # pylint: disable=C0103
@@ -108,9 +135,17 @@ def unique_inverse(x: array, /) -> tuple[array, array]:
 
     https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_inverse.html
     """
+    if not isinstance(x, ragged.array):
+        err = f"Expected ragged type but got {type(x)}"
+        raise TypeError(err)
 
-    x  # noqa: B018, pylint: disable=W0104
-    raise NotImplementedError("TODO 130")  # noqa: EM101
+    if len(x) == 1:
+        return ragged.array(x), ragged.array([0])
+
+    x_flat = ak.ravel(x._impl)
+    values, inverse_indices = np.unique(x_flat.layout.data, return_inverse=True)
+
+    return ragged.array(values), ragged.array(inverse_indices)
 
 
 def unique_values(x: array, /) -> array:
@@ -128,6 +163,14 @@ def unique_values(x: array, /) -> array:
 
     https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_values.html
     """
+    if not isinstance(x, ragged.array):
+        err = f"Expected ragged type but got {type(x)}"
+        raise TypeError(err)
+
+    if len(x) == 1:
+        return ragged.array(x)
+
+    x_flat = ak.ravel(x._impl)
+    values = np.unique(x_flat.layout.data)
 
-    x  # noqa: B018, pylint: disable=W0104
-    raise NotImplementedError("TODO 131")  # noqa: EM101
+    return ragged.array(values)
diff --git a/tests/test_spec_elementwise_functions.py b/tests/test_spec_elementwise_functions.py
@@ -11,11 +11,20 @@
 
 import awkward as ak
 import numpy as np
-
+"""
 with warnings.catch_warnings():
     warnings.simplefilter("ignore")
-    import numpy.array_api as xp
+    import array_api_strict as xp
+"""
+has_complex_dtype = True
 
+if np.lib.NumpyVersion(np.__version__) < '2.0.0b1':
+  with warnings.catch_warnings():
+    warnings.simplefilter("ignore")
+    import array_api_strict as xp
+    has_complex_dtype = True if (np.dtype("complex128") in xp._dtypes._all_dtypes) else False
+else:
+  xp = np
 import pytest
 
 import ragged
@@ -377,14 +386,20 @@ def test_ceil(device, x):
 @pytest.mark.parametrize("device", devices)
 def test_ceil_int(device, x_int):
     result = ragged.ceil(x_int.to_device(device))
+    print(x_int.dtype)
+    print(ragged.ceil(x_int.to_device(device)).dtype)
     assert type(result) is type(x_int)
     assert result.shape == x_int.shape
     assert xp.ceil(first(x_int)) == first(result)
-    assert xp.ceil(first(x_int)).dtype == result.dtype
-
+    print((first(x_int)).dtype)
+    print((xp.ceil(first(x_int))).dtype)
+    print(first(result).dtype)
+    assert np.ceil(first(x_int)).dtype == result.dtype
+    print((np.ceil(first(x_int))).dtype)
+    print(result.dtype)
 
 @pytest.mark.skipif(
-    np.dtype("complex128") not in xp._dtypes._all_dtypes,
+    not has_complex_dtype,
     reason=f"complex not allowed in np.array_api version {np.__version__}",
 )
 @pytest.mark.parametrize("device", devices)
@@ -571,7 +586,7 @@ def test_greater_equal_method(device, x, y):
 
 
 @pytest.mark.skipif(
-    np.dtype("complex128") not in xp._dtypes._all_dtypes,
+    not has_complex_dtype,
     reason=f"complex not allowed in np.array_api version {np.__version__}",
 )
 @pytest.mark.parametrize("device", devices)
@@ -838,7 +853,7 @@ def test_pow_inplace_method(device, x, y):
 
 
 @pytest.mark.skipif(
-    np.dtype("complex128") not in xp._dtypes._all_dtypes,
+    not has_complex_dtype,
     reason=f"complex not allowed in np.array_api version {np.__version__}",
 )
 @pytest.mark.parametrize("device", devices)
@@ -888,7 +903,7 @@ def test_round(device, x):
 
 
 @pytest.mark.skipif(
-    np.dtype("complex128") not in xp._dtypes._all_dtypes,
+    not has_complex_dtype,
     reason=f"complex not allowed in np.array_api version {np.__version__}",
 )
 @pytest.mark.parametrize("device", devices)

diff --git a/tests/test_spec_set_functions.py b/tests/test_spec_set_functions.py
@@ -6,11 +6,180 @@
 
 from __future__ import annotations
 
+import awkward as ak
+import pytest
+
 import ragged
 
+# Specific algorithm for unique_values:
+# 1 take an input array
+# 2 flatten input_array unless its 1d
+# 3 {remember the first element, loop through the rest of the list to see if there are copies
+#    if yes then discard it and repeat the step
+#    if not then add it to the output and repeat the step}
+# 4 once the cycle is over return an array of unique elements in the input array (the output must be of the same type as input array)
+
 
 def test_existence():
     assert ragged.unique_all is not None
     assert ragged.unique_counts is not None
     assert ragged.unique_inverse is not None
     assert ragged.unique_values is not None
+
+
+# unique_values tests
+def test_can_take_none():
+    assert ragged.unique_values(None) == None
+
+
+def test_can_take_list():
+    with pytest.raises(TypeError):
+        assert ragged.unique_values([1, 2, 4, 3, 4, 5, 6, 20])
+
+
+def test_can_take_empty_arr():
+    with pytest.raises(TypeError):
+        assert ragged.unique_values(ragged.array([]))
+
+
+def test_can_take_moredimensions():
+    with pytest.raises(ValueError):
+        assert ragged.unique_values(ragged.array([[1, 2, 3, 4], [5, 6]]))
+
+
+def test_can_take_1d_array():
+    arr = ragged.array([5, 6, 7, 8, 8, 9, 1, 2, 3, 4, 10, 0, 15, 2])
+    expected_unique_values = ragged.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15])
+    assert ak.to_list(ragged.unique_values(arr)) == ak.to_list(expected_unique_values)
+
+
+# unique_counts tests
+def test_can_count_none():
+    with pytest.raises(TypeError):
+        assert ragged.unique_counts(None) is None
+
+
+def test_can_count_list():
+    with pytest.raises(TypeError):
+        assert ragged.unique_counts([1, 2, 4, 3, 4, 5, 6, 20]) is None
+
+
+def test_can_count_simple_array():
+    arr = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])
+    expected_unique_values = ragged.array([1, 2, 3, 4])
+    expected_counts = ragged.array([1, 2, 3, 4])
+    unique_values, unique_counts = ragged.unique_counts(arr)
+    assert ak.to_list(unique_values) == ak.to_list(expected_unique_values)
+    assert ak.to_list(unique_counts) == ak.to_list(expected_counts)
+
+
+def test_can_count_normal_array():
+    arr = ragged.array([[1, 2, 2], [3], [3, 3], [4, 4, 4], [4]])
+    expected_unique_values = ragged.array([1, 2, 3, 4])
+    expected_counts = ragged.array([1, 2, 3, 4])
+    unique_values, unique_counts = ragged.unique_counts(arr)
+    assert ak.to_list(unique_values) == ak.to_list(expected_unique_values)
+    assert ak.to_list(unique_counts) == ak.to_list(expected_counts)
+
+
+def test_can_count_scalar():
+    arr = ragged.array([5])
+    expected_unique_values = ragged.array([5])
+    expected_counts = ragged.array([1])
+    unique_values, unique_counts = ragged.unique_counts(arr)
+    assert ak.to_list(unique_values) == ak.to_list(expected_unique_values)
+    assert ak.to_list(unique_counts) == ak.to_list(expected_counts)
+
+
+# unique_inverse tests
+def test_can_take_none():
+    with pytest.raises(TypeError):
+        assert ragged.unique_inverse(None) is None
+
+
+def test_can_take_list():
+    with pytest.raises(TypeError):
+        assert ragged.unique_inverse([1, 2, 4, 3, 4, 5, 6, 20]) is None
+
+
+def test_can_take_simple_array():
+    arr = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])
+    expected_unique_values = ragged.array([1, 2, 3, 4])
+    expected_inverse_indices = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3])
+    unique_values, inverse_indices = ragged.unique_inverse(arr)
+    assert ak.to_list(unique_values) == ak.to_list(expected_unique_values)
+    assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices)
+
+
+def test_can_take_normal_array():
+    arr = ragged.array([[1, 2, 2], [3], [3, 3], [4, 4, 4], [4]])
+    expected_unique_values = ragged.array([1, 2, 3, 4])
+    expected_inverse_indices = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3])
+    unique_values, inverse_indices = ragged.unique_inverse(arr)
+    assert ak.to_list(unique_values) == ak.to_list(expected_unique_values)
+    assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices)
+
+
+def test_can_take_scalar():
+    arr = ragged.array([5])
+    expected_unique_values = ragged.array([5])
+    expected_unique_indices = ragged.array([0])
+    unique_values, unique_indices = ragged.unique_inverse(arr)
+    assert ak.to_list(unique_values) == ak.to_list(expected_unique_values)
+    assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices)
+
+
+# unique_all tests
+def test_can_all_none():
+    with pytest.raises(TypeError):
+        assert ragged.unique_all(None) is None
+
+
+def test_can_all_list():
+    with pytest.raises(TypeError):
+        assert ragged.unique_all([1, 2, 4, 3, 4, 5, 6, 20]) is None
+
+
+def test_can_all_simple_array():
+    arr = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])
+    expected_unique_values = ragged.array([1, 2, 3, 4])
+    expected_unique_indices = ragged.array([0, 1, 3, 6])
+    expected_unique_inverse = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3])
+    expected_unique_counts = ragged.array([1, 2, 3, 4])
+    unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all(
+        arr
+    )
+    assert ak.to_list(unique_values) == ak.to_list(expected_unique_values)
+    assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices)
+    assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse)
+    assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts)
+
+
+def test_can_all_normal_array():
+    arr = ragged.array([[2, 2, 2], [3], [3, 5], [4, 4, 4], [4]])
+    expected_unique_values = ragged.array([2, 3, 4, 5])
+    expected_unique_indices = ragged.array([0, 3, 6, 5])
+    expected_unique_inverse = ragged.array([0, 0, 0, 1, 1, 3, 2, 2, 2, 2])
+    expected_unique_counts = ragged.array([3, 2, 4, 1])
+    unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all(
+        arr
+    )
+    assert ak.to_list(unique_values) == ak.to_list(expected_unique_values)
+    assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices)
+    assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse)
+    assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts)
+
+
+def test_can_all_scalar():
+    arr = ragged.array([5])
+    expected_unique_values = ragged.array([5])
+    expected_unique_indices = ragged.array([0])
+    expected_unique_inverse = ragged.array([0])
+    expected_unique_counts = ragged.array([1])
+    unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all(
+        arr
+    )
+    assert ak.to_list(unique_values) == ak.to_list(expected_unique_values)
+    assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices)
+    assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse)
+    assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts)