Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: set functions and tests #57

Merged
merged 21 commits into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
9225efb
adding set functions and tests
ohrechykha Jul 30, 2024
14c58ac
pushing pre-commit changes
ohrechykha Jul 31, 2024
24ddbfd
ruff fixes for test_spec_set_functions.py
ohrechykha Aug 5, 2024
22fe8b9
further fixes in test_spec_set_functions.py
ohrechykha Aug 5, 2024
9153357
fixing mypy unreachable errors in _spec_set_functions.py
ohrechykha Aug 5, 2024
6c814a7
marking tests with None and empty arrays as comments
ohrechykha Aug 5, 2024
83dabd9
adding namedtuple & corresponding test fixes
ohrechykha Aug 19, 2024
2e92e46
Merge remote-tracking branch 'origin/main' into oleksii-unique
ohrechykha Aug 22, 2024
1ca9106
function if changes + test standartization
ohrechykha Aug 26, 2024
f04ccbf
correcting function ifs + test standartization
ohrechykha Aug 27, 2024
7b30c58
further test standartisation
ohrechykha Aug 28, 2024
7fb48f1
scalar handling and testing
ohrechykha Aug 29, 2024
6e8b6ee
_array_object changes, empty array handling + tests
ohrechykha Sep 10, 2024
36dfb42
implementing Jim's suggestion, disabling CI errors
ohrechykha Sep 10, 2024
e5852b3
disabling errors and warnings
ohrechykha Sep 10, 2024
1ff29b9
further ignores + adding equal_nan in np.unique instances
ohrechykha Sep 10, 2024
48893b8
better ignores
ohrechykha Sep 10, 2024
943d717
improving ignores
ohrechykha Sep 10, 2024
f719e44
Merge branch 'main' into oleksii-unique
jpivarski Sep 11, 2024
8c0867a
avoiding code duplication in _spec_array_object
ohrechykha Sep 12, 2024
67b5807
returning np.empty and input dtype in all functions
ohrechykha Sep 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 45 additions & 9 deletions src/ragged/_spec_set_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@

from collections import namedtuple

import awkward as ak
import numpy as np

import ragged

from ._spec_array_object import array

unique_all_result = namedtuple( # pylint: disable=C0103
Expand Down Expand Up @@ -47,8 +52,16 @@ def unique_all(x: array, /) -> tuple[array, array, array, array]:
https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_all.html
"""

x # noqa: B018, pylint: disable=W0104
raise NotImplementedError("TODO 128") # noqa: EM101
if not isinstance(x, ragged.array):
err = f"Expected ragged type but got {type(x)}"
raise TypeError(err)

if len(x)==1:
return ragged.array(x), ragged.array([0]), ragged.array([0]), ragged.array([1])

x_flat=ak.ravel(x._impl)
values, indices, inverse_indices, counts = np.unique(x_flat.layout.data, return_index=True, return_inverse=True, return_counts=True)
return ragged.array(values),ragged.array(indices), ragged.array(inverse_indices), ragged.array(counts)


unique_counts_result = namedtuple( # pylint: disable=C0103
Expand Down Expand Up @@ -77,9 +90,16 @@ def unique_counts(x: array, /) -> tuple[array, array]:

https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_counts.html
"""
if not isinstance(x, ragged.array):
err = f"Expected ragged type but got {type(x)}"
raise TypeError(err)

x # noqa: B018, pylint: disable=W0104
raise NotImplementedError("TODO 129") # noqa: EM101
if len(x)==1:
return ragged.array(x), ragged.array([1])

x_flat = ak.ravel(x._impl)
values, counts = np.unique(x_flat.layout.data, return_counts=True)
return ragged.array(values), ragged.array(counts)


unique_inverse_result = namedtuple( # pylint: disable=C0103
Expand Down Expand Up @@ -108,9 +128,17 @@ def unique_inverse(x: array, /) -> tuple[array, array]:

https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_inverse.html
"""
if not isinstance(x, ragged.array):
err = f"Expected ragged type but got {type(x)}"
raise TypeError(err)

if len(x) == 1:
return ragged.array(x), ragged.array([0])

x # noqa: B018, pylint: disable=W0104
raise NotImplementedError("TODO 130") # noqa: EM101
x_flat=ak.ravel(x._impl)
values, inverse_indices = np.unique(x_flat.layout.data, return_inverse=True)

return ragged.array(values), ragged.array(inverse_indices)


def unique_values(x: array, /) -> array:
Expand All @@ -128,6 +156,14 @@ def unique_values(x: array, /) -> array:

https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_values.html
"""

x # noqa: B018, pylint: disable=W0104
raise NotImplementedError("TODO 131") # noqa: EM101
if not isinstance(x, ragged.array):
err = f"Expected ragged type but got {type(x)}"
raise TypeError(err)

if len(x)==1:
return ragged.array(x)

x_flat = ak.ravel(x._impl)
values = np.unique(x_flat.layout.data)

return ragged.array(values)
145 changes: 145 additions & 0 deletions tests/test_spec_set_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,155 @@
from __future__ import annotations

import ragged
import awkward as ak
import pytest

# Specific algorithm for unique_values:
# 1. Take an input array.
# 2. Flatten the input array unless it is already 1-D.
# 3. Remember the first element, then loop through the rest of the list to see whether there are copies:
#    if yes, discard the copy and repeat the step;
#    if not, add the element to the output and repeat the step.
# 4. Once the loop is over, return an array of the unique elements of the input array (the output must be of the same type as the input array).


def test_existence():
    """Check that the four set functions are reachable as attributes of ``ragged``."""
    # getattr raises AttributeError for a missing name, exactly like the
    # plain attribute access it replaces.
    for function_name in (
        "unique_all",
        "unique_counts",
        "unique_inverse",
        "unique_values",
    ):
        assert getattr(ragged, function_name) is not None

#unique_values tests
def test_unique_values_can_take_none():
    """``unique_values`` must reject ``None`` with a ``TypeError``.

    This test carries a unique name on purpose: a later function in this
    module (the ``unique_inverse`` test) is also called
    ``test_can_take_none`` and would otherwise replace this one at import
    time, so it would never be collected by pytest.

    ``unique_values`` raises ``TypeError`` for anything that is not a
    ``ragged.array``, so ``None`` cannot be returned or compared here.
    """
    with pytest.raises(TypeError):
        ragged.unique_values(None)

def test_unique_values_can_take_list():
    """``unique_values`` must reject a plain Python list with a ``TypeError``.

    This test carries a unique name on purpose: a later function in this
    module (the ``unique_inverse`` test) is also called
    ``test_can_take_list`` and would otherwise replace this one at import
    time, so it would never be collected by pytest.
    """
    # No ``assert`` around the call: the call itself is expected to raise,
    # so an assertion on its result could never be evaluated.
    with pytest.raises(TypeError):
        ragged.unique_values([1, 2, 4, 3, 4, 5, 6, 20])

def test_can_take_empty_arr():
    """Expect a ``TypeError`` when ``unique_values`` is fed an empty ragged array."""
    # NOTE(review): both the construction of the empty array and the call
    # are kept inside the ``raises`` block, because either one may be the
    # source of the TypeError -- confirm which one is intended.
    with pytest.raises(TypeError):
        empty = ragged.array([])
        outcome = ragged.unique_values(empty)
        assert outcome

def test_can_take_moredimensions():
    """Multi-dimensional (ragged) input is flattened before de-duplication.

    The array API specification requires ``unique_values`` to flatten an
    input with more than one dimension and return the unique elements of
    the flattened array, so a nested input must produce a result rather
    than a ``ValueError``. This matches the other tests in this module that
    pass nested arrays to ``unique_counts``, ``unique_inverse`` and
    ``unique_all``.
    """
    arr = ragged.array([[1, 2, 3, 4], [5, 6]])
    expected_unique_values = ragged.array([1, 2, 3, 4, 5, 6])
    assert ak.to_list(ragged.unique_values(arr)) == ak.to_list(
        expected_unique_values
    )

def test_can_take_1d_array():
    """``unique_values`` on a 1-D array yields each distinct value once, in ascending order."""
    source = ragged.array([5, 6, 7, 8, 8, 9, 1, 2, 3, 4, 10, 0, 15, 2])
    wanted = ragged.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15])
    produced = ragged.unique_values(source)
    assert ak.to_list(produced) == ak.to_list(wanted)


#unique_counts tests
def test_can_count_none():
    """``unique_counts(None)`` is expected to raise ``TypeError``."""
    with pytest.raises(TypeError):
        outcome = ragged.unique_counts(None)
        assert outcome is None

def test_can_count_list():
    """``unique_counts`` on a plain Python list is expected to raise ``TypeError``."""
    with pytest.raises(TypeError):
        outcome = ragged.unique_counts([1, 2, 4, 3, 4, 5, 6, 20])
        assert outcome is None

def test_can_count_simple_array():
    """``unique_counts`` on a flat array returns the distinct values and how often each occurs."""
    source = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])
    wanted_values = ragged.array([1, 2, 3, 4])
    wanted_counts = ragged.array([1, 2, 3, 4])

    got_values, got_counts = ragged.unique_counts(source)

    assert ak.to_list(got_values) == ak.to_list(wanted_values)
    assert ak.to_list(got_counts) == ak.to_list(wanted_counts)

def test_can_count_normal_array():
    """``unique_counts`` on a nested ragged array counts values across all sub-lists."""
    source = ragged.array([[1, 2, 2], [3], [3, 3], [4, 4, 4], [4]])
    wanted_values = ragged.array([1, 2, 3, 4])
    wanted_counts = ragged.array([1, 2, 3, 4])

    got_values, got_counts = ragged.unique_counts(source)

    assert ak.to_list(got_values) == ak.to_list(wanted_values)
    assert ak.to_list(got_counts) == ak.to_list(wanted_counts)


def test_can_count_scalar():
    """``unique_counts`` on a one-element array returns that element with a count of 1."""
    source = ragged.array([5])
    wanted_values = ragged.array([5])
    wanted_counts = ragged.array([1])

    got_values, got_counts = ragged.unique_counts(source)

    assert ak.to_list(got_values) == ak.to_list(wanted_values)
    assert ak.to_list(got_counts) == ak.to_list(wanted_counts)

#unique_inverse tests
def test_can_take_none():
    """``unique_inverse(None)`` is expected to raise ``TypeError``."""
    with pytest.raises(TypeError):
        outcome = ragged.unique_inverse(None)
        assert outcome is None

def test_can_take_list():
    """``unique_inverse`` on a plain Python list is expected to raise ``TypeError``."""
    with pytest.raises(TypeError):
        outcome = ragged.unique_inverse([1, 2, 4, 3, 4, 5, 6, 20])
        assert outcome is None

def test_can_take_simple_array():
    """``unique_inverse`` on a flat array returns the distinct values and, per input element, its index into them."""
    source = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])
    wanted_values = ragged.array([1, 2, 3, 4])
    wanted_inverse = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3])

    got_values, got_inverse = ragged.unique_inverse(source)

    assert ak.to_list(got_values) == ak.to_list(wanted_values)
    assert ak.to_list(got_inverse) == ak.to_list(wanted_inverse)

def test_can_take_normal_array():
    """``unique_inverse`` on a nested ragged array returns flat values and flat inverse indices."""
    source = ragged.array([[1, 2, 2], [3], [3, 3], [4, 4, 4], [4]])
    wanted_values = ragged.array([1, 2, 3, 4])
    wanted_inverse = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3])

    got_values, got_inverse = ragged.unique_inverse(source)

    assert ak.to_list(got_values) == ak.to_list(wanted_values)
    assert ak.to_list(got_inverse) == ak.to_list(wanted_inverse)


def test_can_take_scalar():
    """``unique_inverse`` on a one-element array returns that element and the single index 0."""
    source = ragged.array([5])
    wanted_values = ragged.array([5])
    wanted_indices = ragged.array([0])

    got_values, got_indices = ragged.unique_inverse(source)

    assert ak.to_list(got_values) == ak.to_list(wanted_values)
    assert ak.to_list(got_indices) == ak.to_list(wanted_indices)

#unique_all tests
def test_can_all_none():
    """``unique_all(None)`` is expected to raise ``TypeError``."""
    with pytest.raises(TypeError):
        outcome = ragged.unique_all(None)
        assert outcome is None

def test_can_all_list():
    """``unique_all`` on a plain Python list is expected to raise ``TypeError``."""
    with pytest.raises(TypeError):
        outcome = ragged.unique_all([1, 2, 4, 3, 4, 5, 6, 20])
        assert outcome is None

def test_can_all_simple_array():
    """``unique_all`` on a flat array: values, first-occurrence indices, inverse indices and counts."""
    source = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])
    wanted_values = ragged.array([1, 2, 3, 4])
    wanted_indices = ragged.array([0, 1, 3, 6])
    wanted_inverse = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3])
    wanted_counts = ragged.array([1, 2, 3, 4])

    got_values, got_indices, got_inverse, got_counts = ragged.unique_all(source)

    # Compare the four outputs in the same order the function returns them.
    comparisons = (
        (got_values, wanted_values),
        (got_indices, wanted_indices),
        (got_inverse, wanted_inverse),
        (got_counts, wanted_counts),
    )
    for produced, wanted in comparisons:
        assert ak.to_list(produced) == ak.to_list(wanted)

def test_can_all_normal_array():
    """``unique_all`` on a nested ragged array; indices refer to positions in the flattened input."""
    source = ragged.array([[2, 2, 2], [3], [3, 5], [4, 4, 4], [4]])
    wanted_values = ragged.array([2, 3, 4, 5])
    # 5 first appears at flat position 5, before the first 4 at position 6,
    # but the values themselves are reported in ascending order.
    wanted_indices = ragged.array([0, 3, 6, 5])
    wanted_inverse = ragged.array([0, 0, 0, 1, 1, 3, 2, 2, 2, 2])
    wanted_counts = ragged.array([3, 2, 4, 1])

    got_values, got_indices, got_inverse, got_counts = ragged.unique_all(source)

    comparisons = (
        (got_values, wanted_values),
        (got_indices, wanted_indices),
        (got_inverse, wanted_inverse),
        (got_counts, wanted_counts),
    )
    for produced, wanted in comparisons:
        assert ak.to_list(produced) == ak.to_list(wanted)

def test_can_all_scalar():
    """``unique_all`` on a one-element array: the element, index 0, inverse 0 and count 1."""
    source = ragged.array([5])
    wanted_values = ragged.array([5])
    wanted_indices = ragged.array([0])
    wanted_inverse = ragged.array([0])
    wanted_counts = ragged.array([1])

    got_values, got_indices, got_inverse, got_counts = ragged.unique_all(source)

    comparisons = (
        (got_values, wanted_values),
        (got_indices, wanted_indices),
        (got_inverse, wanted_inverse),
        (got_counts, wanted_counts),
    )
    for produced, wanted in comparisons:
        assert ak.to_list(produced) == ak.to_list(wanted)
Loading