Skip to content

Commit

Permalink
Merge development branch (#389)
Browse files Browse the repository at this point in the history
* Bump pre-commit-ci/lite-action from 1.0.2 to 1.0.3

Bumps [pre-commit-ci/lite-action](https://github.com/pre-commit-ci/lite-action) from 1.0.2 to 1.0.3.
- [Release notes](https://github.com/pre-commit-ci/lite-action/releases)
- [Commits](pre-commit-ci/lite-action@v1.0.2...v1.0.3)

---
updated-dependencies:
- dependency-name: pre-commit-ci/lite-action
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update number of positional args

* update healpixdataset catalog info type

* run ci

* Override catalog's `__len__` method (#371)

* Wrap catalog's length method

* Clarify error message

* add alignment parameter to moc

* Add sparse histogram implementation (#376)

* Add sparse histogram implementation

* Add scipy as a dependency

* Improve test coverage

* Fix read/write of dense histogram (#380)

* Update missing hipscat references

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Sean McGuire <[email protected]>
Co-authored-by: Sean McGuire <[email protected]>
  • Loading branch information
4 people authored Oct 22, 2024
1 parent 94ef033 commit cfffa75
Show file tree
Hide file tree
Showing 13 changed files with 245 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-documentation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:
push:
branches: [ main ]
pull_request:
branches: [ main, hats ]
branches: [ main ]

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pre-commit-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:
push:
branches: [ main ]
pull_request:
branches: [ main, hats ]
branches: [ main ]

jobs:
pre-commit-ci:
Expand All @@ -31,5 +31,5 @@ jobs:
extra_args: --all-files --verbose
env:
SKIP: "check-lincc-frameworks-template-version,no-commit-to-branch,check-added-large-files,validate-pyproject,sphinx-build,pytest-check"
- uses: pre-commit-ci/lite-action@v1.0.2
- uses: pre-commit-ci/lite-action@v1.0.3
if: failure() && github.event_name == 'pull_request' && github.event.pull_request.draft == false
2 changes: 1 addition & 1 deletion .github/workflows/testing-and-coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:
push:
branches: [ main ]
pull_request:
branches: [ main, hats ]
branches: [ main ]

jobs:
build:
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ dependencies = [
"pandas",
"pyarrow>=14.0.1",
"pydantic",
"scipy",
"typing-extensions>=4.3.0",
"universal-pathlib",
]
Expand Down
1 change: 1 addition & 0 deletions src/.pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ ignored-parents=
# Maximum number of arguments for function / method.
max-args=15

# Maximum number of positional arguments.
max-positional-arguments=8

# Maximum number of attributes for a class (see R0902).
Expand Down
12 changes: 12 additions & 0 deletions src/hats/catalog/healpix_dataset/healpix_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,18 @@ def _check_files_exist(cls, catalog_base_dir: str | Path | UPath):
f"_metadata or partition info file is required in catalog directory {catalog_base_dir}"
)

def __len__(self):
    """Return the number of rows in the catalog.

    Returns:
        The number of rows in the catalog, as specified in its metadata.

    Raises:
        ValueError: if the number of rows is undetermined, which is the
            case once the catalog has been modified.
    """
    total_rows = self.catalog_info.total_rows
    # Any falsy count is treated as "undetermined": 0 is the value checked
    # for originally, and a missing count (None) would otherwise surface as
    # an unrelated TypeError from ``len()``.
    # NOTE(review): whether total_rows can actually be None depends on the
    # catalog info type, which is not visible here - confirm.
    if not total_rows:
        raise ValueError("The number of rows is undetermined because the catalog was modified.")
    return total_rows

def get_max_coverage_order(self) -> int:
"""Gets the maximum HEALPix order for which the coverage of the catalog is known from the pixel
tree and moc if it exists"""
Expand Down
101 changes: 101 additions & 0 deletions src/hats/pixel_math/sparse_histogram.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
"""Sparse 1-D histogram of healpix pixel counts."""

import numpy as np
from scipy.sparse import csc_array, load_npz, save_npz, sparray

import hats.pixel_math.healpix_shim as hp


class SparseHistogram:
"""Wrapper around scipy's sparse array."""

def __init__(self, sparse_array):
if not isinstance(sparse_array, sparray):
raise ValueError("The sparse array must be a scipy sparse array.")
if sparse_array.format != "csc":
raise ValueError("The sparse array must be a Compressed Sparse Column array.")
self.sparse_array = sparse_array

def add(self, other):
"""Add in another sparse histogram, updating this wrapper's array.
Args:
other (SparseHistogram): the wrapper containing the addend
"""
if not isinstance(other, SparseHistogram):
raise ValueError("Both addends should be SparseHistogram.")
if self.sparse_array.shape != other.sparse_array.shape:
raise ValueError(
"The histogram partials have incompatible sizes due to different healpix orders."
)
self.sparse_array += other.sparse_array

def to_array(self):
"""Convert the sparse array to a dense numpy array.
Returns:
dense 1-d numpy array.
"""
return self.sparse_array.toarray()[0]

def to_file(self, file_name):
"""Persist the sparse array to disk.
NB: this saves as a sparse array, and so will likely have lower space requirements
than saving the corresponding dense 1-d numpy array.
"""
save_npz(file_name, self.sparse_array)

def to_dense_file(self, file_name):
"""Persist the DENSE array to disk as a numpy array."""
with open(file_name, "wb+") as file_handle:
file_handle.write(self.to_array().data)

@classmethod
def make_empty(cls, healpix_order=10):
"""Create an empty sparse array for a given healpix order.
Args:
healpix_order (int): healpix order
Returns:
new sparse histogram
"""
histo = csc_array((1, hp.order2npix(healpix_order)), dtype=np.int64)
return cls(histo)

@classmethod
def make_from_counts(cls, indexes, counts_at_indexes, healpix_order=10):
"""Create an sparse array for a given healpix order, prefilled with counts at
the provided indexes.
e.g. for a dense 1-d numpy histogram of order 0, you might see::
[0, 4, 0, 0, 0, 0, 0, 0, 9, 0, 0]
There are only elements at [1, 8], and they have respective values [4, 9]. You
would create the sparse histogram like::
make_from_counts([1, 8], [4, 9], 0)
Args:
indexes (int[]): index locations of non-zero values
counts_at_indexes (int[]): values at the ``indexes``
healpix_order (int): healpix order
Returns:
new sparse histogram
"""
row = np.array(np.zeros(len(indexes), dtype=np.int64))
histo = csc_array((counts_at_indexes, (row, indexes)), shape=(1, hp.order2npix(healpix_order)))
return cls(histo)

@classmethod
def from_file(cls, file_name):
"""Read sparse histogram from a file.
Returns:
new sparse histogram
"""
histo = load_npz(file_name)
return cls(histo)
6 changes: 5 additions & 1 deletion src/hats/pixel_tree/pixel_alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,12 @@ def __init__(
aligned_tree: PixelTree,
pixel_mapping: pd.DataFrame,
alignment_type: PixelAlignmentType,
moc: MOC = None,
) -> None:
self.pixel_tree = aligned_tree
self.pixel_mapping = pixel_mapping
self.alignment_type = alignment_type
self.moc = moc


def align_trees(
Expand Down Expand Up @@ -410,7 +412,9 @@ def filter_alignment_by_moc(alignment: PixelAlignment, moc: MOC) -> PixelAlignme
tree_29_ranges = alignment.pixel_tree.tree << (2 * (29 - alignment.pixel_tree.tree_order))
tree_mask = perform_filter_by_moc(tree_29_ranges, moc_ranges)
new_tree = PixelTree(alignment.pixel_tree.tree[tree_mask], alignment.pixel_tree.tree_order)
return PixelAlignment(new_tree, alignment.pixel_mapping.iloc[tree_mask], alignment.alignment_type)
return PixelAlignment(
new_tree, alignment.pixel_mapping.iloc[tree_mask], alignment.alignment_type, moc=moc
)


def align_with_mocs(
Expand Down
Empty file.
1 change: 1 addition & 0 deletions tests/.pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ ignored-parents=
# Maximum number of arguments for function / method.
max-args=10

# Maximum number of positional arguments.
max-positional-arguments=8

# Maximum number of attributes for a class (see R0902).
Expand Down
15 changes: 15 additions & 0 deletions tests/hats/catalog/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def test_catalog_load(catalog_info, catalog_pixels):
catalog = Catalog(catalog_info, catalog_pixels)
assert catalog.get_healpix_pixels() == catalog_pixels
assert catalog.catalog_name == catalog_info.catalog_name
assert catalog_info.total_rows == len(catalog)

for hp_pixel in catalog_pixels:
assert hp_pixel in catalog.pixel_tree
Expand Down Expand Up @@ -562,3 +563,17 @@ def test_generate_negative_tree_pixels_multi_order(small_sky_order1_catalog):
negative_tree = small_sky_order1_catalog.generate_negative_tree_pixels()

assert negative_tree == expected_pixels


def test_catalog_len_is_undetermined(small_sky_order1_catalog):
    """Asking for the length of a catalog produced by a spatial or pixel filter
    must raise, because its number of rows is undetermined."""
    polygon_vertices = [(300, -50), (300, -55), (272, -55), (272, -50)]
    filters = [
        lambda catalog: catalog.filter_by_cone(0, -80, 1),
        lambda catalog: catalog.filter_by_polygon(polygon_vertices),
        lambda catalog: catalog.filter_by_box(ra=(280, 300)),
        lambda catalog: catalog.filter_from_pixel_list([HealpixPixel(0, 11)]),
    ]
    for apply_filter in filters:
        # The filter call stays inside the context manager, as each original
        # case had it.
        with pytest.raises(ValueError, match="undetermined"):
            len(apply_filter(small_sky_order1_catalog))
80 changes: 80 additions & 0 deletions tests/hats/pixel_math/test_sparse_histogram.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""Test sparse histogram behavior."""

import numpy as np
import numpy.testing as npt
import pytest
from numpy import frombuffer
from scipy.sparse import csr_array

import hats.pixel_math.healpix_shim as hp
from hats.pixel_math.sparse_histogram import SparseHistogram


def test_make_empty():
    """An empty histogram densifies to all zeros, with one entry per pixel
    of the requested order."""
    order = 5
    empty_histogram = SparseHistogram.make_empty(order)
    all_zeros = np.zeros(hp.order2npix(order))
    npt.assert_array_equal(all_zeros, empty_histogram.to_array())


def test_read_write_round_trip(tmp_path):
    """Both on-disk forms of a histogram (sparse and dense) read back as the
    counts that were written."""
    original = SparseHistogram.make_from_counts([11], [131], 0)

    # Sparse form: written and read back through the wrapper itself.
    sparse_path = tmp_path / "round_trip_sparse.npz"
    original.to_file(sparse_path)
    restored = SparseHistogram.from_file(sparse_path)
    npt.assert_array_equal(restored.to_array(), original.to_array())

    # Dense form: the file's bytes are reinterpreted directly as int64 values.
    dense_path = tmp_path / "round_trip_dense.npz"
    original.to_dense_file(dense_path)
    dense_counts = frombuffer(dense_path.read_bytes(), dtype=np.int64)
    npt.assert_array_equal(dense_counts, original.to_array())


def test_add_same_order():
    """Adding two histograms of the same order sums their counts pixel by pixel."""
    accumulator = SparseHistogram.make_from_counts([11], [131], 0)
    addend = SparseHistogram.make_from_counts([10, 11], [4, 15], 0)

    accumulator.add(addend)

    # Pixel 10 only receives the addend's 4; pixel 11 sums to 131 + 15.
    expected_counts = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 146]
    npt.assert_array_equal(accumulator.to_array(), expected_counts)


def test_add_different_order():
    """Histograms built at different healpix orders can NOT be added together."""
    order_zero = SparseHistogram.make_from_counts([11], [131], 0)
    order_one = SparseHistogram.make_from_counts([10, 11], [4, 15], 1)

    with pytest.raises(ValueError, match="partials have incompatible sizes"):
        order_zero.add(order_one)


def test_add_different_type():
    """Only another SparseHistogram can be added: plain numbers and lists
    are rejected."""
    histogram = SparseHistogram.make_from_counts([11], [131], 0)

    for bad_addend in (5, [1, 2, 3, 4, 5]):
        with pytest.raises(ValueError, match="addends should be SparseHistogram"):
            histogram.add(bad_addend)


def test_init_bad_inputs():
    """The constructor accepts nothing but a compressed sparse column array
    as its sole `sparse_array` argument."""
    with pytest.raises(ValueError, match="must be a scipy sparse array"):
        SparseHistogram(5)

    # A sparse array in the wrong (row-compressed) format is rejected too.
    wrong_format_array = csr_array((1, 12), dtype=np.int64)
    with pytest.raises(ValueError, match="must be a Compressed Sparse Column"):
        SparseHistogram(wrong_format_array)
Loading

0 comments on commit cfffa75

Please sign in to comment.