Skip to content

Commit

Permalink
Review PR comments
Browse files Browse the repository at this point in the history
  • Loading branch information
camposandro committed Jun 18, 2024
1 parent d0792af commit b262db2
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 17 deletions.
4 changes: 3 additions & 1 deletion src/lsdb/catalog/dataset/healpix_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,9 @@ def _perform_search(
"""Performs a search on the catalog from a list of pixels to search in
Args:
metadata (hc.catalog.Catalog | hc.catalog.MarginCatalog): The metadata of the hipscat catalog.
metadata (hc.catalog.Catalog | hc.catalog.MarginCatalog): The metadata of
the hipscat catalog after coarse filtering has been applied. It contains
only the partitions that overlap with the spatial region.
search (AbstractSearch): Instance of AbstractSearch.
Returns:
Expand Down
16 changes: 10 additions & 6 deletions src/lsdb/loaders/hipscat/abstract_catalog_loader.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
from __future__ import annotations

from abc import abstractmethod
from typing import Generic, List, Tuple, Type
from typing import Generic, List, Tuple, Type, TypeVar

import dask.dataframe as dd
import hipscat as hc
import numpy as np
import pandas as pd
from hipscat.catalog.healpix_dataset.healpix_dataset import HealpixDataset as HCHealpixDataset
from hipscat.io.file_io import file_io
from hipscat.pixel_math import HealpixPixel
from hipscat.pixel_math.healpix_pixel_function import get_pixel_argsort

from lsdb.catalog.catalog import DaskDFPixelMap
from lsdb.catalog.dataset.dataset import Dataset
from lsdb.dask.divisions import get_pixels_divisions
from lsdb.loaders.hipscat.hipscat_loading_config import HipscatLoadingConfig
from lsdb.types import CatalogTypeVar, HCCatalogTypeVar

CatalogTypeVar = TypeVar("CatalogTypeVar", bound=Dataset)
HCCatalogTypeVar = TypeVar("HCCatalogTypeVar", bound=HCHealpixDataset)


class AbstractCatalogLoader(Generic[CatalogTypeVar]):
Expand Down Expand Up @@ -46,7 +50,7 @@ def _load_hipscat_catalog(self, catalog_type: Type[HCCatalogTypeVar]) -> HCCatal
"""Load `hipscat` library catalog object with catalog metadata and partition data"""
return catalog_type.read_from_hipscat(self.path, storage_options=self.storage_options)

def _load_dask_df_and_map(self, catalog: HCCatalogTypeVar) -> Tuple[dd.core.DataFrame, DaskDFPixelMap]:
def _load_dask_df_and_map(self, catalog: HCHealpixDataset) -> Tuple[dd.core.DataFrame, DaskDFPixelMap]:
"""Load Dask DF from parquet files and make dict of HEALPix pixel to partition index"""
pixels = catalog.get_healpix_pixels()
ordered_pixels = np.array(pixels)[get_pixel_argsort(pixels)]
Expand All @@ -57,15 +61,15 @@ def _load_dask_df_and_map(self, catalog: HCCatalogTypeVar) -> Tuple[dd.core.Data
return ddf, pixel_to_index_map

def _get_paths_from_pixels(
self, catalog: HCCatalogTypeVar, ordered_pixels: List[HealpixPixel]
self, catalog: HCHealpixDataset, ordered_pixels: List[HealpixPixel]
) -> List[hc.io.FilePointer]:
paths = hc.io.paths.pixel_catalog_files(
catalog.catalog_base_dir, ordered_pixels, self.storage_options
)
return paths

def _load_df_from_paths(
self, catalog: HCCatalogTypeVar, paths: List[hc.io.FilePointer], divisions: Tuple[int, ...] | None
self, catalog: HCHealpixDataset, paths: List[hc.io.FilePointer], divisions: Tuple[int, ...] | None
) -> dd.core.DataFrame:
dask_meta_schema = self._load_metadata_schema(catalog)
if self.config.columns:
Expand All @@ -85,7 +89,7 @@ def _load_df_from_paths(
)
return dd.io.from_pandas(dask_meta_schema, npartitions=1)

def _load_metadata_schema(self, catalog: HCCatalogTypeVar) -> pd.DataFrame:
def _load_metadata_schema(self, catalog: HCHealpixDataset) -> pd.DataFrame:
metadata_pointer = hc.io.paths.get_common_metadata_pointer(catalog.catalog_base_dir)
metadata = file_io.read_parquet_metadata(metadata_pointer, storage_options=self.storage_options)
return (
Expand Down
12 changes: 2 additions & 10 deletions src/lsdb/types.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,11 @@
from __future__ import annotations
from typing import Dict, List, Tuple

from typing import Dict, List, Tuple, TypeVar

from hipscat.catalog.healpix_dataset.healpix_dataset import HealpixDataset as HCHealpixDataset
from hipscat.pixel_math import HealpixPixel
from typing_extensions import TypeAlias

from lsdb.catalog.dataset.dataset import Dataset

# Compute pixel map returns a tuple: the first element is
# the number of data points within the HEALPix pixel, and the
# second element is the list of pixels it contains.
HealpixInfo: TypeAlias = Tuple[int, List[int]]
DaskDFPixelMap = Dict[HealpixPixel, int]

# Generic lsdb and hipscat catalog types
CatalogTypeVar = TypeVar("CatalogTypeVar", bound=Dataset)
HCCatalogTypeVar = TypeVar("HCCatalogTypeVar", bound=HCHealpixDataset)
DaskDFPixelMap = Dict[HealpixPixel, int]

0 comments on commit b262db2

Please sign in to comment.