Skip to content

Commit

Permalink
Merge pull request #382 from nasa-fornax/panstarrs_hats
Browse files Browse the repository at this point in the history
converting panstarrs read_hipscat to read_hats
  • Loading branch information
jkrick authored Mar 5, 2025
2 parents f9642da + 776f635 commit 0b771f3
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 26 deletions.
40 changes: 19 additions & 21 deletions light_curves/code_src/panstarrs_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import lsdb
from dask.distributed import Client
from data_structures import MultiIndexDFObject

from upath import UPath

# panstarrs light curves from HATS catalog in S3 using lsdb
def panstarrs_get_lightcurves(sample_table, *, radius=1):
Expand All @@ -26,28 +26,26 @@ def panstarrs_get_lightcurves(sample_table, *, radius=1):
#read in the panstarrs object table to lsdb
#this table will be used for cross matching with our sample's ra and decs
#but does not have light curve information
panstarrs_object = lsdb.read_hipscat(
's3://stpubdata/panstarrs/ps1/public/hipscat/otmo',
storage_options={'anon': True},
columns=[
"objID", # PS1 ID
"raMean", "decMean", # coordinates to use for cross-matching
"nStackDetections", # some other data to use
]
)
panstarrs_object = lsdb.read_hats(
UPath('s3://stpubdata/panstarrs/ps1/public/hats/otmo', anon=True),
margin_cache=UPath('s3://stpubdata/panstarrs/ps1/public/hats/otmo_10arcs', anon=True),
columns=[ "objID", # PS1 ID
"raMean", "decMean", # coordinates to use for cross-matching
"nStackDetections", # some other data to use
]
)
#read in the panstarrs light curves to lsdb
#panstarrs recommendation is not to index into this table with ra and dec
#but to use object ids from the above object table
panstarrs_detect = lsdb.read_hipscat(
's3://stpubdata/panstarrs/ps1/public/hipscat/detection',
storage_options={'anon': True},
columns=[
"objID", # PS1 object ID
"detectID", # PS1 detection ID
# light-curve stuff
"obsTime", "filterID", "psfFlux", "psfFluxErr",
],
)
panstarrs_detect = lsdb.read_hats(
UPath('s3://stpubdata/panstarrs/ps1/public/hats/detection', anon=True),
margin_cache=UPath('s3://stpubdata/panstarrs/ps1/public/hats/detection_10arcs', anon=True),
columns=[ "objID", # PS1 object ID
"detectID", # PS1 detection ID
# light-curve stuff
"obsTime", "filterID", "psfFlux", "psfFluxErr",
]
)
#convert astropy table to pandas dataframe
#special care for the SkyCoords in the table
sample_df = pd.DataFrame({'objectid': sample_table['objectid'],
Expand Down Expand Up @@ -119,4 +117,4 @@ def panstarrs_get_lightcurves(sample_table, *, radius=1):
}).set_index(["objectid", "label", "band", "time"])

return MultiIndexDFObject(data=df_lc)

10 changes: 6 additions & 4 deletions light_curves/light_curve_generator.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ jupytext:
format_version: 0.13
jupytext_version: 1.16.4
kernelspec:
display_name: science_demo
display_name: notebook
language: python
name: conda-env-science_demo-py
name: python3
---

# Make Multi-Wavelength Light Curves Using Archival Data
Expand Down Expand Up @@ -74,7 +74,7 @@ This cell will install them if needed:

```{code-cell} ipython3
# Uncomment the next line to install dependencies if needed.
# !pip install -r requirements_light_curve_generator.txt
#!pip install -r requirements_light_curve_generator.txt
```

```{code-cell} ipython3
Expand All @@ -101,7 +101,7 @@ from tess_kepler_functions import tess_kepler_get_lightcurves
from wise_functions import wise_get_lightcurves
# Note: ZTF data is temporarily located in a non-public AWS S3 bucket. It is automatically available
# from the Fornax Science Console, but otherwise will require explicit user credentials.
from ztf_functions import ztf_get_lightcurves
# from ztf_functions import ztf_get_lightcurves
```

## 1. Define the sample
Expand Down Expand Up @@ -258,6 +258,8 @@ print('WISE search took:', time.time() - WISEstarttime, 's')
### 2.4 MAST: Pan-STARRS
The function to retrieve lightcurves from Pan-STARRS uses a version of both the object and light curve catalogs that are stored in the cloud and accessed using [lsdb](https://docs.lsdb.io/en/stable/). This function is efficient at large scale (sample sizes > ~1000).

Some warnings are expected.

```{code-cell} ipython3
panstarrsstarttime = time.time()
Expand Down
3 changes: 2 additions & 1 deletion light_curves/requirements_light_curve_generator.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ astroquery>=0.4.8.dev0
acstools
lightkurve
alerce
lsdb
lsdb>=0.4.5
universal_pathlib
# We use distributed in this notebook, but installing any dask would make the [dataframe] extras required by dependencies for other notebooks.
# It feels to be the cleanest solution to add the dependency here as we don't directly use it elsewhere.
dask[distributed,dataframe]
Expand Down

0 comments on commit 0b771f3

Please sign in to comment.