From a58935fa92e7f9083a5cefa843132d1dc2d48b86 Mon Sep 17 00:00:00 2001 From: Melissa DeLucchi <113376043+delucchi-cmu@users.noreply.github.com> Date: Thu, 6 Mar 2025 15:54:08 -0500 Subject: [PATCH] Always provide partitioning=None and filesystem (#469) * Always provide partitioning=None and filesystem * Confirm with npix_as_dir changes --- src/hats/io/file_io/file_io.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/hats/io/file_io/file_io.py b/src/hats/io/file_io/file_io.py index 10a5787c..9853edd0 100644 --- a/src/hats/io/file_io/file_io.py +++ b/src/hats/io/file_io/file_io.py @@ -294,11 +294,11 @@ def read_parquet_file_to_pandas(file_pointer: str | Path | UPath, **kwargs) -> p Pandas DataFrame with the data from the parquet file(s) """ file_pointer = get_upath(file_pointer) - storage_options = unnest_headers_for_pandas(file_pointer.storage_options) # If we are trying to read a directory over http, we need to send the explicit list of files instead. # We don't want to get the list unnecessarily because it can be expensive. if isinstance(file_pointer, upath.implementations.http.HTTPPath) and len(file_pointer.suffixes) == 0: file_pointers = [f for f in file_pointer.iterdir() if f.is_file()] + storage_options = unnest_headers_for_pandas(file_pointer.storage_options) return pd.read_parquet( file_pointers, storage_options=storage_options, @@ -306,4 +306,9 @@ def read_parquet_file_to_pandas(file_pointer: str | Path | UPath, **kwargs) -> p partitioning=None, # Avoid the ArrowTypeError described in #367 **kwargs, ) - return pd.read_parquet(file_pointer, storage_options=storage_options, **kwargs) + return pd.read_parquet( + file_pointer.path, + filesystem=file_pointer.fs, + partitioning=None, # Avoid the ArrowTypeError described in #367 + **kwargs, + )