Skip to content

Commit

Permalink
Always provide partitioning=None and filesystem (#469)
Browse files: browse the repository at this point in the history
* Always provide partitioning=None and filesystem

* Confirm with npix_as_dir changes
  • Loading branch information
delucchi-cmu authored Mar 6, 2025
1 parent f08cb51 commit a58935f
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions src/hats/io/file_io/file_io.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -294,16 +294,21 @@ def read_parquet_file_to_pandas(file_pointer: str | Path | UPath, **kwargs) -> p
Pandas DataFrame with the data from the parquet file(s)
"""
file_pointer = get_upath(file_pointer)
storage_options = unnest_headers_for_pandas(file_pointer.storage_options)
# If we are trying to read a directory over http, we need to send the explicit list of files instead.
# We don't want to get the list unnecessarily because it can be expensive.
if isinstance(file_pointer, upath.implementations.http.HTTPPath) and len(file_pointer.suffixes) == 0:
file_pointers = [f for f in file_pointer.iterdir() if f.is_file()]
storage_options = unnest_headers_for_pandas(file_pointer.storage_options)
return pd.read_parquet(
file_pointers,
storage_options=storage_options,
filesystem=file_pointer.fs,
partitioning=None, # Avoid the ArrowTypeError described in #367
**kwargs,
)
return pd.read_parquet(file_pointer, storage_options=storage_options, **kwargs)
return pd.read_parquet(
file_pointer.path,
filesystem=file_pointer.fs,
partitioning=None, # Avoid the ArrowTypeError described in #367
**kwargs,
)

0 comments on commit a58935f

Please sign in to comment.