Skip to content

Commit

Permalink
Fix case where path ends in a slash
Browse files: browse the repository at this point in the history
  • Loading branch information
rjzamora committed Aug 29, 2024
1 parent 31f5a8a commit a15aa35
Showing 1 changed file with 10 additions and 4 deletions.
14 changes (10 additions & 4 deletions) in dask_cuda/benchmarks/local_read_parquet.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -58,9 +58,12 @@ def get_fs_paths_kwargs(args):
s3_args[_mapping[k]] = v

fs = pa_fs.FileSystem.from_uri(args.path)[0]
kwargs["filesystem"] = type(fs)(**s3_args)
try:
region = {"region": fs.region}
except AttributeError:
region = {}
kwargs["filesystem"] = type(fs)(**region, **s3_args)
fsspec_fs = ArrowFSWrapper(kwargs["filesystem"])
paths = fsspec_fs.glob(f"{args.path}/*.parquet")

if args.type == "gpu":
kwargs["blocksize"] = args.blocksize
Expand All @@ -74,11 +77,14 @@ def get_fs_paths_kwargs(args):
args.path, mode="rb", storage_options=storage_options
)[0]
kwargs["filesystem"] = fsspec_fs
paths = fsspec_fs.glob(f"{args.path}/*.parquet")

kwargs["blocksize"] = args.blocksize
kwargs["aggregate_files"] = args.aggregate_files

# Collect list of paths
stripped_url_path = fsspec_fs._strip_protocol(args.path)
if stripped_url_path.endswith("/"):
stripped_url_path = stripped_url_path[:-1]
paths = fsspec_fs.glob(f"{stripped_url_path}/*.parquet")
if args.file_count:
paths = paths[: args.file_count]

Expand Down

0 comments on commit a15aa35

Please sign in to comment.