Skip to content

Commit 1bbbbdf

Browse files
authored
Avoid expanding paths when source and destination are lists (#1349)
1 parent 212c26f commit 1bbbbdf

File tree

6 files changed

+324
-163
lines changed

6 files changed

+324
-163
lines changed

fsspec/asyn.py

Lines changed: 101 additions & 75 deletions
Original file line number | Diff line number | Diff line change
@@ -344,31 +344,42 @@ async def _copy(
344344
elif on_error is None:
345345
on_error = "raise"
346346

347-
source_is_str = isinstance(path1, str)
348-
paths = await self._expand_path(path1, maxdepth=maxdepth, recursive=recursive)
349-
if source_is_str and (not recursive or maxdepth is not None):
350-
# Non-recursive glob does not copy directories
351-
paths = [p for p in paths if not (trailing_sep(p) or await self._isdir(p))]
352-
if not paths:
353-
return
347+
if isinstance(path1, list) and isinstance(path2, list):
348+
# No need to expand paths when both source and destination
349+
# are provided as lists
350+
paths1 = path1
351+
paths2 = path2
352+
else:
353+
source_is_str = isinstance(path1, str)
354+
paths1 = await self._expand_path(
355+
path1, maxdepth=maxdepth, recursive=recursive
356+
)
357+
if source_is_str and (not recursive or maxdepth is not None):
358+
# Non-recursive glob does not copy directories
359+
paths1 = [
360+
p for p in paths1 if not (trailing_sep(p) or await self._isdir(p))
361+
]
362+
if not paths1:
363+
return
364+
365+
source_is_file = len(paths1) == 1
366+
dest_is_dir = isinstance(path2, str) and (
367+
trailing_sep(path2) or await self._isdir(path2)
368+
)
354369

355-
source_is_file = len(paths) == 1
356-
dest_is_dir = isinstance(path2, str) and (
357-
trailing_sep(path2) or await self._isdir(path2)
358-
)
370+
exists = source_is_str and (
371+
(has_magic(path1) and source_is_file)
372+
or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
373+
)
374+
paths2 = other_paths(
375+
paths1,
376+
path2,
377+
exists=exists,
378+
flatten=not source_is_str,
379+
)
359380

360-
exists = source_is_str and (
361-
(has_magic(path1) and source_is_file)
362-
or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
363-
)
364-
path2 = other_paths(
365-
paths,
366-
path2,
367-
exists=exists,
368-
flatten=not source_is_str,
369-
)
370381
batch_size = batch_size or self.batch_size
371-
coros = [self._cp_file(p1, p2, **kwargs) for p1, p2 in zip(paths, path2)]
382+
coros = [self._cp_file(p1, p2, **kwargs) for p1, p2 in zip(paths1, paths2)]
372383
result = await _run_coros_in_chunks(
373384
coros, batch_size=batch_size, return_exceptions=True, nofiles=True
374385
)
@@ -503,33 +514,39 @@ async def _put(
503514
constructor, or for all instances by setting the "gather_batch_size" key
504515
in ``fsspec.config.conf``, falling back to 1/8th of the system limit .
505516
"""
506-
source_is_str = isinstance(lpath, str)
507-
if source_is_str:
508-
lpath = make_path_posix(lpath)
509-
fs = LocalFileSystem()
510-
lpaths = fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth)
511-
if source_is_str and (not recursive or maxdepth is not None):
512-
# Non-recursive glob does not copy directories
513-
lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
514-
if not lpaths:
515-
return
516-
517-
source_is_file = len(lpaths) == 1
518-
dest_is_dir = isinstance(rpath, str) and (
519-
trailing_sep(rpath) or await self._isdir(rpath)
520-
)
517+
if isinstance(lpath, list) and isinstance(rpath, list):
518+
# No need to expand paths when both source and destination
519+
# are provided as lists
520+
rpaths = rpath
521+
lpaths = lpath
522+
else:
523+
source_is_str = isinstance(lpath, str)
524+
if source_is_str:
525+
lpath = make_path_posix(lpath)
526+
fs = LocalFileSystem()
527+
lpaths = fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth)
528+
if source_is_str and (not recursive or maxdepth is not None):
529+
# Non-recursive glob does not copy directories
530+
lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
531+
if not lpaths:
532+
return
533+
534+
source_is_file = len(lpaths) == 1
535+
dest_is_dir = isinstance(rpath, str) and (
536+
trailing_sep(rpath) or await self._isdir(rpath)
537+
)
521538

522-
rpath = self._strip_protocol(rpath)
523-
exists = source_is_str and (
524-
(has_magic(lpath) and source_is_file)
525-
or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath))
526-
)
527-
rpaths = other_paths(
528-
lpaths,
529-
rpath,
530-
exists=exists,
531-
flatten=not source_is_str,
532-
)
539+
rpath = self._strip_protocol(rpath)
540+
exists = source_is_str and (
541+
(has_magic(lpath) and source_is_file)
542+
or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath))
543+
)
544+
rpaths = other_paths(
545+
lpaths,
546+
rpath,
547+
exists=exists,
548+
flatten=not source_is_str,
549+
)
533550

534551
is_dir = {l: os.path.isdir(l) for l in lpaths}
535552
rdirs = [r for l, r in zip(lpaths, rpaths) if is_dir[l]]
@@ -574,35 +591,44 @@ async def _get(
574591
constructor, or for all instances by setting the "gather_batch_size" key
575592
in ``fsspec.config.conf``, falling back to 1/8th of the system limit .
576593
"""
577-
source_is_str = isinstance(rpath, str)
578-
# First check for rpath trailing slash as _strip_protocol removes it.
579-
source_not_trailing_sep = source_is_str and not trailing_sep(rpath)
580-
rpath = self._strip_protocol(rpath)
581-
rpaths = await self._expand_path(rpath, recursive=recursive, maxdepth=maxdepth)
582-
if source_is_str and (not recursive or maxdepth is not None):
583-
# Non-recursive glob does not copy directories
584-
rpaths = [
585-
p for p in rpaths if not (trailing_sep(p) or await self._isdir(p))
586-
]
587-
if not rpaths:
588-
return
594+
if isinstance(lpath, list) and isinstance(rpath, list):
595+
# No need to expand paths when both source and destination
596+
# are provided as lists
597+
rpaths = rpath
598+
lpaths = lpath
599+
else:
600+
source_is_str = isinstance(rpath, str)
601+
# First check for rpath trailing slash as _strip_protocol removes it.
602+
source_not_trailing_sep = source_is_str and not trailing_sep(rpath)
603+
rpath = self._strip_protocol(rpath)
604+
rpaths = await self._expand_path(
605+
rpath, recursive=recursive, maxdepth=maxdepth
606+
)
607+
if source_is_str and (not recursive or maxdepth is not None):
608+
# Non-recursive glob does not copy directories
609+
rpaths = [
610+
p for p in rpaths if not (trailing_sep(p) or await self._isdir(p))
611+
]
612+
if not rpaths:
613+
return
589614

590-
lpath = make_path_posix(lpath)
591-
source_is_file = len(rpaths) == 1
592-
dest_is_dir = isinstance(lpath, str) and (
593-
trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
594-
)
615+
lpath = make_path_posix(lpath)
616+
source_is_file = len(rpaths) == 1
617+
dest_is_dir = isinstance(lpath, str) and (
618+
trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
619+
)
620+
621+
exists = source_is_str and (
622+
(has_magic(rpath) and source_is_file)
623+
or (not has_magic(rpath) and dest_is_dir and source_not_trailing_sep)
624+
)
625+
lpaths = other_paths(
626+
rpaths,
627+
lpath,
628+
exists=exists,
629+
flatten=not source_is_str,
630+
)
595631

596-
exists = source_is_str and (
597-
(has_magic(rpath) and source_is_file)
598-
or (not has_magic(rpath) and dest_is_dir and source_not_trailing_sep)
599-
)
600-
lpaths = other_paths(
601-
rpaths,
602-
lpath,
603-
exists=exists,
604-
flatten=not source_is_str,
605-
)
606632
[os.makedirs(os.path.dirname(lp), exist_ok=True) for lp in lpaths]
607633
batch_size = kwargs.pop("batch_size", self.batch_size)
608634

0 commit comments

Comments
 (0)