Skip to content

Commit

Permalink
Release 0.0.43 (#278)
Browse files Browse the repository at this point in the history
* Fix glob single file bug
  • Loading branch information
akharit authored Mar 2, 2019
1 parent 800a1d5 commit 096c146
Show file tree
Hide file tree
Showing 5 changed files with 16,864 additions and 8 deletions.
4 changes: 4 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
Release History
===============

0.0.43 (2019-03-01)
+++++++++++++++++++
* Fix bug in downloader when glob returns a single file

0.0.42 (2019-02-26)
+++++++++++++++++++
* Update docstrings
Expand Down
2 changes: 1 addition & 1 deletion azure/datalake/store/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# license information.
# --------------------------------------------------------------------------

__version__ = "0.0.42"
__version__ = "0.0.43"

from .core import AzureDLFileSystem
from .multithread import ADLDownloader
Expand Down
31 changes: 24 additions & 7 deletions azure/datalake/store/multithread.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,26 +195,42 @@ def clear_saved():
def hash(self):
return self._name



def _setup(self):
""" Create set of parameters to loop over
"""
if "*" not in self.rpath:
rfiles = self.client._adlfs.walk(self.rpath, details=True, invalidate_cache=True)
else:

def is_glob_path(path):
path = AzureDLPath(path).trim()
prefix = path.globless_prefix
return not path == prefix
is_rpath_glob = is_glob_path(self.rpath)

if is_rpath_glob:
rfiles = self.client._adlfs.glob(self.rpath, details=True, invalidate_cache=True)
else:
rfiles = self.client._adlfs.walk(self.rpath, details=True, invalidate_cache=True)

if len(rfiles) == 1 and self.client._adlfs.info(self.rpath)['type'] == 'FILE':
if not rfiles:
raise ValueError('No files to download')

# If only one file is returned, we cannot tell whether the user specified a dir or a file to download,
# since walk gives the same result for both, i.e. walk("DirWithSingleFile") == walk("DirWithSingleFile\SingleFile").
# If the user specified a file in rpath,
# then we want to download the file into lpath directly and not create another subdir for it.
# If the user specified a dir that happens to contain only one file, we want to create that dir under lpath as well.
if len(rfiles) == 1 and not is_rpath_glob and self.client._adlfs.info(self.rpath)['type'] == 'FILE':
if os.path.exists(self.lpath) and os.path.isdir(self.lpath):
file_pairs = [(os.path.join(self.lpath, os.path.basename(rfiles[0]['name'] + '.inprogress')),
rfiles[0])]
else:
file_pairs = [(self.lpath, rfiles[0])]
elif len(rfiles) >= 1:
else:
local_rel_rpath = str(AzureDLPath(self.rpath).trim().globless_prefix)
file_pairs = [(os.path.join(self.lpath, os.path.relpath(f['name'] +'.inprogress', local_rel_rpath)), f)
for f in rfiles]
else:
raise ValueError('No files to download')


# this property is used for internal validation
# and should not be referenced directly by public callers
Expand All @@ -231,6 +247,7 @@ def _setup(self):
self.client.submit(rfile['name'], lfile, rfile['length'])

return existing_files

def run(self, nthreads=None, monitor=True):
""" Populate transfer queue and execute downloads
Expand Down
Loading

0 comments on commit 096c146

Please sign in to comment.