Skip to content

Commit

Permalink
Show both files and folders recursively (#35)
Browse files: browse the repository at this point in the history
  • Loading branch information
Skaar, Bjørn-Andre committed Nov 29, 2022
1 parent b3defcc commit 01e8e16
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 23 deletions.
19 changes: 9 additions & 10 deletions dapla/backports.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,21 +3,20 @@

def show(gcs_path):
"""
Backported dapla function to recursively show all folders below a given GCS path
:param gcs_path: the path from which you want to list all folders
:return: a simplified list of files or folders
Backported dapla function to recursively show all files and folders below a given GCS path
:param gcs_path: the path from which you want to list all files and folders
:return: a list of files and folders
"""
fs = FileClient.get_gcs_file_system()
out = dict()
files = None
for path, dirs, files in fs.walk(gcs_path, detail=True):
out.update({_trimmed_name(info): info for name, info in dirs.items()})
# Add the base path (if it exists) to avoid an empty list when there are no subfolders
if len(out) == 0 and files is not None:
# Get the bucket name from any of the files
bucket_name = list(files.values())[0]['bucket']
trimmed_name = gcs_path.lstrip('gs://').lstrip(bucket_name).rstrip('/')
out[trimmed_name] = {}
out.update({_trimmed_name(info): info for name, info in files.items()})
if len(out) == 0 and files is None:
return []
# Handle leaf nodes
elif len(out) == 0:
out.update({_trimmed_name(info): info for name, info in files.items()})
return sorted(out)


Expand Down
32 changes: 19 additions & 13 deletions tests/test_backports.py
Original file line number | Diff line number | Diff line change
@@ -1,26 +1,32 @@
import mock
from dapla.backports import show
from dapla.backports import show, details
from dapla.gcs import GCSFileSystem


@mock.patch('dapla.backports.FileClient')
def test_show_all_subfolders(file_client_mock):
def test_show_subfolders_and_files(file_client_mock):
file_client_mock.get_gcs_file_system.return_value = GCSFileSystem()
result = show('gs://anaconda-public-data/nyc-taxi/')
assert result == ['/nyc-taxi/2015.parquet',
'/nyc-taxi/csv',
'/nyc-taxi/csv/2014',
'/nyc-taxi/csv/2015',
'/nyc-taxi/csv/2016',
'/nyc-taxi/nyc.parquet',
'/nyc-taxi/taxi.parquet']
result = show('gs://anaconda-public-data/projects-data')
assert result == ['/projects-data/',
'/projects-data/tensorboard/',
'/projects-data/tensorboard/mnist_tutorial.zip']


@mock.patch('dapla.backports.FileClient')
def test_show_leaf_folder(file_client_mock):
def test_show_leaf_folder_with_file(file_client_mock):
file_client_mock.get_gcs_file_system.return_value = GCSFileSystem()
result = show('gs://anaconda-public-data/nyc-taxi/csv/2014')
assert result == ['/nyc-taxi/csv/2014']
result = show('gs://anaconda-public-data/iris')
assert result == ['/iris/iris.csv']


@mock.patch('dapla.backports.FileClient')
def test_show_leaf_folder_with_file_details(file_client_mock):
file_client_mock.get_gcs_file_system.return_value = GCSFileSystem()
result = details('gs://anaconda-public-data/iris')
assert result == [{'Created': '2017-04-05T21:51:42.503Z',
'Name': '/iris/iris.csv',
'Size': 4636,
'Updated': '2017-04-05T21:51:42.503Z'}]


@mock.patch('dapla.backports.FileClient')
Expand Down

0 comments on commit 01e8e16

Please sign in to comment.