diff --git a/pyvo/utils/download.py b/pyvo/utils/download.py index 1a63cd1cd..ef67fdc99 100644 --- a/pyvo/utils/download.py +++ b/pyvo/utils/download.py @@ -29,7 +29,7 @@ def http_download(url, verbose=False, **kwargs): """Download file from http(s) url - + Parameters ---------- url: str @@ -44,7 +44,7 @@ def http_download(url, Session to use. If None, create a new one. verbose: bool If True, print progress and debug text - + Keywords -------- additional keywords to be passed to session.request() @@ -61,11 +61,9 @@ def http_download(url, if not local_filepath: local_filepath = _filename_from_url(url) - response = _session.request(method, url, timeout=timeout, - stream=True, **kwargs) + stream=True, **kwargs) - response.raise_for_status() if 'content-length' in response.headers: length = int(response.headers['content-length']) @@ -75,10 +73,9 @@ def http_download(url, else: length = None - if cache and os.path.exists(local_filepath): if length is not None and os.path.getsize(local_filepath) != length: - warn(f'Found cached file but it has the wrong size. Overwriting ...', + warn('Found cached file but it has the wrong size. Overwriting ...', category=PyvoUserWarning) else: if verbose: @@ -90,7 +87,6 @@ def http_download(url, stream=True, **kwargs) response.raise_for_status() - blocksize = astropy.utils.data.conf.download_block_size n_bytes = 0 with ProgressBarOrSpinner(length, f'Downloading URL {url} to {local_filepath} ...') as pb: @@ -128,11 +124,11 @@ def _s3_is_accessible(s3_resource, bucket_name, key): exception = None try: - header_info = s3_client.head_object(Bucket=bucket_name, Key=key) + _ = s3_client.head_object(Bucket=bucket_name, Key=key) accessible = True - except Exception as e: + except Exception as ex: accessible = False - exception = e + exception = ex return accessible, exception @@ -171,7 +167,7 @@ def aws_download(uri=None, name of the user's profile for credentials in ~/.aws/config or ~/.aws/credentials. Use to authenticate the AWS user with boto3. session: boto3.session.Session - Session to use that include authentication if needed. + Session to use that include authentication if needed. If None, create an annonymous one. If given, aws_profile is ignored verbose: bool If True, print progress and debug text @@ -200,7 +196,6 @@ def aws_download(uri=None, if not local_filepath: local_filepath = _filename_from_url(f's3://{bucket_name}/{key}') - if session: if not isinstance(session, boto3.session.Session): raise ValueError('session has to be instance of boto3.session.Session') @@ -254,7 +249,7 @@ def aws_download(uri=None, print(f'Found cached file {local_filepath}.') return local_filepath else: - warn(f'Found cached file but it has the wrong size. Overwriting ...', + warn('Found cached file but it has the wrong size. Overwriting ...', category=PyvoUserWarning) with ProgressBarOrSpinner(length, (f'Downloading {key} to {local_filepath} ...')) as pb: @@ -286,12 +281,12 @@ def _filename_from_url(url): - https://example.com/files/myfile.pdf?user_id=123 - https://example.com/files/myfile.pdf - http://example.com/service?file=/location/myfile.pdf&size=large - + Parameters ---------- url: str url of the file - + """ parsed_url = urlparse(url) path = parsed_url.path @@ -304,5 +299,5 @@ def _filename_from_url(url): filename = os.path.basename(unquote(values[0])) if '.' in filename: break - + return filename diff --git a/pyvo/utils/tests/test_download.py b/pyvo/utils/tests/test_download.py index 500038b59..6f50b5837 100644 --- a/pyvo/utils/tests/test_download.py +++ b/pyvo/utils/tests/test_download.py @@ -25,11 +25,10 @@ except ImportError: HAS_MOTO = False - # Useful variables # For http_download: get_pkg_data_contents = partial( - get_pkg_data_contents, package=__package__, encoding='binary') + get_pkg_data_contents, package=__package__, encoding='binary') data_re = re.compile('http://example.com/data.*') # For aws_download: s3_bucket = 'pyvo-bucket' @@ -70,7 +69,7 @@ def test__filename_from_url(): for url in urls: filename = _filename_from_url(url) - assert(filename == 'myfile.pdf') + assert filename == 'myfile.pdf' @pytest.fixture(name='http_mock') @@ -80,7 +79,7 @@ def callback(request, context): return get_pkg_data_contents(f'data/{fname}') with mocker.register_uri( - 'GET', data_re, content=callback, headers={'content-length':'901'}, + 'GET', data_re, content=callback, headers={'content-length': '901'}, ) as matcher: yield matcher @@ -88,20 +87,20 @@ def callback(request, context): def test_http_download__noPath(http_mock): filename = http_download('http://example.com/data/basic.xml', local_filepath=None, cache=False) - assert(filename == 'basic.xml') + assert filename == 'basic.xml' os.remove('basic.xml') def test_http_download__noFile(http_mock): with pytest.raises(urllib.error.URLError): - filename = http_download('http://example.com/data/nofile.fits') + http_download('http://example.com/data/nofile.fits') def test_http_download__wPath(http_mock): filename = http_download('http://example.com/data/basic.xml', local_filepath='basic2.xml', cache=False) - assert(filename == 'basic2.xml') - assert(os.path.exists('basic2.xml')) + assert filename == 'basic2.xml' + assert os.path.exists('basic2.xml') os.remove('basic2.xml') @@ -111,13 +110,12 @@ def test_http_download__wrong_cache(http_mock): fp.write('some content') # get it from cache with pytest.warns(PyvoUserWarning): - filename = http_download('http://example.com/data/basic.xml', - local_filepath='basic.xml', cache=True) - assert(os.path.getsize('basic.xml') == 901) + http_download('http://example.com/data/basic.xml', + local_filepath='basic.xml', cache=True) + assert os.path.getsize('basic.xml') == 901 os.remove('basic.xml') - @pytest.mark.skipif('not HAS_MOTO') @pytest.fixture(name='s3_mock') def _s3_mock(mocker): @@ -131,7 +129,6 @@ def _s3_mock(mocker): @pytest.mark.skipif('not HAS_MOTO') def test_s3_mock_basic(s3_mock): - s3 = s3_mock.meta.client body = s3_mock.Object(s3_bucket, s3_key).get()['Body'] content = body.read().decode('utf-8') assert content == 'my content' @@ -140,47 +137,47 @@ def test_s3_mock_basic(s3_mock): @pytest.mark.skipif('not HAS_MOTO') def test__s3_is_accessible_yes(s3_mock): accessible, exc = _s3_is_accessible(s3_mock, s3_bucket, s3_key) - assert(accessible) - assert(exc is None) + assert accessible + assert exc is None @pytest.mark.skipif('not HAS_MOTO') def test__s3_is_accessible_no_bucket(s3_mock): accessible, exc = _s3_is_accessible(s3_mock, 'some-bucket', s3_key) - assert(not accessible) - assert('NoSuchBucket' in str(exc)) + assert not accessible + assert 'NoSuchBucket' in str(exc) @pytest.mark.skipif('not HAS_MOTO') def test__s3_is_accessible_no_key(s3_mock): accessible, exc = _s3_is_accessible(s3_mock, s3_bucket, 'does/not/exist') - assert(not accessible) - assert(isinstance(exc, ClientError)) + assert not accessible + assert isinstance(exc, ClientError) errmsg = str(exc) - assert('Not Found' in errmsg and '404' in errmsg) + assert 'Not Found' in errmsg and '404' in errmsg @pytest.mark.skipif('not HAS_MOTO') def test_s3_download__noPath(s3_mock): filename = aws_download(f's3://{s3_bucket}/{s3_key}', - local_filepath=None, cache=False) + local_filepath=None, cache=False) fname = s3_key.split('/')[-1] - assert(filename == fname) + assert filename == fname os.remove(fname) @pytest.mark.skipif('not HAS_MOTO') def test_s3_download__noKey(s3_mock): with pytest.raises(ClientError): - filename = aws_download(f's3://{s3_bucket}/does/not/exist') + aws_download(f's3://{s3_bucket}/does/not/exist') @pytest.mark.skipif('not HAS_MOTO') def test_s3_download__wPath(s3_mock): filename = aws_download(f's3://{s3_bucket}/{s3_key}', - local_filepath='newkey.txt', cache=False) - assert(filename == 'newkey.txt') - assert(os.path.exists('newkey.txt')) + local_filepath='newkey.txt', cache=False) + assert filename == 'newkey.txt' + assert os.path.exists('newkey.txt') os.remove(filename) @@ -191,7 +188,7 @@ def test_aws_download__wrong_cache(s3_mock): fp.write('not my content') # get it from cache with pytest.warns(PyvoUserWarning): - filename = aws_download(f's3://{s3_bucket}/{s3_key}', - local_filepath='somekey.txt', cache=True) - assert(os.path.getsize('somekey.txt') == 10) + aws_download(f's3://{s3_bucket}/{s3_key}', + local_filepath='somekey.txt', cache=True) + assert os.path.getsize('somekey.txt') == 10 os.remove('somekey.txt')