Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add file download functionality to ERDDAP #330

Merged
merged 7 commits into from
Jan 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions erddapy/core/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,3 +439,52 @@ def get_download_url(

url = _distinct(url, distinct)
return url


# Response file types that `download_file` accepts, written without the
# leading dot. Order follows the ERDDAP documentation listing: data file
# types first, then the image file types (kml through transparentPng).
download_formats = [
    "asc",
    "csv",
    "csvp",
    "csv0",
    "dataTable",
    "das",
    "dds",
    "dods",
    "esriCsv",
    "fgdc",
    "geoJson",
    "graph",
    "help",
    "html",
    # `htmlTable` is a documented ERDDAP file type that was missing from the
    # list, which made `download_file("htmlTable")` fail validation.
    "htmlTable",
    "iso19115",
    "itx",
    "json",
    "jsonlCSV1",
    "jsonlCSV",
    "jsonlKVP",
    "mat",
    "nc",
    "ncHeader",
    "ncCF",
    "ncCFHeader",
    "ncCFMA",
    "ncCFMAHeader",
    "nccsv",
    "nccsvMetadata",
    "ncoJson",
    "odvTxt",
    "subset",
    "tsv",
    "tsvp",
    "tsv0",
    "wav",
    "xhtml",
    "kml",
    "smallPdf",
    "pdf",
    "largePdf",
    "smallPng",
    "png",
    "largePng",
    "transparentPng",
]
24 changes: 24 additions & 0 deletions erddapy/erddapy.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
"""Pythonic way to access ERDDAP data."""

import functools
import hashlib
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union
from urllib.request import urlretrieve

import pandas as pd

Expand All @@ -16,6 +19,7 @@
_distinct,
_format_constraints_url,
_quote_string_constraints,
download_formats,
get_categorize_url,
get_download_url,
get_info_url,
Expand Down Expand Up @@ -471,3 +475,23 @@ def get_var_by_attr(self, dataset_id: OptionalStr = None, **kwargs) -> List[str]
if has_value_flag is True:
vs.append(vname)
return vs

def download_file(
    self,
    file_type: str,
) -> Path:
    """Download the dataset to a file in a user specified format.

    The file is written to a relative path named
    ``<dataset_id>_<hash>.<file_type>``, where ``<hash>`` is a short digest
    of the sorted constraints and sorted variables. If a file with that
    name already exists it is reused and nothing is downloaded.

    Parameters
    ----------
    file_type
        Requested response type, e.g. ``"nc"`` or ``".csv"``. Leading dots
        are stripped before validation.

    Returns
    -------
    Path
        Path of the downloaded (or previously downloaded) file.

    Raises
    ------
    ValueError
        If ``file_type`` is not listed in ``download_formats``.

    """
    file_type = file_type.lstrip(".")
    if file_type not in download_formats:
        raise ValueError(
            f"Requested filetype {file_type} not available on ERDDAP",
        )
    url = self.get_download_url(response=file_type)
    # Treat unset (None) constraints/variables like empty ones so building
    # the cache key cannot fail with AttributeError/TypeError.
    constraints = {} if self.constraints is None else self.constraints
    variables = [] if self.variables is None else self.variables
    # Sorting makes the key independent of the order in which constraints
    # and variables were supplied.
    constraints_str = str(dict(sorted(constraints.items()))) + str(
        sorted(variables),
    )
    constraints_hash = hashlib.shake_256(constraints_str.encode()).hexdigest(5)
    file_name = Path(f"{self.dataset_id}_{constraints_hash}.{file_type}")
    if not file_name.exists():
        try:
            urlretrieve(url, file_name)
        except BaseException:
            # An interrupted transfer can leave a truncated file behind;
            # remove it so the next call does not mistake it for a valid
            # cached download, then let the original error propagate.
            if file_name.exists():
                file_name.unlink()
            raise
    return file_name
12 changes: 12 additions & 0 deletions tests/test_to_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,3 +197,15 @@ def test_to_iris_griddap(dataset_griddap):
"""Test converting griddap to an iris cube."""
cubes = dataset_griddap.to_iris()
assert isinstance(cubes, iris.cube.CubeList)


@pytest.mark.web
def test_download_file(dataset_tabledap):
    """Test direct download of tabledap dataset.

    Checks that the downloaded netCDF file can be opened and exposes the
    expected variables, and that reversing the variable order maps to the
    same file name. The downloaded file is removed afterwards so that a
    later run downloads again instead of reusing a leftover file.
    """
    original_variables = dataset_tabledap.variables
    fn = dataset_tabledap.download_file("nc")
    try:
        ds = xr.load_dataset(fn)
        assert ds["time"].name == "time"
        assert ds["temperature"].name == "temperature"
        # Variable order must not influence the generated file name.
        dataset_tabledap.variables = dataset_tabledap.variables[::-1]
        fn_new = dataset_tabledap.download_file("nc")
        assert fn_new == fn
    finally:
        # Restore the fixture in case it is shared with other tests, and
        # delete the download so it does not pollute the working directory.
        dataset_tabledap.variables = original_variables
        if fn.exists():
            fn.unlink()