Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable https reduction (off NGINX server only) #245

Open
wants to merge 6 commits into
base: pyfive
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions activestorage/active.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import concurrent.futures
import os
import fsspec
import numpy as np
import pathlib
import urllib
Expand Down Expand Up @@ -50,6 +51,18 @@ def load_from_s3(uri, storage_options=None):
return ds


def load_from_https(uri):
    """
    Open a netCDF4 file served over http(s) and return it as a Pyfive File.

    The ``uri`` is opened in binary read mode through fsspec's ``'http'``
    filesystem and the resulting file object is passed to ``pyfive.File``.
    The file object is not closed here; presumably the returned dataset
    keeps reading from it lazily -- TODO confirm.
    """
    # TODO: need to test if NGINX server behind https://
    remote_file = fsspec.filesystem('http').open(uri, 'rb')
    dataset = pyfive.File(remote_file)
    print(f"Dataset loaded from https with Pyfive: {uri}")
    return dataset


def get_missing_attributes(ds):
""""
Load all the missing attributes we need from a netcdf file
Expand Down Expand Up @@ -151,6 +164,8 @@ def __init__(
self.filename = self.ds
elif input_variable and self.storage_type == "s3":
self.filename = self.ds.id._filename
elif input_variable and self.storage_type == "https":
self.filename = self.ds

# get storage_options
self.storage_options = storage_options
Expand Down Expand Up @@ -187,6 +202,8 @@ def __load_nc_file(self):
nc = pyfive.File(self.uri)
elif self.storage_type == "s3":
nc = load_from_s3(self.uri, self.storage_options)
elif self.storage_type == "https":
nc = load_from_https(self.uri)
self.filename = self.uri
self.ds = nc[ncvar]

Expand Down
40 changes: 28 additions & 12 deletions activestorage/storage.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Active storage module."""
import fsspec
import numpy as np
import pyfive

Expand Down Expand Up @@ -34,18 +35,33 @@ def reduce_chunk(rfile,
#FIXME: for the moment, open the file every time ... we might want to do that, or not
# we could just use an instance of pyfive.high_level.Dataset.id
# passed directly from active.py, as below
with open(rfile,'rb') as open_file:
# get the data
chunk = read_block(open_file, offset, size)
# reverse any compression and filters
chunk = filter_pipeline(chunk, compression, filters)
# make it a numpy array of bytes
chunk = ensure_ndarray(chunk)
# convert to the appropriate data type
chunk = chunk.view(dtype)
# sort out ordering and convert to the parent hyperslab dimensions
chunk = chunk.reshape(-1, order='A')
chunk = chunk.reshape(shape, order=order)
try:
with open(rfile,'rb') as open_file:
# get the data
chunk = read_block(open_file, offset, size)
# reverse any compression and filters
chunk = filter_pipeline(chunk, compression, filters)
# make it a numpy array of bytes
chunk = ensure_ndarray(chunk)
# convert to the appropriate data type
chunk = chunk.view(dtype)
# sort out ordering and convert to the parent hyperslab dimensions
chunk = chunk.reshape(-1, order='A')
chunk = chunk.reshape(shape, order=order)
except FileNotFoundError: # could a https file
fs = fsspec.filesystem('http')
with fs.open(rfile, 'rb') as open_file:
# get the data
chunk = read_block(open_file, offset, size)
# reverse any compression and filters
chunk = filter_pipeline(chunk, compression, filters)
# make it a numpy array of bytes
chunk = ensure_ndarray(chunk)
# convert to the appropriate data type
chunk = chunk.view(dtype)
# sort out ordering and convert to the parent hyperslab dimensions
chunk = chunk.reshape(-1, order='A')
chunk = chunk.reshape(shape, order=order)
else:
class storeinfo: pass
storeinfo.byte_offset = offset
Expand Down
31 changes: 31 additions & 0 deletions tests/test_real_https.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import os
import numpy as np

from activestorage.active import Active
from activestorage.active import load_from_https


def test_https():
    """Reduce a slice of variable "cl" from a real file reached by its https URI."""
    remote_uri = "https://esgf.ceda.ac.uk/thredds/fileServer/esg_cmip6/CMIP6/AerChemMIP/MOHC/UKESM1-0-LL/ssp370SST-lowNTCF/r1i1p1f2/Amon/cl/gn/latest/cl_Amon_UKESM1-0-LL_ssp370SST-lowNTCF_r1i1p1f2_gn_205001-209912.nc"
    expected_min = np.array([0.6909787], dtype="float32")

    # Build the Active object straight from the URI and variable name.
    reducer = Active(remote_uri, "cl", storage_type="https")
    reducer._version = 2
    reducer._method = "min"

    selection_min = reducer[0:3, 4:6, 7:9]
    print("Result is", selection_min)
    assert selection_min == expected_min


def test_https_dataset():
    """Reduce a slice of variable "cl" from a dataset pre-loaded over https."""
    remote_uri = "https://esgf.ceda.ac.uk/thredds/fileServer/esg_cmip6/CMIP6/AerChemMIP/MOHC/UKESM1-0-LL/ssp370SST-lowNTCF/r1i1p1f2/Amon/cl/gn/latest/cl_Amon_UKESM1-0-LL_ssp370SST-lowNTCF_r1i1p1f2_gn_205001-209912.nc"
    expected_min = np.array([0.6909787], dtype="float32")

    # Load the file first, then hand the variable (not the URI) to Active.
    cl_variable = load_from_https(remote_uri)['cl']

    reducer = Active(cl_variable, storage_type="https")
    reducer._version = 2
    reducer._method = "min"

    selection_min = reducer[0:3, 4:6, 7:9]
    print("Result is", selection_min)
    assert selection_min == expected_min
Loading