Fix cftime #35

Open · wants to merge 5 commits into master · changes from all commits
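This PR threads an explicit `use_cftime` flag through xorca's `xr.open_dataset`/`xr.open_mfdataset` calls and adds an `rm_1d` switch to `trim_and_squeeze`, so the singleton time dimension of mesh-mask files can be dropped even when it does not decode to `datetime64`. For context, a minimal sketch of the limitation being worked around (xarray's `use_cftime` flag is standard API; `long_run.nc` and the `time_counter` variable are hypothetical stand-ins for NEMO output); a usage sketch of the changed entry point follows the diff below:

    import pandas as pd
    import xarray as xr

    # The default CF decode target, datetime64[ns], cannot represent dates
    # past 2262-04-11, so decoding long model runs raises OutOfBoundsDatetime:
    print(pd.Timestamp.max)  # 2262-04-11 23:47:16.854775807

    # Decoding to cftime objects instead keeps such dates representable.
    ds = xr.open_dataset("long_run.nc", use_cftime=True)
    print(ds["time_counter"].dtype)  # object: an array of cftime datetimes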
xorca/lib.py: 53 changes (37 additions, 16 deletions)
@@ -9,6 +9,7 @@
 def trim_and_squeeze(ds,
                      model_config="GLOBAL",
                      y_slice=None, x_slice=None,
+                     rm_1d=False,
                      **kwargs):
     """Remove redundant grid points and drop singleton dimensions.
 
@@ -31,6 +32,9 @@ def trim_and_squeeze(ds,
     x_slice : tuple
         See y_slice. This will override selection along x given by
         `model_config`.
+    rm_1d : bool
+        Remove the singleton time dimension of a mesh-mask file without
+        interfering with real data having only one time step (e.g. yearly).
 
     Returns
     -------
@@ -63,9 +67,12 @@ def _is_singleton(ds, dim):
         return (ds[dim].size == 1)
 
     def _is_time_dim(ds, dim):
-        return (dim in orca_names.t_dims and
-                np.issubdtype(ds[dim].dtype,
-                              np.datetime64))
+        if rm_1d and dim in orca_names.t_dims:
+            return False
+        else:
+            return (dim in orca_names.t_dims and
+                    (np.issubdtype(ds[dim].dtype, np.datetime64)
+                     or np.issubdtype(ds[dim].dtype, 'object')))
 
     def _is_z_dim(ds, dim):
         return (dim in orca_names.z_dims)
@@ -232,9 +239,9 @@ def open_mf_or_dataset(data_files, **kwargs):
     """Open data_files as multi-file or a single-file xarray Dataset."""
 
     try:
-        mesh_mask = xr.open_mfdataset(data_files, chunks={})
+        mesh_mask = xr.open_mfdataset(data_files, chunks={}, use_cftime=False)
     except TypeError as e:
-        mesh_mask = xr.open_dataset(data_files, chunks={})
+        mesh_mask = xr.open_dataset(data_files, chunks={}, use_cftime=False)
 
     return mesh_mask
 
Expand Down Expand Up @@ -265,7 +272,7 @@ def set_time_independent_vars_to_coords(ds):
if 't' not in ds[v].dims])


def preprocess_orca(mesh_mask, ds, **kwargs):
def preprocess_orca(mesh_mask, ds, m1=True, m2=False, **kwargs):
"""Preprocess orca datasets before concatenating.

This is meant to be used like:
@@ -288,6 +295,12 @@ def preprocess_orca(mesh_mask, ds, **kwargs):
         Chunks for the ds to be preprocessed. Pass chunking for any input
         dimension that might be in the input data.
 
+    m1 : bool, default True
+        True if the first argument (mesh_mask) is a mesh mask: drop time dim.
+
+    m2 : bool, default False
+        True if the second argument (ds) is a mesh mask: drop time dim.
+
     Returns
     -------
     xarray dataset
@@ -301,12 +314,18 @@ def preprocess_orca(mesh_mask, ds, **kwargs):
     # construct minimal grid-aware data set from mesh-mask info
     if not isinstance(mesh_mask, xr.Dataset):
         mesh_mask = open_mf_or_dataset(mesh_mask, **kwargs)
-    mesh_mask = trim_and_squeeze(mesh_mask, **kwargs)
+    if m1:
+        mesh_mask = trim_and_squeeze(mesh_mask, rm_1d=True, **kwargs)
+    else:
+        mesh_mask = trim_and_squeeze(mesh_mask, **kwargs)
     return_ds = create_minimal_coords_ds(mesh_mask, **kwargs)
 
     # make sure dims are called correctly and trim input ds
     ds = rename_dims(ds, **kwargs)
-    ds = trim_and_squeeze(ds, **kwargs)
+    if m2:
+        ds = trim_and_squeeze(ds, rm_1d=True, **kwargs)
+    else:
+        ds = trim_and_squeeze(ds, **kwargs)
 
     # copy coordinates from the mesh-mask and from the data set
     return_ds = copy_coords(return_ds, mesh_mask, **kwargs)
@@ -330,7 +349,7 @@ def _get_first_time_step_if_any(dobj):
 
 
 def load_xorca_dataset(data_files=None, aux_files=None, decode_cf=True,
-                       **kwargs):
+                       use_cftime=False, **kwargs):
     """Create a grid-aware NEMO dataset.
 
     Parameters
@@ -349,6 +368,8 @@ def load_xorca_dataset(data_files=None, aux_files=None, decode_cf=True,
         output dims: `("t", "z_c", "z_l", "y_c", "y_r", "x_c", "x_r")`
     decode_cf : bool
         Do we want the CF decoding to be done already? Default is True.
+    use_cftime : bool
+        Decode times as cftime objects; needed beyond year 2262. Default False.
 
     Returns
     -------
@@ -381,32 +402,32 @@ def load_xorca_dataset(data_files=None, aux_files=None, decode_cf=True,
     # distributed performance.
     _aux_files_chunks = map(
         lambda af: get_all_compatible_chunk_sizes(
-            input_ds_chunks, xr.open_dataset(af, decode_cf=False)),
+            input_ds_chunks, xr.open_dataset(af, decode_cf=False, use_cftime=False)),
         aux_files)
     aux_ds = xr.Dataset()
     for af, ac in zip(aux_files, _aux_files_chunks):
         aux_ds.update(
-            rename_dims(xr.open_dataset(af, decode_cf=False,
+            rename_dims(xr.open_dataset(af, decode_cf=False, use_cftime=False,
                                         chunks=ac)))
 
     # Again, we first have to open all data sets to filter the input chunks.
     _data_files_chunks = map(
         lambda df: get_all_compatible_chunk_sizes(
-            input_ds_chunks, xr.open_dataset(df, decode_cf=decode_cf)),
+            input_ds_chunks, xr.open_dataset(df, decode_cf=decode_cf, use_cftime=use_cftime)),
         data_files)
 
     # Automatically combine all data files
     ds_xorca = xr.combine_by_coords(
         sorted(
-            map(
-                lambda ds: preprocess_orca(aux_ds, ds, **kwargs),
+            map(lambda ds: preprocess_orca(aux_ds, ds, m1=True, m2=False, **kwargs),
                 map(lambda df, chunks: rename_dims(
-                    xr.open_dataset(df, chunks=chunks, decode_cf=decode_cf),
+                    xr.open_dataset(df, chunks=chunks, decode_cf=decode_cf, use_cftime=use_cftime),
                     **kwargs),
                     data_files, _data_files_chunks)),
             key=_get_first_time_step_if_any))
 
     # Add info from aux files
-    ds_xorca.update(preprocess_orca(aux_ds, aux_ds, **kwargs))
+    ds_xorca.update(preprocess_orca(aux_ds, aux_ds, m1=True, m2=True, **kwargs))
 
     # Chunk the final ds
     ds_xorca = ds_xorca.chunk(
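For reviewers, a sketch of how the changed entry point would be called after this PR (the file names are hypothetical; `load_xorca_dataset` with `data_files` and `aux_files` is xorca's existing API):

    from xorca.lib import load_xorca_dataset

    # Hypothetical NEMO output for model years beyond 2262, plus mesh-mask files.
    data_files = ["ORCA05_1y_23000101_23001231_grid_T.nc"]
    aux_files = ["mesh_hgr.nc", "mesh_zgr.nc", "mask.nc"]

    # use_cftime=True decodes the data's time axis to cftime objects; the
    # aux (mesh-mask) files are opened with use_cftime=False and their
    # singleton time dimension is dropped via rm_1d / m1.
    ds = load_xorca_dataset(data_files=data_files, aux_files=aux_files,
                            use_cftime=True)
    print(ds)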