Skip to content

Commit

Permalink
Just ignore messed-up T coordinate from PyCPT
Browse files Browse the repository at this point in the history
  • Loading branch information
aaron-kaplan committed Sep 9, 2024
1 parent 85e06b8 commit b4056a2
Showing 1 changed file with 10 additions and 43 deletions.
53 changes: 10 additions & 43 deletions fbfmaproom/data-conversion-scripts/zarrify-forecast.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,40 +46,19 @@ def calc_pne(obs, hindcasts, forecasts, dof=None, quantile_first_year=None, quan

# Month-name abbreviations, 1-indexed: abbrevs[m] is the name of month m.
# Index 0 is a None placeholder so month numbers (1-12) index directly.
abbrevs = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

def convert_ds(ds, issue_month):
    '''Replace Gregorian T coordinate with the 360-day calendar used by FbF
    and add non-dimension S coordinate.

    ds: dataset with a 'T' target-time coordinate (accessed via the xarray
        .dt accessor, so T must be datetime-like).
    issue_month: month number (1-12) in which the forecast was issued.
    Returns ds with T converted to the 360-day calendar and an 'S'
    (issue-date) coordinate added along the T dimension.
    Raises AssertionError if the target day is neither 1 nor 16.
    '''
    ds = to_360_ds(ds)

    # Reconstruct the issue date and add it as a non-dimension coordinate.
    # TODO: pycpt has this info at some point; make it save that in the netcdf
    # so we don't have to reconstruct it here.
    target_month = ds['T'].dt.month[0].item()
    target_day = ds['T'].dt.day[0].item()
    if target_day == 1:
        # Target at the start of the month: integral month number.
        pass
    elif target_day == 16:
        # Mid-month target: represent as a half month, e.g. 1.5 for mid-Jan.
        target_month += .5
    else:
        assert False, f"Unexpected target day {target_day}"
    # Lead time between issue and target, in 360-day-calendar days (30 per
    # month, wrapping across the year boundary via the mod 12).
    # Fix: removed a dead `lead = target_month` assignment that was
    # immediately overwritten by this line.
    lead = datetime.timedelta(days=((target_month - issue_month) % 12) * 30)
    ds.coords['S'] = ('T', ds['T'].values - lead)
    return ds

def read_v2_one_issue_month(path):
hindcasts = xr.Dataset(
dict(
mu=open_and_fix(path / 'MME_deterministic_hindcasts.nc'),
var=open_and_fix(path / 'MME_hindcast_prediction_error_variance.nc'),
mu=open_one(path / 'MME_deterministic_hindcasts.nc'),
var=open_one(path / 'MME_hindcast_prediction_error_variance.nc'),
),
)
mu_files = list(path.glob('MME_deterministic_forecast_*.nc'))
var_files = list(path.glob('MME_forecast_prediction_error_variance_*.nc'))
if mu_files and var_files:
mu_da = open_and_fix_mf(mu_files).load()
var_da = open_and_fix_mf(var_files).load()
mu_da = open_multi(mu_files).load()
var_da = open_multi(var_files).load()
forecasts = xr.Dataset(
dict(mu=mu_da, var=var_da)
)
Expand Down Expand Up @@ -116,18 +95,16 @@ def to_360_coord(c):
]

def to_360_ds(ds):
    '''Convert the 'T' and 'S' time coordinates of *ds* to the 360-day
    calendar via to_360_coord, assigning each back in place, and return
    the dataset.'''
    for coord_name in ('T', 'S'):
        ds[coord_name] = to_360_coord(ds[coord_name])
    return ds

def load_pne(path):
if (path / 'obs.nc').is_file():
obs_da = open_and_fix(path / 'obs.nc')
obs_da = open_one(path / 'obs.nc')
else:
# for backwards compatibility with some old forecast datasets,
# try again with tsv
obs_da = next(iter(cptio.open_cptdataset(path / 'obs.tsv').data_vars.values()))
obs_da = fix_t(obs_da)
obs = xr.Dataset(dict(
obs=obs_da
))
Expand All @@ -137,35 +114,25 @@ def load_pne(path):
if month_path.exists():
hindcasts, forecasts = read_v2_one_issue_month(month_path)
pne = calc_pne(obs, hindcasts, forecasts)
pne = convert_ds(pne, monthno).swap_dims(T='S')
pne = pne.swap_dims(T='S')
pne = pne.reset_coords(drop=True)
pne = to_360_ds(pne)
pne_per_issue_month.append(pne)
return xr.merge(pne_per_issue_month, compat='no_conflicts')


# NOTE(review): this span is a diff rendering — the first `def` line is the
# pre-change name (open_and_fix) and the second is its rename (open_one).
# Only one of the two `def` lines belongs in the real file; confirm against
# the applied version before editing.
def open_and_fix(path):
def open_one(path):
    '''Open a single netCDF file at *path* and return it as a DataArray
    (via xr.open_dataarray), after running fix_t on it.'''
    da = xr.open_dataarray(path)
    da = fix_t(da)
    return da

# NOTE(review): diff rendering — the first `def` line is the pre-change name
# (open_and_fix_mf) and the second is its rename (open_multi). Only one of
# the two `def` lines belongs in the real file; confirm against the applied
# version before editing.
def open_and_fix_mf(paths):
def open_multi(paths):
    '''Open multiple netCDF files as one dataset (xr.open_mfdataset) and
    return its first data variable as a DataArray, after running fix_t.'''
    da = next(iter(xr.open_mfdataset(paths).data_vars.values()))
    da = fix_t(da)
    return da


def fix_t(ds):
    '''Work around a bug in cptio that puts Tf 24 hours too early, and consequently T 12 hours too early.'''
    # If the first Tf does not fall on the 1st of a month, treat the dataset
    # as exhibiting the bug and shift the midpoint T forward by 12 hours.
    # NOTE(review): assumes the whole T axis shares the bug when its first
    # element does — TODO confirm.
    if ds['Tf'][0].dt.day.values != 1:
        ds['T'] = ds['T'] + np.timedelta64(12, 'h')
    # Just dropping Ti and Tf for now. If we need them we can convert them.
    ds = ds.drop_vars(['Ti', 'Tf'])
    return ds


def zarrify(path, datadir):
print(path)
pne = load_pne(datadir / 'original-data' / path)
pne = pne.drop_vars('T') # xr.where doesn't like the non-dimension coord?
pne['quantile'] = (pne['quantile'] * 100).astype(int)
pne['pne'] = pne['pne'] * 100
# Some input datasets are in decreasing latitude order, which
Expand Down

0 comments on commit b4056a2

Please sign in to comment.