diff --git a/xesn/driver.py b/xesn/driver.py
index 98cda83..b01ed12 100644
--- a/xesn/driver.py
+++ b/xesn/driver.py
@@ -209,6 +209,8 @@ def run_test(self):
                 n_steps=self.config["testing"]["n_steps"],
                 n_spinup=self.config["testing"]["n_spinup"]
             )
+            xds["prediction"] = data.normalize_inverse(xds["prediction"], keep_attrs=True)
+            xds["truth"] = data.normalize_inverse(xds["truth"], keep_attrs=True)
             xds.to_zarr(join(self.output_directory, f"test-{i}.zarr"), mode="w")
             self.localtime.stop()
 
diff --git a/xesn/test/xdata.py b/xesn/test/xdata.py
index 4a9270c..28c65f2 100644
--- a/xesn/test/xdata.py
+++ b/xesn/test/xdata.py
@@ -67,7 +67,9 @@ def test_data():
             "z": np.linspace( 0, 50, 5),
             "time": np.linspace(0, 2000, 200),
         },
-        dims=tester.dimensions)
+        dims=tester.dimensions,
+        attrs={"description": "This is some test data!"},
+    )
     u.name = tester.field_name
     u.to_dataset().to_zarr(tester.zstore_path, mode="w")
     yield u
@@ -166,16 +168,39 @@ def test_subsample_type(self, test_data):
         xd.subsample(test_data, mode="training")
 
 
-    def test_normalize(self, test_data):
+    @pytest.mark.parametrize(
+        "keep_attrs", (True, False)
+    )
+    def test_normalize(self, test_data, keep_attrs):
         xd = XData(self.field_name,
                    self.zstore_path,
                    dimensions=self.dimensions,
                    normalization=self.normalization)
 
-        test = xd.normalize(test_data)
+        test = xd.normalize(test_data, keep_attrs=keep_attrs)
         expected = (test_data - self.normalization["bias"]) / self.normalization["scale"]
         assert_allclose(test, expected)
 
+        if keep_attrs:
+            assert test.attrs == test_data.attrs
+
+
+    @pytest.mark.parametrize(
+        "keep_attrs", (True, False)
+    )
+    def test_normalize_inverse(self, test_data, keep_attrs):
+        xd = XData(self.field_name,
+                   self.zstore_path,
+                   dimensions=self.dimensions,
+                   normalization=self.normalization)
+
+        test = xd.normalize_inverse(test_data, keep_attrs)
+        expected = test_data * self.normalization["scale"] + self.normalization["bias"]
+        assert_allclose(test, expected)
+
+        if keep_attrs:
+            assert test.attrs == test_data.attrs
+
 
     # Some repetition here, but I think it's worth it
     @pytest.mark.parametrize(
diff --git a/xesn/xdata.py b/xesn/xdata.py
index a68d626..317386d 100644
--- a/xesn/xdata.py
+++ b/xesn/xdata.py
@@ -99,7 +99,7 @@ def subsample(self, xda, mode):
         return xda
 
 
-    def normalize(self, xda):
+    def normalize(self, xda, keep_attrs=False):
         """Very simple, this may eventually be hooked up with scikit learn for
         more advanced normalization.
         Right now, normalize with scalars :attr:`bias` and :attr:`scale` as
@@ -108,18 +108,44 @@ def normalize(self, xda):
 
         Args:
             xda (xarray.DataArray): with the field to be normalized
+            keep_attrs (bool): if True, keep the attributes of xda in the result
 
         Returns:
             xda (xarray.DataArray): normalized
         """
-        # good practice?
         if self.normalization is None:
             return xda
 
-        # TODO: not really clear how to handle other cases...
-        # is it worth handling what options should be here?
-        # assert self.normalization["type"] == "normal-scalar"
         bias = self.normalization.get("bias", 0.)
         scale= self.normalization.get("scale", 1.)
-        return (xda - bias)/scale
+
+        with xr.set_options(keep_attrs=keep_attrs):
+            result = (xda - bias) / scale
+
+        return result
+
+
+    def normalize_inverse(self, xda, keep_attrs=False):
+        """Do the inverse operation of :meth:`normalize`, i.e.,
+
+        .. math::
+            xda * scale + bias
+
+        Args:
+            xda (xarray.DataArray): with the normalized field to be re-scaled
+            keep_attrs (bool): if True, keep the attributes of xda in the result
+
+        Returns:
+            xda (xarray.DataArray): scaled and biased like the original data
+        """
+        if self.normalization is None:
+            return xda
+
+        bias = self.normalization.get("bias", 0.)
+        scale = self.normalization.get("scale", 1.)
+
+        with xr.set_options(keep_attrs=keep_attrs):
+            result = xda * scale + bias
+
+        return result
 
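
For reviewers, a minimal round-trip sketch of the behavior this patch adds. The sample `DataArray`, its attrs, the normalization values, and the store path below are invented for illustration; `from xesn import XData` as a top-level export, and a constructor that does not open the store on its own, are assumptions rather than something this diff guarantees.

```python
# Hypothetical round trip through normalize/normalize_inverse with
# keep_attrs=True; all sample values here are made up for illustration.
import numpy as np
import xarray as xr
from xesn import XData  # assumed top-level export

u = xr.DataArray(
    np.random.rand(4, 10),
    coords={"x": np.arange(4), "time": np.arange(10)},
    dims=("x", "time"),
    attrs={"description": "This is some test data!"},
)

data = XData(
    "u",                 # field_name
    "example.zarr",      # hypothetical zarr store path
    dimensions=("x", "time"),
    normalization={"bias": 2.0, "scale": 0.5},
)

# Forward transform: (u - bias) / scale. The attrs survive because the
# arithmetic runs inside xr.set_options(keep_attrs=True).
normed = data.normalize(u, keep_attrs=True)
assert normed.attrs == u.attrs

# Inverse transform: normed * scale + bias recovers the original field,
# which is what run_test now applies to "prediction" and "truth" before
# writing them to zarr.
roundtrip = data.normalize_inverse(normed, keep_attrs=True)
xr.testing.assert_allclose(roundtrip, u)
assert roundtrip.attrs == u.attrs
```

Scoping the attribute handling with the `xr.set_options(keep_attrs=keep_attrs)` context manager keeps the behavior local to the two arithmetic expressions instead of flipping xarray's global default, so callers that leave `keep_attrs=False` are unaffected.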