From 1ecab1a7c5fff6d5adcef422558d82361f1818ae Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Wed, 25 May 2022 16:32:54 +0200 Subject: [PATCH 01/22] allow different bitinformation implementations --- xbitinfo/xbitinfo.py | 76 +++++++++++++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 25 deletions(-) diff --git a/xbitinfo/xbitinfo.py b/xbitinfo/xbitinfo.py index 7da5baac..3930930d 100644 --- a/xbitinfo/xbitinfo.py +++ b/xbitinfo/xbitinfo.py @@ -89,7 +89,15 @@ def dict_to_dataset(info_per_bit): return dsb -def get_bitinformation(ds, dim=None, axis=None, label=None, overwrite=False, **kwargs): +def get_bitinformation( + ds, + dim=None, + axis=None, + label=None, + overwrite=False, + implementation="BitInformation.jl", + **kwargs, +): """Wrap `BitInformation.jl.bitinformation() `__. Parameters @@ -106,6 +114,10 @@ def get_bitinformation(ds, dim=None, axis=None, label=None, overwrite=False, **k Label of the json to serialize bitinfo. When string, serialize results to disk into file ``{{label}}.json`` to be reused later. Defaults to ``None``. overwrite : bool If ``False``, try using serialized bitinfo based on label; if true or label does not exist, run bitinformation + implementation : str + Bitinformation algorithm implementation. 
Valid options are + - BitInformation.jl, the original implementation in julia by Milan Kloewer + - python, a copy of the core functionality of BitInformation.jl in python kwargs to be passed to bitinformation: @@ -193,31 +205,18 @@ def get_bitinformation(ds, dim=None, axis=None, label=None, overwrite=False, **k pbar = tqdm(ds.data_vars) for var in pbar: pbar.set_description("Processing %s" % var) - X = ds[var].values - Main.X = X - if axis is not None: - # in julia convention axis + 1 - axis_jl = axis + 1 - dim = ds[var].dims[axis] - if isinstance(dim, str): - try: - # in julia convention axis + 1 - axis_jl = ds[var].get_axis_num(dim) + 1 - except ValueError: - logging.info( - f"Variable [var] does not have dimension {dim}. Skipping." - ) + if implementation == "BitInformation.jl": + info_per_bit_var = _jl_get_bitinformation(ds, var, axis, dim, kwargs) + if info_per_bit_var is None: continue - assert isinstance(axis_jl, int) - Main.dim = axis_jl - kwargs_str = _get_bitinformation_kwargs_handler(ds[var], kwargs) - logging.debug(f"get_bitinformation(X, dim={dim}, {kwargs_str})") - info_per_bit[var] = {} - info_per_bit[var]["bitinfo"] = jl.eval( - f"get_bitinformation(X, dim={axis_jl}, {kwargs_str})" - ) - info_per_bit[var]["dim"] = dim - info_per_bit[var]["axis"] = axis_jl - 1 + else: + info_per_bit[var] = info_per_bit_var + elif implementation == "python": + pass + else: + raise ValueError( + f"Implementation of bitinformation algortihm {implementation} is unknown. Please choose a different one." 
+ ) if label is not None: with open(label + ".json", "w") as f: logging.debug(f"Save bitinformation to {label + '.json'}") @@ -225,6 +224,33 @@ def get_bitinformation(ds, dim=None, axis=None, label=None, overwrite=False, **k return dict_to_dataset(info_per_bit) +def _jl_get_bitinformation(ds, var, axis, dim, kwargs): + X = ds[var].values + Main.X = X + if axis is not None: + # in julia convention axis + 1 + axis_jl = axis + 1 + dim = ds[var].dims[axis] + if isinstance(dim, str): + try: + # in julia convention axis + 1 + axis_jl = ds[var].get_axis_num(dim) + 1 + except ValueError: + logging.info(f"Variable [var] does not have dimension {dim}. Skipping.") + return + assert isinstance(axis_jl, int) + Main.dim = axis_jl + kwargs_str = _get_bitinformation_kwargs_handler(ds[var], kwargs) + logging.debug(f"get_bitinformation(X, dim={dim}, {kwargs_str})") + info_per_bit = {} + info_per_bit["bitinfo"] = jl.eval( + f"get_bitinformation(X, dim={axis_jl}, {kwargs_str})" + ) + info_per_bit["dim"] = dim + info_per_bit["axis"] = axis_jl - 1 + return info_per_bit + + def _get_bitinformation_along_dims(ds, dim=None, label=None, overwrite=False, **kwargs): """Helper function for :py:func:`xbitinfo.xbitinfo.get_bitinformation` to handle multi-dimensional analysis for each dim specified. 
From b4cade33fffacc874a4e0ae15e253ef0e42df3cc Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Mon, 17 Oct 2022 04:22:30 +0200 Subject: [PATCH 02/22] WIP: python implementation --- xbitinfo/_py_bitinfo.py | 58 +++++++++++++++++++++++++++++++++++++++++ xbitinfo/xbitinfo.py | 31 +++++++++++++++++++--- 2 files changed, 86 insertions(+), 3 deletions(-) create mode 100644 xbitinfo/_py_bitinfo.py diff --git a/xbitinfo/_py_bitinfo.py b/xbitinfo/_py_bitinfo.py new file mode 100644 index 00000000..c18caea4 --- /dev/null +++ b/xbitinfo/_py_bitinfo.py @@ -0,0 +1,58 @@ +import numpy as np + + +def bitpaircount_u1(a, b): + assert a.dtype == "u1" + assert b.dtype == "u1" + unpack_a = ( + a.map_blocks(np.unpackbits, drop_axis=0).compute().astype("u1") + ) # compute needed for correct shape + unpack_b = b.map_blocks(np.unpackbits, drop_axis=0).compute().astype("u1") + index = ((unpack_a << 1) | unpack_b).reshape(-1, 8) + + selection = np.array([0, 1, 2, 3], dtype="u1") + print("selection") + sel = np.where((index[..., np.newaxis]) == selection, True, False) + print("summing stuff") + to_return = sel.sum(axis=0).reshape(8, 2, 2) + print("finished summing stuff") + return to_return + + +def bitpaircount(a, b): + assert a.dtype.kind == "u" + assert b.dtype.kind == "u" + nbytes = max(a.dtype.itemsize, b.dtype.itemsize) + + a, b = np.broadcast_arrays(a, b) + + bytewise_counts = [] + for i in range(nbytes): + s = (nbytes - 1 - i) * 8 + bitc = bitpaircount_u1((a >> s).astype("u1"), (b >> s).astype("u1")) + bytewise_counts.append(bitc) + return np.concatenate(bytewise_counts, axis=0) + + +def mutual_information(a, b, base=2): + size = np.prod(np.broadcast_shapes(a.shape, b.shape)) + print("run bitpaircount") + counts = bitpaircount(a, b) + print("finished bitpaircount") + print(size) + + p = counts.astype("float") / size + pr = p.sum(axis=-1)[..., np.newaxis] + ps = p.sum(axis=-2)[..., np.newaxis, :] + + return np.where(p > 0, p * 
np.log(p / (pr * ps)), 0).sum(axis=(-1, -2)) / np.log( + base + ) + + +def bitinformation(a, axis=0): + sa = tuple(slice(0, -1) if i == axis else slice(None) for i in range(len(a.shape))) + sb = tuple( + slice(1, None) if i == axis else slice(None) for i in range(len(a.shape)) + ) + return mutual_information(a[sa], a[sb]) diff --git a/xbitinfo/xbitinfo.py b/xbitinfo/xbitinfo.py index 3930930d..91e0fd80 100644 --- a/xbitinfo/xbitinfo.py +++ b/xbitinfo/xbitinfo.py @@ -4,17 +4,18 @@ import numpy as np import xarray as xr +from dask import array as da from julia.api import Julia from tqdm.auto import tqdm from . import __version__ +from . import _py_bitinfo as pb from .julia_helpers import install already_ran = False if not already_ran: already_ran = install(quiet=True) - jl = Julia(compiled_modules=False, debug=False) from julia import Main # noqa: E402 @@ -212,10 +213,14 @@ def get_bitinformation( else: info_per_bit[var] = info_per_bit_var elif implementation == "python": - pass + info_per_bit_var = _py_get_bitinformation(ds, var, axis, dim, kwargs) + if info_per_bit_var is None: + continue + else: + info_per_bit[var] = info_per_bit_var else: raise ValueError( - f"Implementation of bitinformation algortihm {implementation} is unknown. Please choose a different one." + f"Implementation of bitinformation algorithm {implementation} is unknown. Please choose a different one." ) if label is not None: with open(label + ".json", "w") as f: @@ -251,6 +256,26 @@ def _jl_get_bitinformation(ds, var, axis, dim, kwargs): return info_per_bit +def _py_get_bitinformation(ds, var, axis, dim, kwargs=None): + assert ( + kwargs == {} + ), "This implementation only supports the plain bitinfo implementation" + X = da.array(ds[var]).astype(np.uint) + if axis is not None: + dim = ds[var].dims[axis] + if isinstance(dim, str): + try: + axis = ds[var].get_axis_num(dim) + except ValueError: + logging.info(f"Variable {var} does not have dimension {dim}. 
Skipping.") + return + info_per_bit = {} + info_per_bit["bitinfo"] = pb.bitinformation(X, axis=axis) + info_per_bit["dim"] = dim + info_per_bit["axis"] = axis + return info_per_bit + + def _get_bitinformation_along_dims(ds, dim=None, label=None, overwrite=False, **kwargs): """Helper function for :py:func:`xbitinfo.xbitinfo.get_bitinformation` to handle multi-dimensional analysis for each dim specified. From 814ca5963f55f2255ff2114f39bd5311e162522d Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Mon, 20 Jun 2022 15:29:15 +0200 Subject: [PATCH 03/22] Remove debug print statements --- xbitinfo/_py_bitinfo.py | 6 ------ xbitinfo/xbitinfo.py | 16 +++++++++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/xbitinfo/_py_bitinfo.py b/xbitinfo/_py_bitinfo.py index c18caea4..8fe002f3 100644 --- a/xbitinfo/_py_bitinfo.py +++ b/xbitinfo/_py_bitinfo.py @@ -11,11 +11,8 @@ def bitpaircount_u1(a, b): index = ((unpack_a << 1) | unpack_b).reshape(-1, 8) selection = np.array([0, 1, 2, 3], dtype="u1") - print("selection") sel = np.where((index[..., np.newaxis]) == selection, True, False) - print("summing stuff") to_return = sel.sum(axis=0).reshape(8, 2, 2) - print("finished summing stuff") return to_return @@ -36,10 +33,7 @@ def bitpaircount(a, b): def mutual_information(a, b, base=2): size = np.prod(np.broadcast_shapes(a.shape, b.shape)) - print("run bitpaircount") counts = bitpaircount(a, b) - print("finished bitpaircount") - print(size) p = counts.astype("float") / size pr = p.sum(axis=-1)[..., np.newaxis] diff --git a/xbitinfo/xbitinfo.py b/xbitinfo/xbitinfo.py index 91e0fd80..ee08aa94 100644 --- a/xbitinfo/xbitinfo.py +++ b/xbitinfo/xbitinfo.py @@ -229,7 +229,7 @@ def get_bitinformation( return dict_to_dataset(info_per_bit) -def _jl_get_bitinformation(ds, var, axis, dim, kwargs): +def _jl_get_bitinformation(ds, var, axis, dim, kwargs={}): X = ds[var].values Main.X = X if axis is not None: @@ -256,10 
+256,16 @@ def _jl_get_bitinformation(ds, var, axis, dim, kwargs): return info_per_bit -def _py_get_bitinformation(ds, var, axis, dim, kwargs=None): - assert ( - kwargs == {} - ), "This implementation only supports the plain bitinfo implementation" +def _py_get_bitinformation(ds, var, axis, dim, kwargs={}): + if "set_zero_insignificant" in kwargs.keys(): + if kwargs["set_zero_insignificant"]: + raise NotImplementedError( + "set_zero_insignificant is not implemented in the python implementation" + ) + else: + assert ( + kwargs == {} + ), "This implementation only supports the plain bitinfo implementation" X = da.array(ds[var]).astype(np.uint) if axis is not None: dim = ds[var].dims[axis] From 7158c487c23c391a9683ca9a90e0352c9bccae6d Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Mon, 20 Jun 2022 15:58:14 +0200 Subject: [PATCH 04/22] fix logging message --- xbitinfo/xbitinfo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xbitinfo/xbitinfo.py b/xbitinfo/xbitinfo.py index ee08aa94..673b3e0f 100644 --- a/xbitinfo/xbitinfo.py +++ b/xbitinfo/xbitinfo.py @@ -241,7 +241,7 @@ def _jl_get_bitinformation(ds, var, axis, dim, kwargs={}): # in julia convention axis + 1 axis_jl = ds[var].get_axis_num(dim) + 1 except ValueError: - logging.info(f"Variable [var] does not have dimension {dim}. Skipping.") + logging.info(f"Variable {var} does not have dimension {dim}. 
Skipping.") return assert isinstance(axis_jl, int) Main.dim = axis_jl From 4bade4c34228f2a3d7177f98dc763e118e103067 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Wed, 22 Jun 2022 15:33:03 +0200 Subject: [PATCH 05/22] WIP: improve map_block call --- xbitinfo/_py_bitinfo.py | 16 +++++++++++++--- xbitinfo/xbitinfo.py | 3 ++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/xbitinfo/_py_bitinfo.py b/xbitinfo/_py_bitinfo.py index 8fe002f3..73f44584 100644 --- a/xbitinfo/_py_bitinfo.py +++ b/xbitinfo/_py_bitinfo.py @@ -4,10 +4,20 @@ def bitpaircount_u1(a, b): assert a.dtype == "u1" assert b.dtype == "u1" - unpack_a = ( - a.map_blocks(np.unpackbits, drop_axis=0).compute().astype("u1") + unpack_a = a.map_blocks( + np.unpackbits, + drop_axis=0, + meta=np.array((), dtype=np.uint8), + chunks=(a.size * 8,), + ).astype( + "u1" ) # compute needed for correct shape - unpack_b = b.map_blocks(np.unpackbits, drop_axis=0).compute().astype("u1") + unpack_b = b.map_blocks( + np.unpackbits, + drop_axis=0, + meta=np.array((), dtype=np.uint8), + chunks=(b.size * 8,), + ).astype("u1") index = ((unpack_a << 1) | unpack_b).reshape(-1, 8) selection = np.array([0, 1, 2, 3], dtype="u1") diff --git a/xbitinfo/xbitinfo.py b/xbitinfo/xbitinfo.py index 673b3e0f..d91accce 100644 --- a/xbitinfo/xbitinfo.py +++ b/xbitinfo/xbitinfo.py @@ -276,7 +276,8 @@ def _py_get_bitinformation(ds, var, axis, dim, kwargs={}): logging.info(f"Variable {var} does not have dimension {dim}. 
Skipping.") return info_per_bit = {} - info_per_bit["bitinfo"] = pb.bitinformation(X, axis=axis) + logging.info("Calling python implementation now") + info_per_bit["bitinfo"] = pb.bitinformation(X, axis=axis).compute() info_per_bit["dim"] = dim info_per_bit["axis"] = axis return info_per_bit From ecc51eef92f4b57bb99b0cc040274b5b0609ee0a Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Sun, 10 Jul 2022 16:30:09 +0200 Subject: [PATCH 06/22] Suppress calculation on zeros --- xbitinfo/_py_bitinfo.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xbitinfo/_py_bitinfo.py b/xbitinfo/_py_bitinfo.py index 73f44584..732a386f 100644 --- a/xbitinfo/_py_bitinfo.py +++ b/xbitinfo/_py_bitinfo.py @@ -1,4 +1,5 @@ import numpy as np +import numpy.ma as nm def bitpaircount_u1(a, b): @@ -46,12 +47,11 @@ def mutual_information(a, b, base=2): counts = bitpaircount(a, b) p = counts.astype("float") / size + p = nm.masked_equal(p, 0, copy=False) pr = p.sum(axis=-1)[..., np.newaxis] ps = p.sum(axis=-2)[..., np.newaxis, :] - - return np.where(p > 0, p * np.log(p / (pr * ps)), 0).sum(axis=(-1, -2)) / np.log( - base - ) + mutual_info = (p * np.ma.log(p / (pr * ps))).sum(axis=(-1, -2)) / np.log(base) + return mutual_info def bitinformation(a, axis=0): From 5f860aecd2e8927be6e607aff7aea8628c6e6eb0 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Tue, 6 Sep 2022 18:43:13 -0700 Subject: [PATCH 07/22] Add dask --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 2f8e1e5f..2bc00205 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ + +dask xarray julia tqdm From f7ea608c4c0bcdd0bafad6b1b4bbf6c364a0aad6 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Tue, 6 Sep 2022 18:43:31 -0700 Subject: [PATCH 08/22] Typo --- requirements.txt | 1 - 1 
file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 2bc00205..8b8e6ada 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ - dask xarray julia From 88c25164755a7b9c17ef4c835d343ef6dc6809f6 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Mon, 17 Oct 2022 23:14:51 +0200 Subject: [PATCH 09/22] use dask.ma functions --- xbitinfo/_py_bitinfo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xbitinfo/_py_bitinfo.py b/xbitinfo/_py_bitinfo.py index 732a386f..32621538 100644 --- a/xbitinfo/_py_bitinfo.py +++ b/xbitinfo/_py_bitinfo.py @@ -1,3 +1,4 @@ +import dask.array as da import numpy as np import numpy.ma as nm @@ -47,7 +48,7 @@ def mutual_information(a, b, base=2): counts = bitpaircount(a, b) p = counts.astype("float") / size - p = nm.masked_equal(p, 0, copy=False) + p = da.ma.masked_equal(p, 0) pr = p.sum(axis=-1)[..., np.newaxis] ps = p.sum(axis=-2)[..., np.newaxis, :] mutual_info = (p * np.ma.log(p / (pr * ps))).sum(axis=(-1, -2)) / np.log(base) From 18217c2658dacb60c52c1f7d2108803e1489e45f Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Mon, 17 Oct 2022 23:15:57 +0200 Subject: [PATCH 10/22] add implementation arg to additional funcs --- xbitinfo/xbitinfo.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/xbitinfo/xbitinfo.py b/xbitinfo/xbitinfo.py index d91accce..dbe045b7 100644 --- a/xbitinfo/xbitinfo.py +++ b/xbitinfo/xbitinfo.py @@ -170,12 +170,22 @@ def get_bitinformation( if dim is None and axis is None: # gather bitinformation on all axis return _get_bitinformation_along_dims( - ds, dim=dim, label=label, overwrite=overwrite, **kwargs + ds, + dim=dim, + label=label, + overwrite=overwrite, + implementation=implementation, + **kwargs, ) if isinstance(dim, list) and axis is None: # gather bitinformation on dims specified return 
_get_bitinformation_along_dims( - ds, dim=dim, label=label, overwrite=overwrite, **kwargs + ds, + dim=dim, + label=label, + overwrite=overwrite, + implementation=implementation, + **kwargs, ) else: # gather bitinformation along one axis @@ -283,7 +293,14 @@ def _py_get_bitinformation(ds, var, axis, dim, kwargs={}): return info_per_bit -def _get_bitinformation_along_dims(ds, dim=None, label=None, overwrite=False, **kwargs): +def _get_bitinformation_along_dims( + ds, + dim=None, + label=None, + overwrite=False, + implementation="BitInformation.jl", + **kwargs, +): """Helper function for :py:func:`xbitinfo.xbitinfo.get_bitinformation` to handle multi-dimensional analysis for each dim specified. Simple wrapper around :py:func:`xbitinfo.xbitinfo.get_bitinformation`, which calls :py:func:`xbitinfo.xbitinfo.get_bitinformation` From 902731f616ab9b687ec7c266cc538bb514316e8b Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Mon, 17 Oct 2022 23:18:14 +0200 Subject: [PATCH 11/22] flatten array before apply map_blocks --- xbitinfo/_py_bitinfo.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/xbitinfo/_py_bitinfo.py b/xbitinfo/_py_bitinfo.py index 32621538..ceb3101e 100644 --- a/xbitinfo/_py_bitinfo.py +++ b/xbitinfo/_py_bitinfo.py @@ -6,20 +6,26 @@ def bitpaircount_u1(a, b): assert a.dtype == "u1" assert b.dtype == "u1" - unpack_a = a.map_blocks( - np.unpackbits, - drop_axis=0, - meta=np.array((), dtype=np.uint8), - chunks=(a.size * 8,), - ).astype( - "u1" - ) # compute needed for correct shape - unpack_b = b.map_blocks( - np.unpackbits, - drop_axis=0, - meta=np.array((), dtype=np.uint8), - chunks=(b.size * 8,), - ).astype("u1") + unpack_a = ( + a.flatten() + .map_blocks( + np.unpackbits, + drop_axis=0, + meta=np.array((), dtype=np.uint8), + chunks=(a.size * 8,), + ) + .astype("u1") + ) + unpack_b = ( + b.flatten() + .map_blocks( + np.unpackbits, + drop_axis=0, + 
meta=np.array((), dtype=np.uint8), + chunks=(b.size * 8,), + ) + .astype("u1") + ) index = ((unpack_a << 1) | unpack_b).reshape(-1, 8) selection = np.array([0, 1, 2, 3], dtype="u1") From cf4b3dac5b609e89845e79c63ed44276f15a982b Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Mon, 17 Oct 2022 23:19:05 +0200 Subject: [PATCH 12/22] remove np.ma.log as it loads eagerly --- xbitinfo/_py_bitinfo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xbitinfo/_py_bitinfo.py b/xbitinfo/_py_bitinfo.py index ceb3101e..65c2d7b1 100644 --- a/xbitinfo/_py_bitinfo.py +++ b/xbitinfo/_py_bitinfo.py @@ -57,7 +57,7 @@ def mutual_information(a, b, base=2): p = da.ma.masked_equal(p, 0) pr = p.sum(axis=-1)[..., np.newaxis] ps = p.sum(axis=-2)[..., np.newaxis, :] - mutual_info = (p * np.ma.log(p / (pr * ps))).sum(axis=(-1, -2)) / np.log(base) + mutual_info = (p * np.log(p / (pr * ps))).sum(axis=(-1, -2)) / np.log(base) return mutual_info From 0f7dcbbb7998a2f8691878729c2297219b92398a Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Tue, 18 Oct 2022 01:33:13 +0200 Subject: [PATCH 13/22] add python implementation test --- tests/test_get_bitinformation.py | 92 +++++++++++++++++++++----------- 1 file changed, 61 insertions(+), 31 deletions(-) diff --git a/tests/test_get_bitinformation.py b/tests/test_get_bitinformation.py index 2fcb3feb..e91187d0 100644 --- a/tests/test_get_bitinformation.py +++ b/tests/test_get_bitinformation.py @@ -63,70 +63,96 @@ def bitinfo_assert_different(bitinfo1, bitinfo2): assert (bitinfo1 != bitinfo2).any() -def test_get_bitinformation_returns_dataset(): +@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) +def test_get_bitinformation_returns_dataset(implementation=None): """Test xb.get_bitinformation returns xr.Dataset.""" ds = xr.tutorial.load_dataset("rasm") - assert isinstance(xb.get_bitinformation(ds, axis=0), 
xr.Dataset) + assert isinstance( + xb.get_bitinformation(ds, axis=0, implementation=implementation), xr.Dataset + ) -def test_get_bitinformation_dim(): +@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) +def test_get_bitinformation_dim(implementation=None): """Test xb.get_bitinformation is sensitive to dim.""" ds = xr.tutorial.load_dataset("rasm") - bitinfo0 = xb.get_bitinformation(ds, axis=0) - bitinfo2 = xb.get_bitinformation(ds, axis=2) + bitinfo0 = xb.get_bitinformation(ds, axis=0, implementation=implementation) + bitinfo2 = xb.get_bitinformation(ds, axis=2, implementation=implementation) assert_different(bitinfo0, bitinfo2) -def test_get_bitinformation_dim_string_equals_axis_int(): +@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) +def test_get_bitinformation_dim_string_equals_axis_int(implementation=None): """Test xb.get_bitinformation undestands xarray dimension names the same way as axis as integers.""" ds = xr.tutorial.load_dataset("rasm") - bitinfox = xb.get_bitinformation(ds, dim="x") - bitinfo2 = xb.get_bitinformation(ds, axis=2) + bitinfox = xb.get_bitinformation(ds, dim="x", implementation=implementation) + bitinfo2 = xb.get_bitinformation(ds, axis=2, implementation=implementation) assert_identical(bitinfox, bitinfo2) -def test_get_bitinformation_masked_value(): +@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) +def test_get_bitinformation_masked_value(implementation): """Test xb.get_bitinformation is sensitive to masked_value.""" ds = xr.tutorial.load_dataset("rasm") - bitinfo = xb.get_bitinformation(ds, dim="x") - bitinfo_no_mask = xb.get_bitinformation(ds, dim="x", masked_value="nothing") - bitinfo_no_mask_None = xb.get_bitinformation(ds, dim="x", masked_value=None) + bitinfo = xb.get_bitinformation(ds, dim="x", implementation=implementation) + bitinfo_no_mask = xb.get_bitinformation( + ds, dim="x", masked_value="nothing", implementation=implementation + ) + 
bitinfo_no_mask_None = xb.get_bitinformation( + ds, dim="x", masked_value=None, implementation=implementation + ) assert_identical(bitinfo_no_mask, bitinfo_no_mask_None) assert_different(bitinfo, bitinfo_no_mask) -def test_get_bitinformation_set_zero_insignificant(): +@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) +def test_get_bitinformation_set_zero_insignificant(implementation=None): """Test xb.get_bitinformation is sensitive to set_zero_insignificant.""" ds = xr.tutorial.load_dataset("air_temperature") dim = "lon" - bitinfo_szi_False = xb.get_bitinformation(ds, dim=dim, set_zero_insignificant=False) - bitinfo_szi_True = xb.get_bitinformation(ds, dim=dim, set_zero_insignificant=True) - bitinfo = xb.get_bitinformation(ds, dim=dim) + bitinfo_szi_False = xb.get_bitinformation( + ds, dim=dim, set_zero_insignificant=False, implementation=implementation + ) + bitinfo_szi_True = xb.get_bitinformation( + ds, dim=dim, set_zero_insignificant=True, implementation=implementation + ) + bitinfo = xb.get_bitinformation(ds, dim=dim, implementation=implementation) assert_different(bitinfo, bitinfo_szi_False) assert_identical(bitinfo, bitinfo_szi_True) -def test_get_bitinformation_confidence(): +@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) +def test_get_bitinformation_confidence(implementation=None): """Test xb.get_bitinformation is sensitive to confidence.""" ds = xr.tutorial.load_dataset("air_temperature") dim = "lon" - bitinfo_conf99 = xb.get_bitinformation(ds, dim=dim, confidence=0.99) - bitinfo_conf50 = xb.get_bitinformation(ds, dim=dim, confidence=0.5) - bitinfo = xb.get_bitinformation(ds, dim=dim) + bitinfo_conf99 = xb.get_bitinformation( + ds, dim=dim, confidence=0.99, implementation=implementation + ) + bitinfo_conf50 = xb.get_bitinformation( + ds, dim=dim, confidence=0.5, implementation=implementation + ) + bitinfo = xb.get_bitinformation(ds, dim=dim, implementation=implementation) 
assert_different(bitinfo_conf99, bitinfo_conf50) assert_identical(bitinfo, bitinfo_conf99) -def test_get_bitinformation_label(rasm): +@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) +def test_get_bitinformation_label(rasm, implementation=None): """Test xb.get_bitinformation serializes when label given.""" ds = rasm - xb.get_bitinformation(ds, dim="x", label="./tmp_testdir/rasm") + xb.get_bitinformation( + ds, dim="x", label="./tmp_testdir/rasm", implementation=implementation + ) assert os.path.exists("./tmp_testdir/rasm.json") # second call should be faster - xb.get_bitinformation(ds, dim="x", label="./tmp_testdir/rasm") + xb.get_bitinformation( + ds, dim="x", label="./tmp_testdir/rasm", implementation=implementation + ) os.remove("./tmp_testdir/rasm.json") +@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) @pytest.mark.parametrize("dtype", ["float64", "float32", "float16"]) def test_get_bitinformation_dtype(rasm, dtype): """Test xb.get_bitinformation returns correct number of bits depending on dtype.""" @@ -138,10 +164,11 @@ def test_get_bitinformation_dtype(rasm, dtype): ) -def test_get_bitinformation_multidim(rasm): +@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) +def test_get_bitinformation_multidim(rasm, implementation=None): """Test xb.get_bitinformation runs on all dimensions by default""" ds = rasm - bi = xb.get_bitinformation(ds) + bi = xb.get_bitinformation(ds, implementation=implementation) # check length of dimension assert bi.dims["dim"] == len(ds.dims) bi_time = bi.sel(dim="time").Tair.values @@ -152,28 +179,31 @@ def test_get_bitinformation_multidim(rasm): assert any(bi_y != bi_x) -def test_get_bitinformation_different_variables_dims(rasm): +@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) +def test_get_bitinformation_different_variables_dims(rasm, implementation=None): """Test xb.get_bitinformation runs with variables of different 
dimensionality""" ds = rasm # add variable with different dimensionality ds["Tair_mean"] = ds.Tair.mean(dim="time") - bi = xb.get_bitinformation(ds) + bi = xb.get_bitinformation(ds, implementation=implementation) assert all(np.isnan(bi.Tair_mean.sel(dim="time"))) bi_Tair_mean_x = bi.Tair_mean.sel(dim="x") bi_Tair_x = bi.Tair.sel(dim="x") assert_different(bi_Tair_mean_x, bi_Tair_x) -def test_get_bitinformation_different_dtypes(rasm): +@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) +def test_get_bitinformation_different_dtypes(rasm, implementation=None): ds = rasm ds["Tair32"] = ds.Tair.astype("float32") ds["Tair16"] = ds.Tair.astype("float16") - bi = xb.get_bitinformation(ds) + bi = xb.get_bitinformation(ds, implementation=implementation) for bitdim in ["bit16", "bit32", "bit64"]: assert bitdim in bi.dims assert bitdim in bi.coords -def test_get_bitinformation_dim_list(rasm): - bi = xb.get_bitinformation(rasm, dim=["x", "y"]) +@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) +def test_get_bitinformation_dim_list(rasm, implementation=None): + bi = xb.get_bitinformation(rasm, dim=["x", "y"], implementation=implementation) assert (bi.dim == ["x", "y"]).all() From 9a8e1f34211f844171eeb2a22f6882748ecef1c0 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Tue, 18 Oct 2022 01:51:52 +0200 Subject: [PATCH 14/22] remove default arg --- tests/test_get_bitinformation.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/test_get_bitinformation.py b/tests/test_get_bitinformation.py index e91187d0..5bcf7557 100644 --- a/tests/test_get_bitinformation.py +++ b/tests/test_get_bitinformation.py @@ -64,7 +64,7 @@ def bitinfo_assert_different(bitinfo1, bitinfo2): @pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) -def test_get_bitinformation_returns_dataset(implementation=None): +def 
test_get_bitinformation_returns_dataset(implementation): """Test xb.get_bitinformation returns xr.Dataset.""" ds = xr.tutorial.load_dataset("rasm") assert isinstance( @@ -73,7 +73,7 @@ def test_get_bitinformation_returns_dataset(implementation=None): @pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) -def test_get_bitinformation_dim(implementation=None): +def test_get_bitinformation_dim(implementation): """Test xb.get_bitinformation is sensitive to dim.""" ds = xr.tutorial.load_dataset("rasm") bitinfo0 = xb.get_bitinformation(ds, axis=0, implementation=implementation) @@ -82,7 +82,7 @@ def test_get_bitinformation_dim(implementation=None): @pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) -def test_get_bitinformation_dim_string_equals_axis_int(implementation=None): +def test_get_bitinformation_dim_string_equals_axis_int(implementation): """Test xb.get_bitinformation undestands xarray dimension names the same way as axis as integers.""" ds = xr.tutorial.load_dataset("rasm") bitinfox = xb.get_bitinformation(ds, dim="x", implementation=implementation) @@ -106,7 +106,7 @@ def test_get_bitinformation_masked_value(implementation): @pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) -def test_get_bitinformation_set_zero_insignificant(implementation=None): +def test_get_bitinformation_set_zero_insignificant(implementation): """Test xb.get_bitinformation is sensitive to set_zero_insignificant.""" ds = xr.tutorial.load_dataset("air_temperature") dim = "lon" @@ -122,7 +122,7 @@ def test_get_bitinformation_set_zero_insignificant(implementation=None): @pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) -def test_get_bitinformation_confidence(implementation=None): +def test_get_bitinformation_confidence(implementation): """Test xb.get_bitinformation is sensitive to confidence.""" ds = xr.tutorial.load_dataset("air_temperature") dim = "lon" @@ -138,7 +138,7 @@ def 
test_get_bitinformation_confidence(implementation=None): @pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) -def test_get_bitinformation_label(rasm, implementation=None): +def test_get_bitinformation_label(rasm, implementation): """Test xb.get_bitinformation serializes when label given.""" ds = rasm xb.get_bitinformation( @@ -165,7 +165,7 @@ def test_get_bitinformation_dtype(rasm, dtype): @pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) -def test_get_bitinformation_multidim(rasm, implementation=None): +def test_get_bitinformation_multidim(rasm, implementation): """Test xb.get_bitinformation runs on all dimensions by default""" ds = rasm bi = xb.get_bitinformation(ds, implementation=implementation) @@ -180,7 +180,7 @@ def test_get_bitinformation_multidim(rasm, implementation=None): @pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) -def test_get_bitinformation_different_variables_dims(rasm, implementation=None): +def test_get_bitinformation_different_variables_dims(rasm, implementation): """Test xb.get_bitinformation runs with variables of different dimensionality""" ds = rasm # add variable with different dimensionality @@ -193,7 +193,7 @@ def test_get_bitinformation_different_variables_dims(rasm, implementation=None): @pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) -def test_get_bitinformation_different_dtypes(rasm, implementation=None): +def test_get_bitinformation_different_dtypes(rasm, implementation): ds = rasm ds["Tair32"] = ds.Tair.astype("float32") ds["Tair16"] = ds.Tair.astype("float16") @@ -204,6 +204,6 @@ def test_get_bitinformation_different_dtypes(rasm, implementation=None): @pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) -def test_get_bitinformation_dim_list(rasm, implementation=None): +def test_get_bitinformation_dim_list(rasm, implementation): bi = xb.get_bitinformation(rasm, dim=["x", "y"], 
implementation=implementation) assert (bi.dim == ["x", "y"]).all() From 6207b70fb35de1f02ab88cdda3126cfb75000842 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Tue, 18 Oct 2022 01:52:13 +0200 Subject: [PATCH 15/22] add missing arg --- tests/test_get_bitinformation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_get_bitinformation.py b/tests/test_get_bitinformation.py index 5bcf7557..ed835003 100644 --- a/tests/test_get_bitinformation.py +++ b/tests/test_get_bitinformation.py @@ -68,7 +68,7 @@ def test_get_bitinformation_returns_dataset(implementation): """Test xb.get_bitinformation returns xr.Dataset.""" ds = xr.tutorial.load_dataset("rasm") assert isinstance( - xb.get_bitinformation(ds, axis=0, implementation=implementation), xr.Dataset + xb.get_bitinformation(ds, implementation=implementation, axis=0), xr.Dataset ) @@ -154,7 +154,7 @@ def test_get_bitinformation_label(rasm, implementation): @pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) @pytest.mark.parametrize("dtype", ["float64", "float32", "float16"]) -def test_get_bitinformation_dtype(rasm, dtype): +def test_get_bitinformation_dtype(rasm, dtype, implementation): """Test xb.get_bitinformation returns correct number of bits depending on dtype.""" ds = rasm.astype(dtype) v = list(ds.data_vars)[0] From bb521c1c8d6c521b274391ef51dd5157f653e4ee Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Tue, 18 Oct 2022 18:27:44 +0200 Subject: [PATCH 16/22] test set_zero_insignificant for python implementation --- tests/test_get_bitinformation.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/test_get_bitinformation.py b/tests/test_get_bitinformation.py index ed835003..60788aba 100644 --- a/tests/test_get_bitinformation.py +++ b/tests/test_get_bitinformation.py @@ -110,15 +110,18 @@ def 
test_get_bitinformation_set_zero_insignificant(implementation): """Test xb.get_bitinformation is sensitive to set_zero_insignificant.""" ds = xr.tutorial.load_dataset("air_temperature") dim = "lon" + bitinfo = xb.get_bitinformation(ds, dim=dim, implementation=implementation) bitinfo_szi_False = xb.get_bitinformation( ds, dim=dim, set_zero_insignificant=False, implementation=implementation ) - bitinfo_szi_True = xb.get_bitinformation( - ds, dim=dim, set_zero_insignificant=True, implementation=implementation - ) - bitinfo = xb.get_bitinformation(ds, dim=dim, implementation=implementation) - assert_different(bitinfo, bitinfo_szi_False) - assert_identical(bitinfo, bitinfo_szi_True) + try: + bitinfo_szi_True = xb.get_bitinformation( + ds, dim=dim, set_zero_insignificant=True, implementation=implementation + ) + assert_different(bitinfo, bitinfo_szi_True) + except NotImplementedError: + assert implementation == "python" + assert_identical(bitinfo, bitinfo_szi_False) @pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) From 31eb91545062418d2cbae4bbc6401ef5d6bf2b0d Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Tue, 18 Oct 2022 20:03:36 +0200 Subject: [PATCH 17/22] skip confidence interval test for python implementation --- tests/test_get_bitinformation.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tests/test_get_bitinformation.py b/tests/test_get_bitinformation.py index 60788aba..64f54656 100644 --- a/tests/test_get_bitinformation.py +++ b/tests/test_get_bitinformation.py @@ -129,15 +129,18 @@ def test_get_bitinformation_confidence(implementation): """Test xb.get_bitinformation is sensitive to confidence.""" ds = xr.tutorial.load_dataset("air_temperature") dim = "lon" - bitinfo_conf99 = xb.get_bitinformation( - ds, dim=dim, confidence=0.99, implementation=implementation - ) - bitinfo_conf50 = xb.get_bitinformation( - ds, dim=dim, confidence=0.5, 
implementation=implementation - ) bitinfo = xb.get_bitinformation(ds, dim=dim, implementation=implementation) - assert_different(bitinfo_conf99, bitinfo_conf50) - assert_identical(bitinfo, bitinfo_conf99) + try: + bitinfo_conf99 = xb.get_bitinformation( + ds, dim=dim, confidence=0.99, implementation=implementation + ) + bitinfo_conf50 = xb.get_bitinformation( + ds, dim=dim, confidence=0.5, implementation=implementation + ) + assert_different(bitinfo_conf99, bitinfo_conf50) + assert_identical(bitinfo, bitinfo_conf99) + except AssertionError: + assert implementation == "python" @pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) From cf7a36d8c9edae8de25037fb5604c6608d2baa35 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Tue, 18 Oct 2022 20:26:09 +0200 Subject: [PATCH 18/22] fix test --- tests/test_get_bitinformation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_get_bitinformation.py b/tests/test_get_bitinformation.py index 64f54656..1d8135dd 100644 --- a/tests/test_get_bitinformation.py +++ b/tests/test_get_bitinformation.py @@ -118,10 +118,10 @@ def test_get_bitinformation_set_zero_insignificant(implementation): bitinfo_szi_True = xb.get_bitinformation( ds, dim=dim, set_zero_insignificant=True, implementation=implementation ) - assert_different(bitinfo, bitinfo_szi_True) + assert_identical(bitinfo, bitinfo_szi_True) except NotImplementedError: assert implementation == "python" - assert_identical(bitinfo, bitinfo_szi_False) + assert_different(bitinfo, bitinfo_szi_False) @pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) From 16bcbe17051739c424b694ffed79edc134a3ddba Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Tue, 18 Oct 2022 23:22:53 +0200 Subject: [PATCH 19/22] remove masked_value test for python implementation --- tests/test_get_bitinformation.py | 1 - 1 file changed, 
1 deletion(-) diff --git a/tests/test_get_bitinformation.py b/tests/test_get_bitinformation.py index 1d8135dd..a992762b 100644 --- a/tests/test_get_bitinformation.py +++ b/tests/test_get_bitinformation.py @@ -90,7 +90,6 @@ def test_get_bitinformation_dim_string_equals_axis_int(implementation): assert_identical(bitinfox, bitinfo2) -@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) def test_get_bitinformation_masked_value(implementation): """Test xb.get_bitinformation is sensitive to masked_value.""" ds = xr.tutorial.load_dataset("rasm") From 7bd215d0e1d88dfd7fc7b2a21c4ac442fb77adfd Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Wed, 19 Oct 2022 11:27:27 -0700 Subject: [PATCH 20/22] implementation sensitive set_zero_insiginificant test --- tests/test_get_bitinformation.py | 5 ++++- xbitinfo/xbitinfo.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test_get_bitinformation.py b/tests/test_get_bitinformation.py index a992762b..2812c60b 100644 --- a/tests/test_get_bitinformation.py +++ b/tests/test_get_bitinformation.py @@ -120,7 +120,10 @@ def test_get_bitinformation_set_zero_insignificant(implementation): assert_identical(bitinfo, bitinfo_szi_True) except NotImplementedError: assert implementation == "python" - assert_different(bitinfo, bitinfo_szi_False) + if implementation == "python": + assert_identical(bitinfo, bitinfo_szi_False) + elif implementation == "BitInformation.jl": + assert_different(bitinfo, bitinfo_szi_False) @pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) diff --git a/xbitinfo/xbitinfo.py b/xbitinfo/xbitinfo.py index dbe045b7..60f33c9d 100644 --- a/xbitinfo/xbitinfo.py +++ b/xbitinfo/xbitinfo.py @@ -124,7 +124,7 @@ def get_bitinformation( - masked_value: defaults to ``NaN`` (different to ``bitinformation.jl`` defaulting to ``"nothing"``), set ``None`` disable masking - mask: use ``masked_value`` instead - - 
set_zero_insignificant (``bool``): defaults to ``True`` + - set_zero_insignificant (``bool``): defaults to ``True`` (BitInformation.jl implementation) or ``False`` (python implementation) - confidence (``float``): defaults to ``0.99`` From f683884b95bc386fa8ac7f0388161e39ca7eb7ec Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Wed, 19 Oct 2022 13:38:46 -0700 Subject: [PATCH 21/22] fix masked_value test --- tests/test_get_bitinformation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_get_bitinformation.py b/tests/test_get_bitinformation.py index 2812c60b..caedfe41 100644 --- a/tests/test_get_bitinformation.py +++ b/tests/test_get_bitinformation.py @@ -90,7 +90,7 @@ def test_get_bitinformation_dim_string_equals_axis_int(implementation): assert_identical(bitinfox, bitinfo2) -def test_get_bitinformation_masked_value(implementation): +def test_get_bitinformation_masked_value(implementation="BitInformation.jl"): """Test xb.get_bitinformation is sensitive to masked_value.""" ds = xr.tutorial.load_dataset("rasm") bitinfo = xb.get_bitinformation(ds, dim="x", implementation=implementation) From b70268e588e23b2cf3d37bd2080d860b64ab3072 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Thu, 20 Oct 2022 11:17:04 -0700 Subject: [PATCH 22/22] rename implementation BitInformation.jl -> julia --- tests/test_get_bitinformation.py | 26 +++++++++++++------------- xbitinfo/xbitinfo.py | 14 +++++++------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tests/test_get_bitinformation.py b/tests/test_get_bitinformation.py index caedfe41..939e5442 100644 --- a/tests/test_get_bitinformation.py +++ b/tests/test_get_bitinformation.py @@ -63,7 +63,7 @@ def bitinfo_assert_different(bitinfo1, bitinfo2): assert (bitinfo1 != bitinfo2).any() -@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) 
+@pytest.mark.parametrize("implementation", ["julia", "python"]) def test_get_bitinformation_returns_dataset(implementation): """Test xb.get_bitinformation returns xr.Dataset.""" ds = xr.tutorial.load_dataset("rasm") @@ -72,7 +72,7 @@ def test_get_bitinformation_returns_dataset(implementation): ) -@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) +@pytest.mark.parametrize("implementation", ["julia", "python"]) def test_get_bitinformation_dim(implementation): """Test xb.get_bitinformation is sensitive to dim.""" ds = xr.tutorial.load_dataset("rasm") @@ -81,7 +81,7 @@ def test_get_bitinformation_dim(implementation): assert_different(bitinfo0, bitinfo2) -@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) +@pytest.mark.parametrize("implementation", ["julia", "python"]) def test_get_bitinformation_dim_string_equals_axis_int(implementation): """Test xb.get_bitinformation undestands xarray dimension names the same way as axis as integers.""" ds = xr.tutorial.load_dataset("rasm") @@ -90,7 +90,7 @@ def test_get_bitinformation_dim_string_equals_axis_int(implementation): assert_identical(bitinfox, bitinfo2) -def test_get_bitinformation_masked_value(implementation="BitInformation.jl"): +def test_get_bitinformation_masked_value(implementation="julia"): """Test xb.get_bitinformation is sensitive to masked_value.""" ds = xr.tutorial.load_dataset("rasm") bitinfo = xb.get_bitinformation(ds, dim="x", implementation=implementation) @@ -104,7 +104,7 @@ def test_get_bitinformation_masked_value(implementation="BitInformation.jl"): assert_different(bitinfo, bitinfo_no_mask) -@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) +@pytest.mark.parametrize("implementation", ["julia", "python"]) def test_get_bitinformation_set_zero_insignificant(implementation): """Test xb.get_bitinformation is sensitive to set_zero_insignificant.""" ds = xr.tutorial.load_dataset("air_temperature") @@ -122,11 +122,11 @@ def 
test_get_bitinformation_set_zero_insignificant(implementation): assert implementation == "python" if implementation == "python": assert_identical(bitinfo, bitinfo_szi_False) - elif implementation == "BitInformation.jl": + elif implementation == "julia": assert_different(bitinfo, bitinfo_szi_False) -@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) +@pytest.mark.parametrize("implementation", ["julia", "python"]) def test_get_bitinformation_confidence(implementation): """Test xb.get_bitinformation is sensitive to confidence.""" ds = xr.tutorial.load_dataset("air_temperature") @@ -145,7 +145,7 @@ def test_get_bitinformation_confidence(implementation): assert implementation == "python" -@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) +@pytest.mark.parametrize("implementation", ["julia", "python"]) def test_get_bitinformation_label(rasm, implementation): """Test xb.get_bitinformation serializes when label given.""" ds = rasm @@ -160,7 +160,7 @@ def test_get_bitinformation_label(rasm, implementation): os.remove("./tmp_testdir/rasm.json") -@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) +@pytest.mark.parametrize("implementation", ["julia", "python"]) @pytest.mark.parametrize("dtype", ["float64", "float32", "float16"]) def test_get_bitinformation_dtype(rasm, dtype, implementation): """Test xb.get_bitinformation returns correct number of bits depending on dtype.""" @@ -172,7 +172,7 @@ def test_get_bitinformation_dtype(rasm, dtype, implementation): ) -@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) +@pytest.mark.parametrize("implementation", ["julia", "python"]) def test_get_bitinformation_multidim(rasm, implementation): """Test xb.get_bitinformation runs on all dimensions by default""" ds = rasm @@ -187,7 +187,7 @@ def test_get_bitinformation_multidim(rasm, implementation): assert any(bi_y != bi_x) -@pytest.mark.parametrize("implementation", ["BitInformation.jl", 
"python"]) +@pytest.mark.parametrize("implementation", ["julia", "python"]) def test_get_bitinformation_different_variables_dims(rasm, implementation): """Test xb.get_bitinformation runs with variables of different dimensionality""" ds = rasm @@ -200,7 +200,7 @@ def test_get_bitinformation_different_variables_dims(rasm, implementation): assert_different(bi_Tair_mean_x, bi_Tair_x) -@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) +@pytest.mark.parametrize("implementation", ["julia", "python"]) def test_get_bitinformation_different_dtypes(rasm, implementation): ds = rasm ds["Tair32"] = ds.Tair.astype("float32") @@ -211,7 +211,7 @@ def test_get_bitinformation_different_dtypes(rasm, implementation): assert bitdim in bi.coords -@pytest.mark.parametrize("implementation", ["BitInformation.jl", "python"]) +@pytest.mark.parametrize("implementation", ["julia", "python"]) def test_get_bitinformation_dim_list(rasm, implementation): bi = xb.get_bitinformation(rasm, dim=["x", "y"], implementation=implementation) assert (bi.dim == ["x", "y"]).all() diff --git a/xbitinfo/xbitinfo.py b/xbitinfo/xbitinfo.py index 60f33c9d..08efb4a3 100644 --- a/xbitinfo/xbitinfo.py +++ b/xbitinfo/xbitinfo.py @@ -96,7 +96,7 @@ def get_bitinformation( axis=None, label=None, overwrite=False, - implementation="BitInformation.jl", + implementation="julia", **kwargs, ): """Wrap `BitInformation.jl.bitinformation() `__. @@ -117,14 +117,14 @@ def get_bitinformation( If ``False``, try using serialized bitinfo based on label; if true or label does not exist, run bitinformation implementation : str Bitinformation algorithm implementation. 
Valid options are - - BitInformation.jl, the original implementation in julia by Milan Kloewer - - python, a copy of the core functionality of BitInformation.jl in python + - julia, the original implementation (BitInformation.jl) in julia by Milan Kloewer + - python, a copy of the core functionality of BitInformation.jl in python kwargs to be passed to bitinformation: - - masked_value: defaults to ``NaN`` (different to ``bitinformation.jl`` defaulting to ``"nothing"``), set ``None`` disable masking + - masked_value: defaults to ``NaN`` (different to ``BitInformation.jl`` defaulting to ``"nothing"``), set ``None`` disable masking - mask: use ``masked_value`` instead - - set_zero_insignificant (``bool``): defaults to ``True`` (BitInformation.jl implementation) or ``False`` (python implementation) + - set_zero_insignificant (``bool``): defaults to ``True`` (julia implementation) or ``False`` (python implementation) - confidence (``float``): defaults to ``0.99`` @@ -216,7 +216,7 @@ def get_bitinformation( pbar = tqdm(ds.data_vars) for var in pbar: pbar.set_description("Processing %s" % var) - if implementation == "BitInformation.jl": + if implementation == "julia": info_per_bit_var = _jl_get_bitinformation(ds, var, axis, dim, kwargs) if info_per_bit_var is None: continue @@ -298,7 +298,7 @@ def _get_bitinformation_along_dims( dim=None, label=None, overwrite=False, - implementation="BitInformation.jl", + implementation="julia", **kwargs, ): """Helper function for :py:func:`xbitinfo.xbitinfo.get_bitinformation` to handle multi-dimensional analysis for each dim specified.