Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/issue 271 - Remove randomized value check #272

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Catch "unsupported datatype" exception from netCDF library ([#268](https://github.com/nasa/ncompare/pull/268)) ([**@danielfromearth**](https://github.com/danielfromearth))

### Removed

- **Breaking:** remove the randomized value check logic and its arguments (`comparison_var_group`, `comparison_var_name`) from the CLI and API ([#271](https://github.com/nasa/ncompare/pull/271)) ([**@danielfromearth**](https://github.com/danielfromearth))

## [1.11.0] - 2024-11-14

### Added
Expand Down
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,6 @@ ncompare S001G01.nc S001G01_SUBSET.nc --file-text subset_comparison.txt
- `--no-color` : Turn off all colorized output.
- `--show-attributes` : Include variable attributes in the table that compares variables.
- `--show-chunks` : Include chunk sizes in the table that compares variables.
- `-v` (`--comparison_var_name`) [VAR_NAME]: Compare specific values for this variable.
- `-g` (`--comparison_var_group`) [VAR_GROUP]: Group that contains the `comparison_var_name`.
- `--column-widths` [WIDTH, WIDTH, WIDTH]: Width, in number of characters, of the three columns in the comparison report.
- `--version` : Show the current version and then exit.

Expand Down
2 changes: 0 additions & 2 deletions ncompare/console.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,6 @@ def _cli(args: Optional[Sequence[str]]) -> argparse.Namespace:
)
parser.add_argument("nc_a", help="First NetCDF file")
parser.add_argument("nc_b", help="Second NetCDF file")
parser.add_argument("-v", "--comparison_var_name", help="Comparison variable name")
parser.add_argument("-g", "--comparison_var_group", help="Comparison variable group")
parser.add_argument(
"--only-diffs",
action="store_true",
Expand Down
150 changes: 1 addition & 149 deletions ncompare/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,14 @@

"""Compare the structure of two NetCDF files."""

import random
import traceback
from collections import namedtuple
from collections.abc import Iterable, Iterator
from pathlib import Path
from typing import Optional, Union

import netCDF4
import numpy as np
import xarray as xr
from colorama import Fore, Style
from colorama import Fore

from ncompare.printing import Outputter
from ncompare.sequence_operations import common_elements, count_diffs
Expand All @@ -57,8 +54,6 @@
def compare(
nc_a: Union[str, Path],
nc_b: Union[str, Path],
comparison_var_group: Optional[str] = None,
comparison_var_name: Optional[str] = None,
only_diffs: bool = False,
no_color: bool = False,
show_chunks: bool = False,
Expand All @@ -76,10 +71,6 @@ def compare(
filepath to NetCDF4
nc_b : str
filepath to NetCDF4
comparison_var_group : str, optional
The name of a group which contains a desired comparison variable
comparison_var_name : str, optional
The name of a variable for which we want to compare values
only_diffs : bool, optional
Whether to show only the variables/attributes that are different between the two files
no_color : bool, default False
Expand Down Expand Up @@ -127,8 +118,6 @@ def compare(
out,
nc_a,
nc_b,
comparison_var_group=comparison_var_group,
comparison_var_name=comparison_var_name,
show_chunks=show_chunks,
show_attributes=show_attributes,
)
Expand All @@ -146,8 +135,6 @@ def run_through_comparisons(
out: Outputter,
nc_a: Union[str, Path],
nc_b: Union[str, Path],
comparison_var_group: Optional[str],
comparison_var_name: Optional[str],
show_chunks: bool,
show_attributes: bool,
) -> None:
Expand All @@ -158,8 +145,6 @@ def run_through_comparisons(
out
nc_a
nc_b
comparison_var_group
comparison_var_name
show_chunks
show_attributes
"""
Expand All @@ -175,92 +160,12 @@ def run_through_comparisons(
list_b = _get_groups(nc_b)
_, _, _ = out.lists_diff(list_a, list_b)

if comparison_var_group:
# Show the variables within the selected group.
out.print(
Fore.LIGHTBLUE_EX + f"\nVariables within specified group <{comparison_var_group}>:",
add_to_history=True,
)
vlist_a = _get_vars(nc_a, comparison_var_group)
vlist_b = _get_vars(nc_b, comparison_var_group)
_, _, _ = out.lists_diff(vlist_a, vlist_b)

# TODO: Remove comparison variable/val?
if comparison_var_name:
try:
# Print the first part of the values array for the selected variable.
out.print(
Fore.LIGHTBLUE_EX
+ f"\nSample values within specified variable <{comparison_var_name}>:"
)
_print_sample_values(out, nc_a, comparison_var_group, comparison_var_name)
_print_sample_values(out, nc_b, comparison_var_group, comparison_var_name)
# compare_sample_values(nc_a, nc_b, groupname=comparison_var_group, varname=comparison_var_name)

out.print(
Fore.LIGHTBLUE_EX
+ f"\nChecking multiple random values within specified variable <{comparison_var_name}>:"
)
compare_multiple_random_values(
out,
nc_a,
nc_b,
groupname=comparison_var_group,
varname=comparison_var_name,
)

except KeyError:
out.print(
Style.BRIGHT
+ Fore.RED
+ f"\nError when comparing values for variable <{comparison_var_name}> "
f"in group <{comparison_var_group}>."
)
out.print(traceback.format_exc())
out.print("\n")
else:
out.print(Fore.LIGHTBLACK_EX + "\nNo variable selected for comparison. Skipping..")
else:
out.print(Fore.LIGHTBLACK_EX + "\nNo variable group selected for comparison. Skipping..")

out.print(Fore.LIGHTBLUE_EX + "\nAll variables:", add_to_history=True)
_, _, _ = compare_two_nc_files(
out, nc_a, nc_b, show_chunks=show_chunks, show_attributes=show_attributes
)


def compare_multiple_random_values(
    out: Outputter,
    nc_a: Union[str, Path],
    nc_b: Union[str, Path],
    groupname: str,
    varname: str,
    num_comparisons: int = 100,
) -> None:
    """Iterate through N random samples, and evaluate whether the differences exceed a threshold.

    Parameters
    ----------
    out
        Outputter used to print the per-sample markers and the summary.
    nc_a
        Filepath to the first NetCDF file.
    nc_b
        Filepath to the second NetCDF file.
    groupname
        Name of the group (passed to the backend) that contains the variable.
    varname
        Name of the variable whose values are sampled.
    num_comparisons
        Number of random samples to compare.

    Notes
    -----
    One marker is printed per sample: "." for a match, "n" when exactly one of
    the two values is NaN, and "x" when the difference exceeds the threshold.
    Both "n" and "x" are counted as mismatches.
    """
    num_mismatches = 0

    # Open both datasets as context managers so the underlying file handles
    # are released even if a lookup or a comparison raises.
    with xr.open_dataset(nc_a, backend_kwargs={"group": groupname}) as dataset_a:
        with xr.open_dataset(nc_b, backend_kwargs={"group": groupname}) as dataset_b:
            nc_var_a = dataset_a.variables[varname]
            nc_var_b = dataset_b.variables[varname]

            for _ in range(num_comparisons):
                match_result = _match_random_value(out, nc_var_a, nc_var_b)
                if match_result is True:
                    out.print(".", colors=False, end="")
                elif match_result is None:
                    out.print("n", colors=False, end="")
                    num_mismatches += 1
                else:
                    out.print("x", colors=False, end="")
                    num_mismatches += 1

    if num_mismatches > 0:
        out.print(Fore.RED + f" {num_mismatches} mismatches, out of {num_comparisons} samples.")
    else:
        out.print(Fore.CYAN + " No mismatches.")
    out.print("Done.", colors=False)


def walk_common_groups_tree(
top_a_name: str,
top_a: Union[netCDF4.Dataset, netCDF4.Group],
Expand Down Expand Up @@ -587,59 +492,6 @@ def _var_properties(group: Union[netCDF4.Dataset, netCDF4.Group], varname: str)
return VarProperties(varname, the_variable, v_dtype, v_shape, v_chunking, v_attributes)


def _match_random_value(
    out: Outputter, nc_var_a: xr.Variable, nc_var_b: xr.Variable, thresh: float = 1e-6
) -> Union[bool, None]:
    """Check whether a randomly selected data point matches between two variables.

    Parameters
    ----------
    out
        Outputter used to report the details of a mismatch.
    nc_var_a
        First variable; its shape determines the range of the random index.
    nc_var_b
        Second variable, indexed with the same random index.
    thresh
        Maximum absolute difference that is still considered a match.

    Returns
    -------
    None or bool
        None if data point is null for one and only one of the variables
        True if values match
        False if the difference exceeds the given threshold
    """
    # Draw one random position per dimension of the first variable.
    rand_index_tuple = tuple(random.randint(0, dim_length - 1) for dim_length in nc_var_a.shape)

    # Get the values from each variable
    value_a = nc_var_a.values[rand_index_tuple]
    value_b = nc_var_b.values[rand_index_tuple]

    # Check whether null
    a_is_null = np.isnan(value_a)
    b_is_null = np.isnan(value_b)
    if a_is_null and b_is_null:
        return True
    if a_is_null or b_is_null:
        return None

    # Evaluate difference between values
    diff = value_b - value_a
    if abs(diff) > thresh:
        out.print()
        # The closing parenthesis was missing from this message originally.
        out.print(Fore.RED + f"Difference exceeded threshold (diff == {diff})")
        out.print(f"var shape: {nc_var_a.shape}", colors=False)
        out.print(f"indices: {rand_index_tuple}", colors=False)
        out.print(f"value a: {value_a}", colors=False)
        out.print(f"value b: {value_b}", colors=False, end="\n\n")
        return False

    return True


def _print_sample_values(out: Outputter, nc_filepath, groupname: str, varname: str) -> None:
    """Print the first values (at most 100) of a variable, flattened to one dimension.

    Parameters
    ----------
    out
        Outputter used to print the sample.
    nc_filepath
        Filepath to the NetCDF file.
    groupname
        Name of the group (passed to the backend) that contains the variable.
    varname
        Name of the variable whose values are printed.
    """
    max_sample_length = 100

    # Read the values inside a context manager so the file handle is released;
    # `.values` materializes the data, so it remains usable after the close.
    with xr.open_dataset(nc_filepath, backend_kwargs={"group": groupname}) as dataset:
        vector_of_values = dataset[varname].values.flatten()

    # Slicing clamps to the array length, so shorter arrays are printed in full.
    out.print(str(vector_of_values[:max_sample_length]), colors=False)


def _get_attribute_value_as_str(varprops: VarProperties, attribute_key: str) -> str:
if attribute_key and (attribute_key in varprops.attributes):
attr = varprops.attributes[attribute_key]
Expand Down
2 changes: 0 additions & 2 deletions tests/data/a-b_test_golden_file.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@ Root-level Groups:
Are all items the same? ---> True.
['Data', 'Position', 'Statistics']

No variable group selected for comparison. Skipping..

All variables:
File A File B
All Variables
Expand Down
Loading
Loading