Skip to content

Commit

Permalink
Merge pull request #277 from nasa/feature/issue-275-add-dimension-names-to-report-summary-in-addition-to-variable-shape
Browse files Browse the repository at this point in the history

Feature/issue 275 add dimension names to report summary in addition to variable shape
  • Loading branch information
danielfromearth authored Dec 12, 2024
2 parents 575d128 + 8244a49 commit 8981a56
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 50 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added

- Categorize counts of differences (including attributes) in a summary ([#276](https://github.com/nasa/ncompare/pull/276)) ([**@danielfromearth**](https://github.com/danielfromearth))
- Include dimensions in variable attribute comparisons ([#277](https://github.com/nasa/ncompare/pull/277)) ([**@danielfromearth**](https://github.com/danielfromearth))

### Changed

Expand Down
48 changes: 41 additions & 7 deletions docs/example/ncompare-example-usage.ipynb

Large diffs are not rendered by default.

65 changes: 28 additions & 37 deletions ncompare/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@
from ncompare.sequence_operations import common_elements, count_diffs
from ncompare.utils import ensure_valid_path_exists, ensure_valid_path_with_suffix

VarProperties = namedtuple("VarProperties", "varname, variable, dtype, shape, chunking, attributes")
VarProperties = namedtuple(
"VarProperties", "varname, variable, dtype, dimensions, shape, chunking, attributes"
)

GroupPair = namedtuple(
"GroupPair",
Expand Down Expand Up @@ -443,7 +445,7 @@ def _print_group_details_side_by_side(


def _print_var_properties_side_by_side(
out,
out: Outputter,
v_a: VarProperties,
v_b: VarProperties,
num_attribute_diffs: SummaryDifferencesDict,
Expand All @@ -455,6 +457,7 @@ def _print_var_properties_side_by_side(
# so we can decide whether to highlight the variable header.
pairs_to_check_and_show = [
(v_a.dtype, v_b.dtype),
(v_a.dimensions, v_b.dimensions),
(v_a.shape, v_b.shape),
]
if show_chunks:
Expand Down Expand Up @@ -487,48 +490,32 @@ def _print_var_properties_side_by_side(
force_display_even_if_same=True,
)

# Data type
diff_condition: SummaryDifferenceKeys = out.side_by_side(
"dtype:", v_a.dtype, v_b.dtype, highlight_diff=True
)
num_attribute_diffs[diff_condition] += 1
if diff_condition in ("left", "right", "both"):
num_attribute_diffs["difference_types"].add("dtype")
# Shape
diff_condition = out.side_by_side("shape:", v_a.shape, v_b.shape, highlight_diff=True)
num_attribute_diffs[diff_condition] += 1
if diff_condition in ("left", "right", "both"):
num_attribute_diffs["difference_types"].add("shape")
# Chunking
if show_chunks:
diff_condition = out.side_by_side(
"chunksize:", v_a.chunking, v_b.chunking, highlight_diff=True
# Go through each attribute, show differences, and add differences to running tally.
def _var_attribute_side_by_side(attribute_name, attribute_a, attribute_b):
diff_condition: SummaryDifferenceKeys = out.side_by_side(
f"{attribute_name}:", attribute_a, attribute_b, highlight_diff=True
)
num_attribute_diffs[diff_condition] += 1
if diff_condition in ("left", "right", "both"):
num_attribute_diffs["difference_types"].add("chunksize")
# Attributes
num_attribute_diffs["difference_types"].add(attribute_name)

_var_attribute_side_by_side("dtype", v_a.dtype, v_b.dtype)
_var_attribute_side_by_side("dimensions", v_a.dimensions, v_b.dimensions)
_var_attribute_side_by_side("shape", v_a.shape, v_b.shape)
# Chunking
if show_chunks:
_var_attribute_side_by_side("chunksize", v_a.chunking, v_b.chunking)
# Scale Factor
scale_factor_pair = _get_and_check_variable_scale_factor(v_a, v_b)
if scale_factor_pair:
_var_attribute_side_by_side("scale_factor", scale_factor_pair[0], scale_factor_pair[1])
# Other attributes
if show_attributes:
for attr_a_key, attr_a, attr_b_key, attr_b in _get_and_check_variable_attributes(v_a, v_b):
# Check whether attr_a_key is empty,
# because it might be if the variable doesn't exist in File A.
attribute_key = attr_a_key if attr_a_key else attr_b_key
diff_condition = out.side_by_side(
f"{attribute_key}:", attr_a, attr_b, highlight_diff=True
)
num_attribute_diffs[diff_condition] += 1
if diff_condition in ("left", "right", "both"):
num_attribute_diffs["difference_types"].add(attribute_key)

# Scale Factor
scale_factor_pair = _get_and_check_variable_scale_factor(v_a, v_b)
if scale_factor_pair:
diff_condition = out.side_by_side(
"sf:", scale_factor_pair[0], scale_factor_pair[1], highlight_diff=True
)
num_attribute_diffs[diff_condition] += 1
if diff_condition in ("left", "right", "both"):
num_attribute_diffs["difference_types"].add("scale_factor")
_var_attribute_side_by_side(attribute_key, attr_a, attr_b)


def _get_and_check_variable_scale_factor(
Expand Down Expand Up @@ -584,6 +571,7 @@ def _var_properties(group: Union[netCDF4.Dataset, netCDF4.Group], varname: str)
if varname:
the_variable = group.variables[varname]
v_dtype = str(the_variable.dtype)
v_dimensions = str(the_variable.dimensions)
v_shape = str(the_variable.shape).strip()
v_chunking = str(the_variable.chunking()).strip()

Expand All @@ -598,11 +586,14 @@ def _var_properties(group: Union[netCDF4.Dataset, netCDF4.Group], varname: str)
else:
the_variable = None
v_dtype = ""
v_dimensions = ""
v_shape = ""
v_chunking = ""
v_attributes = None

return VarProperties(varname, the_variable, v_dtype, v_shape, v_chunking, v_attributes)
return VarProperties(
varname, the_variable, v_dtype, v_dimensions, v_shape, v_chunking, v_attributes
)


def _get_attribute_value_as_str(varprops: VarProperties, attribute_key: str) -> str:
Expand Down
17 changes: 14 additions & 3 deletions tests/data/a-b_test_golden_file.csv
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@ num variables in group:,2,2,
-,-,-,
-----VARIABLE-----:,conditions,conditions,
dtype:,int32,int32,
dimensions:,"('conditions',)","('conditions',)",
shape:,"(2,)","(2,)",
chunksize:,contiguous,contiguous,
-----VARIABLE-----:,time,time,
dtype:,float64,float64,
dimensions:,"('time',)","('time',)",
shape:,"(5,)","(5,)",
chunksize:,[512],[512],
calendar:,gregorian,gregorian,
Expand All @@ -29,6 +31,7 @@ num variables in group:,1,1,
-,-,-,
-----VARIABLE-----:,level,level,
dtype:,int32,int32,
dimensions:,"('level',)","('level',)",
shape:,"(2,)","(2,)",
chunksize:,[1024],[1024],
units:,hPa,hPa,
Expand All @@ -38,11 +41,13 @@ num variables in group:,2,2,
-,-,-,
-----VARIABLE-----:,lat,lat,
dtype:,float32,float32,
dimensions:,"('lat',)","('lat',)",
shape:,"(3,)","(2,)",***
chunksize:,contiguous,contiguous,
units:,degrees north,degrees north,
-----VARIABLE-----:,lon,lon,
dtype:,float32,float32,
dimensions:,"('lon',)","('lon',)",
shape:,"(4,)","(2,)",***
chunksize:,contiguous,contiguous,
units:,degrees east,degrees east,
Expand All @@ -52,12 +57,14 @@ num variables in group:,1,1,
-,-,-,
-----VARIABLE-----:,mean_value,,
dtype:,float32,,***
dimensions:,"('time',)",,***
shape:,"(5,)",,***
chunksize:,[1024],,***
coordinates:,time,,***
long_name:,average value for each time,,***
-----VARIABLE-----:,,std_value,
dtype:,,float32,***
dimensions:,,"('time',)",***
shape:,,"(5,)",***
chunksize:,,[1024],***
coordinates:,,time,***
Expand All @@ -68,6 +75,7 @@ num variables in group:,1,1,
-,-,-,
-----VARIABLE-----:,temp,temp,
dtype:,float32,float32,
dimensions:,"('time', 'level', 'lat', 'lon')","('time', 'level', 'lat', 'lon')",
shape:,"(5, 2, 3, 4)","(5, 2, 2, 2)",***
chunksize:,"[1, 1, 3, 4]","[1, 1, 2, 2]",***
long_name:,temperature,temperature,
Expand All @@ -78,6 +86,7 @@ num variables in group:,1,1,
-,-,-,
-----VARIABLE-----:,quality_flag,quality_flag,
dtype:,int32,int32,
dimensions:,"('time', 'level', 'lat', 'lon')","('time', 'level', 'lat', 'lon')",
shape:,"(5, 2, 3, 4)","(5, 2, 2, 2)",***
chunksize:,"[1, 1, 3, 4]","[1, 1, 2, 2]",***
units:,unitless,unitless,
Expand All @@ -87,6 +96,7 @@ num variables in group:,0,1,***
-,-,-,
-----VARIABLE-----:,,supplemental_flag,
dtype:,,int32,***
dimensions:,,"('time', 'conditions')",***
shape:,,"(5, 2)",***
chunksize:,,"[1, 2]",***
units:,,unitless,***
Expand All @@ -96,6 +106,7 @@ num variables in group:,0,1,***
-,-,-,
-----VARIABLE-----:,,condition_details,
dtype:,,float64,***
dimensions:,,"('conditions',)",***
shape:,,"(2,)",***
chunksize:,,contiguous,***
-,-,-,
Expand All @@ -104,7 +115,7 @@ Total # of shared variables:,7,7,
Total # of non-shared variables:,1,3,
Total # of shared groups:,5,5,
Total # of non-shared groups:,0,2,
Total # of shared attributes:,24,24,
Total # of non-shared attributes:,12,19,
Total # of shared attributes:,31,31,
Total # of non-shared attributes:,13,22,
Differences were found in these attributes:
"['chunksize', 'coordinates', 'dtype', 'long_name', 'shape', 'units']"
"['chunksize', 'coordinates', 'dimensions', 'dtype', 'long_name', 'shape', 'units']"
17 changes: 14 additions & 3 deletions tests/data/a-b_test_golden_file.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@ All variables:
- ------------------------------------------------ ------------------------------------------------
-----VARIABLE-----: conditions conditions
dtype: int32 int32
dimensions: ('conditions',) ('conditions',)
shape: (2,) (2,)
chunksize: contiguous contiguous
-----VARIABLE-----: time time
dtype: float64 float64
dimensions: ('time',) ('time',)
shape: (5,) (5,)
chunksize: [512] [512]
calendar: gregorian gregorian
Expand All @@ -35,6 +37,7 @@ All variables:
- ------------------------------------------------ ------------------------------------------------
-----VARIABLE-----: level level
dtype: int32 int32
dimensions: ('level',) ('level',)
shape: (2,) (2,)
chunksize: [1024] [1024]
units: hPa hPa
Expand All @@ -44,11 +47,13 @@ All variables:
- ------------------------------------------------ ------------------------------------------------
-----VARIABLE-----: lat lat
dtype: float32 float32
dimensions: ('lat',) ('lat',)
shape: (3,) (2,)
chunksize: contiguous contiguous
units: degrees north degrees north
-----VARIABLE-----: lon lon
dtype: float32 float32
dimensions: ('lon',) ('lon',)
shape: (4,) (2,)
chunksize: contiguous contiguous
units: degrees east degrees east
Expand All @@ -58,12 +63,14 @@ All variables:
- ------------------------------------------------ ------------------------------------------------
-----VARIABLE-----: mean_value
dtype: float32
dimensions: ('time',)
shape: (5,)
chunksize: [1024]
coordinates: time
long_name: average value for each time
-----VARIABLE-----: std_value
dtype: float32
dimensions: ('time',)
shape: (5,)
chunksize: [1024]
coordinates: time
Expand All @@ -74,6 +81,7 @@ All variables:
- ------------------------------------------------ ------------------------------------------------
-----VARIABLE-----: temp temp
dtype: float32 float32
dimensions: ('time', 'level', 'lat', 'lon') ('time', 'level', 'lat', 'lon')
shape: (5, 2, 3, 4) (5, 2, 2, 2)
chunksize: [1, 1, 3, 4] [1, 1, 2, 2]
long_name: temperature temperature
Expand All @@ -84,6 +92,7 @@ All variables:
- ------------------------------------------------ ------------------------------------------------
-----VARIABLE-----: quality_flag quality_flag
dtype: int32 int32
dimensions: ('time', 'level', 'lat', 'lon') ('time', 'level', 'lat', 'lon')
shape: (5, 2, 3, 4) (5, 2, 2, 2)
chunksize: [1, 1, 3, 4] [1, 1, 2, 2]
units: unitless unitless
Expand All @@ -93,6 +102,7 @@ All variables:
- ------------------------------------------------ ------------------------------------------------
-----VARIABLE-----: supplemental_flag
dtype: int32
dimensions: ('time', 'conditions')
shape: (5, 2)
chunksize: [1, 2]
units: unitless
Expand All @@ -102,6 +112,7 @@ All variables:
- ------------------------------------------------ ------------------------------------------------
-----VARIABLE-----: condition_details
dtype: float64
dimensions: ('conditions',)
shape: (2,)
chunksize: contiguous
- ------------------------------------------------ ------------------------------------------------
Expand All @@ -110,11 +121,11 @@ All variables:
Total # of non-shared variables: 1 3
Total # of shared groups: 5 5
Total # of non-shared groups: 0 2
Total # of shared attributes: 24 24
Total # of non-shared attributes: 12 19
Total # of shared attributes: 31 31
Total # of non-shared attributes: 13 22

Differences were found in these attributes:

['chunksize', 'coordinates', 'dtype', 'long_name', 'shape', 'units']
['chunksize', 'coordinates', 'dimensions', 'dtype', 'long_name', 'shape', 'units']

Done.
Binary file modified tests/data/a-b_test_golden_file.xlsx
Binary file not shown.

0 comments on commit 8981a56

Please sign in to comment.