Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow discrete sampling geometries with 1-d data to be written as ragged arrays, and improve the compression process #288

Merged
merged 5 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Changelog.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
Version 1.11.1.0
----------------

**2024-??-??**

* New keyword parameter to `cfdm.Field.insert_dimension`:
``constructs`` (https://github.com/NCAS-CMS/cfdm/issues/287)

----

Version 1.11.0.0
----------------

Expand Down
139 changes: 114 additions & 25 deletions cfdm/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
Constructs,
Count,
Domain,
DomainAxis,
Index,
List,
core,
Expand Down Expand Up @@ -105,6 +106,7 @@ def __new__(cls, *args, **kwargs):
instance._AuxiliaryCoordinate = AuxiliaryCoordinate
instance._Constructs = Constructs
instance._Domain = Domain
instance._DomainAxis = DomainAxis
instance._RaggedContiguousArray = RaggedContiguousArray
instance._RaggedIndexedArray = RaggedIndexedArray
instance._RaggedIndexedContiguousArray = RaggedIndexedContiguousArray
Expand Down Expand Up @@ -1032,17 +1034,13 @@ def _RaggedContiguousArray(
return self._RaggedContiguousArray(
compressed_data,
shape=data.shape,
# size=data.size,
# ndim=data.ndim,
count_variable=count_variable,
)

def _RaggedIndexedArray(self, compressed_data, data, index_variable):
return self._RaggedIndexedArray(
compressed_data,
shape=data.shape,
# size=data.size,
# ndim=data.ndim,
index_variable=index_variable,
)

Expand All @@ -1052,8 +1050,6 @@ def _RaggedIndexedContiguousArray(
return self._RaggedIndexedContiguousArray(
compressed_data,
shape=data.shape,
# size=data.size,
# ndim=data.ndim,
count_variable=count_variable,
index_variable=index_variable,
)
Expand Down Expand Up @@ -1173,6 +1169,37 @@ def _compress_metadata(
y = Array_func(f, compressed_data, data=data, **kwargs)
data._set_CompressedArray(y, copy=False)

def _derive_count(flattened_data):
"""Derive the DSG count for each feature.

:Parameters:

flattened_data: array_like
The 2-d flattened array from which to derive the
counts. The leading dimension is the number of
features.

:Returns:

`list`
The count for each feature.

"""
count = []
masked = np.ma.masked
for d in flattened_data:
d = d.array
last = d.size
for i in d[::-1]:
if i is not masked:
break

last -= 1

count.append(last)

return count

f = _inplace_enabled_define_and_cleanup(self)

data = f.get_data(None)
Expand Down Expand Up @@ -1224,18 +1251,25 @@ def _compress_metadata(
# --------------------------------------------------------
flattened_data = data.flatten(range(data.ndim - 1))

count = []
masked = np.ma.masked
for d in flattened_data:
d = d.array
last = d.size
for i in d[::-1]:
if i is not masked:
break
# Try to get the counts from an auxiliary coordinate
# construct that spans the same axes as the field data
count = None
data_axes = f.get_data_axes()
construct_axes = f.constructs.data_axes()
for key, c in (
f.auxiliary_coordinates().filter_by_data(todict=True).items()
):
if construct_axes[key] != data_axes:
continue

last -= 1
count = _derive_count(c.data.flatten(range(c.ndim - 1)))
break

count.append(last)
if count is None:
# When no auxiliary coordinate constructs span the
# field data dimensions, get the counts from the field
# data.
count = _derive_count(flattened_data)

N = sum(count)
compressed_field_data = _empty_compressed_data(data, (N,))
Expand Down Expand Up @@ -2081,7 +2115,9 @@ def indices(self, **kwargs):
return tuple([indices[axis] for axis in self.get_data_axes()])

@_inplace_enabled(default=False)
def insert_dimension(self, axis, position=0, inplace=False):
def insert_dimension(
self, axis, position=0, constructs=False, inplace=False
):
"""Expand the shape of the data array.

Inserts a new size 1 axis, corresponding to an existing domain
Expand All @@ -2097,6 +2133,9 @@ def insert_dimension(self, axis, position=0, inplace=False):
The identifier of the domain axis construct
corresponding to the inserted axis.

If *axis* is `None` then a new domain axis construct
will be created for the inserted dimension.

*Parameter example:*
``axis='domainaxis2'``

Expand All @@ -2112,6 +2151,13 @@ def insert_dimension(self, axis, position=0, inplace=False):
*Parameter example:*
``position=-1``

constructs: `bool`
If True then also insert the new axis into all
metadata constructs that don't already include it. By
default, metadata constructs are not changed.

.. versionadded:: (cfdm) 1.11.1.0

{{inplace: `bool`, optional}}

:Returns:
Expand All @@ -2132,22 +2178,30 @@ def insert_dimension(self, axis, position=0, inplace=False):
(19, 73, 1, 96)
>>> f.data.shape
(19, 73, 1, 96)
>>> f.insert_dimension(None, 1).data.shape
(19, 1, 73, 1, 96)

"""
f = _inplace_enabled_define_and_cleanup(self)

domain_axis = f.domain_axes(todict=True).get(axis)
if domain_axis is None:
raise ValueError(f"Can't insert non-existent domain axis: {axis}")

if domain_axis.get_size() != 1:
raise ValueError(
f"Can only insert axis of size 1. Axis {axis!r} has size "
f"{domain_axis.get_size()}"
if axis is None:
axis = f.set_construct(self._DomainAxis(1))
else:
axis, domain_axis = f.domain_axis(
axis,
item=True,
default=ValueError("Can't identify a unique axis to insert"),
)

if domain_axis.get_size() != 1:
raise ValueError(
f"Can only insert axis of size 1. Axis {axis!r} has size "
f"{domain_axis.get_size()}"
)

data_axes = f.get_data_axes(default=None)
if data_axes is not None:
data_axes0 = data_axes[:]
if axis in data_axes:
raise ValueError(
f"Can't insert a duplicate data array axis: {axis!r}"
Expand All @@ -2159,9 +2213,44 @@ def insert_dimension(self, axis, position=0, inplace=False):
# Expand the dims in the field's data array
super(Field, f).insert_dimension(position, inplace=True)

# Update the axes
if data_axes is not None:
f.set_data_axes(data_axes)

if constructs:
if data_axes is None:
data_axes0 = []
position = 0

for key, construct in f.constructs.filter_by_data(
todict=True
).items():
data = construct.get_data(
None, _units=False, _fill_value=False
)
if data is None:
continue

construct_axes = list(f.get_data_axes(key))
if axis in construct_axes:
continue

# Find the position of the new axis
c_position = position
for a in data_axes0:
if a not in construct_axes:
c_position -= 1

if c_position < 0:
c_position = 0

# Expand the dims in the construct's data array
construct.insert_dimension(c_position, inplace=True)

# Update the construct axes
construct_axes.insert(c_position, axis)
f.set_data_axes(axes=construct_axes, key=key)

return f

def convert(self, *identity, full_domain=True, **filter_kwargs):
Expand Down
11 changes: 9 additions & 2 deletions cfdm/test/test_Field.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,8 +539,8 @@ def test_Field_has_construct(self):
f.set_construct(cfdm.DomainAxis(0), key="")
self.assertTrue(f.has_construct(""))

def test_Field_squeeze_transpose_insert_dimension(self):
"""Test squeeze, transpose and `insert_dimension` methods."""
def test_Field_squeeze_transpose(self):
"""Test squeeze and transpose methods."""
f = self.f1

g = f.transpose()
Expand All @@ -555,6 +555,9 @@ def test_Field_squeeze_transpose_insert_dimension(self):
(g.get_data_axes(), f.get_data_axes()),
)

def test_Field_insert_dimension(self):
"""Test cfdm.Field.insert_dimension method."""
f = self.f1
g = f.copy()

key = g.set_construct(cfdm.DomainAxis(1))
Expand All @@ -567,6 +570,10 @@ def test_Field_squeeze_transpose_insert_dimension(self):
self.assertEqual(h.data.ndim, f.data.ndim + 1)
self.assertEqual(h.get_data_axes()[:-1], f.get_data_axes())

self.assertEqual(g.cell_measure().ndim, 2)
h = g.insert_dimension(None, constructs=True)
self.assertEqual(h.cell_measure().ndim, 3)

def test_Field_compress_uncompress(self):
"""Test the compress and uncompress Field methods."""
contiguous = os.path.join(
Expand Down