@@ -78,6 +78,71 @@ def __repr__(self) -> str:
78
78
)
79
79
80
80
81
class NativeEndiannessArray(indexing.ExplicitlyIndexedNDArrayMixin):
    """Lazily present a non-native-endian array in native byte order.

    netCDF3 files are always big endian. Wrapping their arrays in this class
    converts every indexed selection to native endianness, so the result can
    be passed to Cython functions such as those found in bottleneck and
    pandas.

    >>> x = np.arange(5, dtype=">i2")

    >>> x.dtype
    dtype('>i2')

    >>> NativeEndiannessArray(x).dtype
    dtype('int16')

    >>> indexer = indexing.BasicIndexer((slice(None),))
    >>> NativeEndiannessArray(x)[indexer].dtype
    dtype('int16')
    """

    __slots__ = ("array",)

    def __init__(self, array):
        # Store the result of ``indexing.as_indexable`` rather than the raw
        # input; ``__getitem__`` indexes this stored object.
        self.array = indexing.as_indexable(array)

    @property
    def dtype(self):
        # Rebuild the dtype from kind + item size only, which leaves out the
        # byte-order marker carried by the wrapped array's dtype.
        wrapped_dtype = self.array.dtype
        return np.dtype(f"{wrapped_dtype.kind}{wrapped_dtype.itemsize}")

    def __getitem__(self, key):
        # Index the wrapped array first, then convert only that selection.
        selection = self.array[key]
        return np.asarray(selection, dtype=self.dtype)
112
+
113
+
114
class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin):
    """Lazily present an integer array as a boolean array.

    Boolean data is stored in integer typed netCDF variables; this wrapper
    converts each indexed selection back to ``bool`` when it is accessed.

    >>> x = np.array([1, 0, 1, 1, 0], dtype="i1")

    >>> x.dtype
    dtype('int8')

    >>> BoolTypeArray(x).dtype
    dtype('bool')

    >>> indexer = indexing.BasicIndexer((slice(None),))
    >>> BoolTypeArray(x)[indexer].dtype
    dtype('bool')
    """

    __slots__ = ("array",)

    def __init__(self, array):
        # Store the result of ``indexing.as_indexable`` rather than the raw
        # input; ``__getitem__`` indexes this stored object.
        self.array = indexing.as_indexable(array)

    @property
    def dtype(self):
        # The reported dtype is always boolean, whatever the wrapped dtype is.
        bool_dtype = np.dtype("bool")
        return bool_dtype

    def __getitem__(self, key):
        # Index the wrapped array first, then convert only that selection.
        selection = self.array[key]
        return np.asarray(selection, dtype=self.dtype)
144
+
145
+
81
146
def lazy_elemwise_func (array , func : Callable , dtype : np .typing .DTypeLike ):
82
147
"""Lazily apply an element-wise function to an array.
83
148
Parameters
@@ -159,30 +224,34 @@ def encode(self, variable: Variable, name: T_Name = None):
159
224
fv = encoding .get ("_FillValue" )
160
225
mv = encoding .get ("missing_value" )
161
226
162
- if (
163
- fv is not None
164
- and mv is not None
165
- and not duck_array_ops .allclose_or_equiv (fv , mv )
166
- ):
167
- raise ValueError (
168
- f"Variable { name !r} has conflicting _FillValue ({ fv } ) and missing_value ({ mv } ). Cannot encode data."
169
- )
227
+ if fv is not None or mv is not None :
228
+ if (
229
+ fv is not None
230
+ and mv is not None
231
+ and not duck_array_ops .allclose_or_equiv (fv , mv )
232
+ ):
233
+ raise ValueError (
234
+ f"Variable { name !r} has conflicting _FillValue ({ fv } ) and missing_value ({ mv } ). Cannot encode data."
235
+ )
170
236
171
- if fv is not None :
172
- # Ensure _FillValue is cast to same dtype as data's
173
- encoding ["_FillValue" ] = dtype .type (fv )
174
- fill_value = pop_to (encoding , attrs , "_FillValue" , name = name )
175
- if not pd .isnull (fill_value ):
176
- data = duck_array_ops .fillna (data , fill_value )
237
+ if fv is not None :
238
+ # Ensure _FillValue is cast to same dtype as data's
239
+ encoding ["_FillValue" ] = dtype .type (fv )
240
+ fill_value = pop_to (encoding , attrs , "_FillValue" , name = name )
241
+ if not pd .isnull (fill_value ):
242
+ data = duck_array_ops .fillna (data , fill_value )
177
243
178
- if mv is not None :
179
- # Ensure missing_value is cast to same dtype as data's
180
- encoding ["missing_value" ] = dtype .type (mv )
181
- fill_value = pop_to (encoding , attrs , "missing_value" , name = name )
182
- if not pd .isnull (fill_value ) and fv is None :
183
- data = duck_array_ops .fillna (data , fill_value )
244
+ if mv is not None :
245
+ # Ensure missing_value is cast to same dtype as data's
246
+ encoding ["missing_value" ] = dtype .type (mv )
247
+ fill_value = pop_to (encoding , attrs , "missing_value" , name = name )
248
+ if not pd .isnull (fill_value ) and fv is None :
249
+ data = duck_array_ops .fillna (data , fill_value )
184
250
185
- return Variable (dims , data , attrs , encoding , fastpath = True )
251
+ return Variable (dims , data , attrs , encoding , fastpath = True )
252
+
253
+ else :
254
+ return variable
186
255
187
256
def decode (self , variable : Variable , name : T_Name = None ):
188
257
dims , data , attrs , encoding = unpack_for_decoding (variable )
@@ -349,3 +418,99 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable:
349
418
return Variable (dims , data , attrs , encoding , fastpath = True )
350
419
else :
351
420
return variable
421
+
422
+
423
class DefaultFillvalueCoder(VariableCoder):
    """Encode default _FillValue if needed."""

    def encode(self, variable: Variable, name: T_Name = None) -> Variable:
        """Add a NaN ``_FillValue`` attribute to floating point variables.

        The variable is returned unchanged when ``_FillValue`` is already
        present in its attributes or encoding, or when its dtype is not a
        floating point type.
        """
        dims, data, attrs, encoding = unpack_for_encoding(variable)

        fill_value_given = "_FillValue" in attrs or "_FillValue" in encoding
        if fill_value_given or not np.issubdtype(variable.dtype, np.floating):
            return variable

        # make NaN the fill value for float types, using the variable's own
        # scalar type so the attribute matches the data dtype
        attrs["_FillValue"] = variable.dtype.type(np.nan)
        return Variable(dims, data, attrs, encoding, fastpath=True)

    def decode(self, variable: Variable, name: T_Name = None) -> Variable:
        """Not supported: this coder only takes part in encoding."""
        raise NotImplementedError()
441
+
442
+
443
class BooleanCoder(VariableCoder):
    """Code boolean values."""

    def encode(self, variable: Variable, name: T_Name = None) -> Variable:
        """Store boolean data as ``"i1"`` and tag it with ``dtype="bool"``.

        Variables that are not boolean, or that already carry a ``"dtype"``
        entry in their encoding or attributes, are returned unchanged.
        """
        if (
            variable.dtype != bool
            or "dtype" in variable.encoding
            or "dtype" in variable.attrs
        ):
            return variable

        dims, data, attrs, encoding = unpack_for_encoding(variable)
        # The attribute is the marker that ``decode`` looks for.
        attrs["dtype"] = "bool"
        data = duck_array_ops.astype(data, dtype="i1", copy=True)
        return Variable(dims, data, attrs, encoding, fastpath=True)

    def decode(self, variable: Variable, name: T_Name = None) -> Variable:
        """Wrap data tagged with ``dtype="bool"`` in a ``BoolTypeArray``.

        The ``"dtype"`` attribute is removed from the decoded variable.
        Variables without that tag are returned unchanged.
        """
        tagged_as_bool = variable.attrs.get("dtype", False) == "bool"
        if not tagged_as_bool:
            return variable

        dims, data, attrs, encoding = unpack_for_decoding(variable)
        attrs.pop("dtype")
        return Variable(dims, BoolTypeArray(data), attrs, encoding, fastpath=True)
468
+
469
+
470
class EndianCoder(VariableCoder):
    """Decode Endianness to native."""

    def encode(self, variable: Variable, name: T_Name = None) -> Variable:
        """Not supported: this coder only takes part in decoding.

        The signature mirrors the other coders' ``encode`` methods so that a
        call made through the common coder interface raises
        ``NotImplementedError`` instead of a ``TypeError`` about unexpected
        arguments.

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError()

    def decode(self, variable: Variable, name: T_Name = None) -> Variable:
        """Wrap non-native-endian data in a ``NativeEndiannessArray``.

        Parameters
        ----------
        variable : Variable
            Variable whose data dtype is inspected.
        name : optional
            Accepted for interface compatibility; not used here.

        Returns
        -------
        Variable
            A new variable wrapping the data when its dtype is not native
            endian, otherwise ``variable`` itself.
        """
        dims, data, attrs, encoding = unpack_for_decoding(variable)
        if not data.dtype.isnative:
            # Conversion happens in the wrapper at indexing time, not here.
            data = NativeEndiannessArray(data)
            return Variable(dims, data, attrs, encoding, fastpath=True)
        else:
            return variable
483
+
484
+
485
class NonStringCoder(VariableCoder):
    """Encode NonString variables if dtypes differ."""

    def encode(self, variable: Variable, name: T_Name = None) -> Variable:
        """Cast the data to the dtype requested in ``encoding["dtype"]``.

        Nothing is done when the encoding has no ``"dtype"`` entry or when
        that entry is ``"S1"`` or ``str``. Otherwise the entry is removed
        from the unpacked encoding and, if it differs from the variable's
        dtype, the data is cast to it. The data is rounded first when the
        target is an integer type.

        Parameters
        ----------
        variable : Variable
            Variable to encode.
        name : optional
            Variable name, used only in the warning message.

        Returns
        -------
        Variable
            A new variable when a non-string ``"dtype"`` encoding was
            present, otherwise ``variable`` itself.

        Warns
        -----
        SerializationWarning
            When floating point data is saved as an integer dtype and the
            variable's attributes contain neither ``_FillValue`` nor
            ``missing_value``.
        """
        if "dtype" in variable.encoding and variable.encoding["dtype"] not in (
            "S1",
            str,
        ):
            dims, data, attrs, encoding = unpack_for_encoding(variable)
            dtype = np.dtype(encoding.pop("dtype"))
            if dtype != variable.dtype:
                if np.issubdtype(dtype, np.integer):
                    if (
                        np.issubdtype(variable.dtype, np.floating)
                        and "_FillValue" not in variable.attrs
                        and "missing_value" not in variable.attrs
                    ):
                        warnings.warn(
                            f"saving variable {name} with floating "
                            "point data as an integer dtype without "
                            "any _FillValue to use for NaNs",
                            SerializationWarning,
                            stacklevel=10,
                        )
                    # Round before casting so values are not truncated.
                    data = np.around(data)
                data = data.astype(dtype=dtype)
            return Variable(dims, data, attrs, encoding, fastpath=True)
        else:
            return variable

    def decode(self, variable: Variable, name: T_Name = None) -> Variable:
        """Not supported: this coder only takes part in encoding.

        The signature mirrors the other coders' ``decode`` methods so that a
        call made through the common coder interface raises
        ``NotImplementedError`` instead of a ``TypeError`` about unexpected
        arguments.

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError()
0 commit comments