from contextlib import ExitStack
from io import BytesIO
from pathlib import Path
-from typing import TYPE_CHECKING, Any, Final, cast
+from typing import TYPE_CHECKING, Any, Callable, Final, cast

import numpy as np
import pandas as pd
@@ -138,96 +138,110 @@ def open_example_mfdataset(names, *args, **kwargs) -> Dataset:
    )


-def create_masked_and_scaled_data() -> Dataset:
-    x = np.array([np.nan, np.nan, 10, 10.1, 10.2], dtype=np.float32)
+def create_masked_and_scaled_data(dtype: type[np.number] = np.float32) -> Dataset:
+    x = np.array([np.nan, np.nan, 10, 10.1, 10.2], dtype=dtype)
    encoding = {
        "_FillValue": -1,
-        "add_offset": 10,
-        "scale_factor": np.float32(0.1),
+        "add_offset": dtype(10),
+        "scale_factor": dtype(0.1),
        "dtype": "i2",
    }
    return Dataset({"x": ("t", x, {}, encoding)})


-def create_encoded_masked_and_scaled_data() -> Dataset:
-    attributes = {"_FillValue": -1, "add_offset": 10, "scale_factor": np.float32(0.1)}
+def create_encoded_masked_and_scaled_data(
+    dtype: type[np.number] = np.float32,
+) -> Dataset:
+    attributes = {"_FillValue": -1, "add_offset": dtype(10), "scale_factor": dtype(0.1)}
    return Dataset(
        {"x": ("t", np.array([-1, -1, 0, 1, 2], dtype=np.int16), attributes)}
    )


-def create_unsigned_masked_scaled_data() -> Dataset:
+def create_unsigned_masked_scaled_data(
+    dtype: type[np.number] = np.float32,
+) -> Dataset:
    encoding = {
        "_FillValue": 255,
        "_Unsigned": "true",
        "dtype": "i1",
-        "add_offset": 10,
-        "scale_factor": np.float32(0.1),
+        "add_offset": dtype(10),
+        "scale_factor": dtype(0.1),
    }
-    x = np.array([10.0, 10.1, 22.7, 22.8, np.nan], dtype=np.float32)
+    x = np.array([10.0, 10.1, 22.7, 22.8, np.nan], dtype=dtype)
    return Dataset({"x": ("t", x, {}, encoding)})


-def create_encoded_unsigned_masked_scaled_data() -> Dataset:
+def create_encoded_unsigned_masked_scaled_data(
+    dtype: type[np.number] = np.float32,
+) -> Dataset:
    # These are values as written to the file: the _FillValue will
    # be represented in the signed form.
    attributes = {
        "_FillValue": -1,
        "_Unsigned": "true",
-        "add_offset": 10,
-        "scale_factor": np.float32(0.1),
+        "add_offset": dtype(10),
+        "scale_factor": dtype(0.1),
    }
    # Create unsigned data corresponding to [0, 1, 127, 128, 255] unsigned
    sb = np.asarray([0, 1, 127, -128, -1], dtype="i1")
    return Dataset({"x": ("t", sb, attributes)})


-def create_bad_unsigned_masked_scaled_data() -> Dataset:
+def create_bad_unsigned_masked_scaled_data(
+    dtype: type[np.number] = np.float32,
+) -> Dataset:
    encoding = {
        "_FillValue": 255,
        "_Unsigned": True,
        "dtype": "i1",
-        "add_offset": 10,
-        "scale_factor": np.float32(0.1),
+        "add_offset": dtype(0),
+        "scale_factor": dtype(0.1),
    }
-    x = np.array([10.0, 10.1, 22.7, 22.8, np.nan], dtype=np.float32)
+    x = np.array([10.0, 10.1, 22.7, 22.8, np.nan], dtype=dtype)
    return Dataset({"x": ("t", x, {}, encoding)})


-def create_bad_encoded_unsigned_masked_scaled_data() -> Dataset:
+def create_bad_encoded_unsigned_masked_scaled_data(
+    dtype: type[np.number] = np.float32,
+) -> Dataset:
    # These are values as written to the file: the _FillValue will
    # be represented in the signed form.
    attributes = {
        "_FillValue": -1,
        "_Unsigned": True,
-        "add_offset": 10,
-        "scale_factor": np.float32(0.1),
+        "add_offset": dtype(10),
+        "scale_factor": dtype(0.1),
    }
    # Create signed data corresponding to [0, 1, 127, 128, 255] unsigned
    sb = np.asarray([0, 1, 127, -128, -1], dtype="i1")
    return Dataset({"x": ("t", sb, attributes)})


-def create_signed_masked_scaled_data() -> Dataset:
+def create_signed_masked_scaled_data(
+    dtype: type[np.number] = np.float32,
+) -> Dataset:
    encoding = {
        "_FillValue": -127,
        "_Unsigned": "false",
        "dtype": "i1",
-        "add_offset": 10,
-        "scale_factor": np.float32(0.1),
+        "add_offset": dtype(10),
+        "scale_factor": dtype(0.1),
    }
-    x = np.array([-1.0, 10.1, 22.7, np.nan], dtype=np.float32)
+    x = np.array([-1.0, 10.1, 22.7, np.nan], dtype=dtype)
    return Dataset({"x": ("t", x, {}, encoding)})


-def create_encoded_signed_masked_scaled_data() -> Dataset:
+def create_encoded_signed_masked_scaled_data(
+    dtype: type[np.number] = np.float32,
+) -> Dataset:
    # These are values as written to the file: the _FillValue will
    # be represented in the signed form.
    attributes = {
        "_FillValue": -127,
        "_Unsigned": "false",
-        "add_offset": 10,
-        "scale_factor": np.float32(0.1),
+        "add_offset": dtype(10),
+        "scale_factor": dtype(0.1),
    }
    # Create signed data corresponding to [0, 1, 127, 128, 255] unsigned
    sb = np.asarray([-110, 1, 127, -127], dtype="i1")
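
For context, the decoded/encoded factory pairs above mirror CF "mask and scale" unpacking: entries equal to _FillValue become NaN, the rest are recovered as encoded * scale_factor + add_offset, and _Unsigned reinterprets the stored signed bytes. A rough sketch of that relationship; cf_decode is a hypothetical helper for illustration, not xarray's implementation:

import numpy as np

def cf_decode(encoded, scale_factor, add_offset, fill_value):
    # _FillValue entries become NaN; everything else is unpacked as
    # decoded = encoded * scale_factor + add_offset.
    data = encoded.astype(np.float64)
    data[encoded == fill_value] = np.nan
    return data * scale_factor + add_offset

# Mirrors create_encoded_masked_and_scaled_data -> create_masked_and_scaled_data:
packed = np.array([-1, -1, 0, 1, 2], dtype=np.int16)
print(cf_decode(packed, scale_factor=0.1, add_offset=10, fill_value=-1))
# approximately [nan, nan, 10.0, 10.1, 10.2]

# The _Unsigned="true" fixtures store signed bytes meant to be read back as unsigned:
np.asarray([0, 1, 127, -128, -1], dtype="i1").view("u1")  # [0, 1, 127, 128, 255]
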
@@ -859,6 +873,8 @@ def test_roundtrip_string_with_fill_value_nchar(self) -> None:
        with self.roundtrip(original) as actual:
            assert_identical(expected, actual)

+    # TODO (kmuehlbauer): make this work for np.float64
+    @pytest.mark.parametrize("dtype", [np.float32])
    @pytest.mark.parametrize(
        "decoded_fn, encoded_fn",
        [
@@ -878,9 +894,20 @@ def test_roundtrip_string_with_fill_value_nchar(self) -> None:
            (create_masked_and_scaled_data, create_encoded_masked_and_scaled_data),
        ],
    )
-    def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn) -> None:
-        decoded = decoded_fn()
-        encoded = encoded_fn()
+    def test_roundtrip_mask_and_scale(
+        self,
+        decoded_fn: Callable[[type[np.number]], Dataset],
+        encoded_fn: Callable[[type[np.number]], Dataset],
+        dtype: type[np.number],
+    ) -> None:
+        if dtype == np.float32 and isinstance(
+            self, (TestZarrDirectoryStore, TestZarrDictStore)
+        ):
+            pytest.skip(
+                "zarr attributes (e.g. `scale_factor`) are unconditionally promoted to `float64`"
+            )
+        decoded = decoded_fn(dtype)
+        encoded = encoded_fn(dtype)

        with self.roundtrip(decoded) as actual:
            for k in decoded.variables:
@@ -901,7 +928,7 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn) -> None:

        # make sure roundtrip encoding didn't change the
        # original dataset.
-        assert_allclose(encoded, encoded_fn(), decode_bytes=False)
+        assert_allclose(encoded, encoded_fn(dtype), decode_bytes=False)

        with self.roundtrip(encoded) as actual:
            for k in decoded.variables:
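
For reference, a condensed sketch of the round-trip contract this test exercises, assuming the factories above are in scope; it uses xarray's public decode_cf for brevity, whereas the real test goes through each backend's roundtrip helper:

import numpy as np
import xarray as xr
from xarray.testing import assert_allclose

decoded = create_masked_and_scaled_data(np.float32)
encoded = create_encoded_masked_and_scaled_data(np.float32)
# Decoding the packed representation should recover the unpacked values
# (NaN where the _FillValue was stored).
assert_allclose(decoded, xr.decode_cf(encoded), decode_bytes=False)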