13
13
# third party
14
14
import numpy as np
15
15
import pandas as pd
16
- from delphi_utils import GeoMapper
16
+ from delphi_utils import GeoMapper , Nans
17
17
18
18
# first party
19
19
from .config import Config , GeoConstants
@@ -224,7 +224,7 @@ def write_to_csv(self, output_dict, output_path="./receiving"):
224
224
all_rates = output_dict ["rates" ]
225
225
all_se = output_dict ["se" ]
226
226
all_include = output_dict ["include" ]
227
- out_n = 0
227
+ out_n , out_i = 0 , 0
228
228
for i , date in enumerate (dates ):
229
229
filename = "%s/%s_%s_%s.csv" % (
230
230
output_path ,
@@ -233,7 +233,10 @@ def write_to_csv(self, output_dict, output_path="./receiving"):
233
233
self .signal_name ,
234
234
)
235
235
with open (filename , "w" ) as outfile :
236
- outfile .write ("geo_id,val,se,direction,sample_size\n " )
236
+ outfile .write (
237
+ "geo_id,val,se,direction,sample_size," +
238
+ "missing_val,missing_se,missing_sample_size\n "
239
+ )
237
240
for geo_id in geo_ids :
238
241
val = all_rates [geo_id ][i ]
239
242
se = all_se [geo_id ][i ]
@@ -246,11 +249,38 @@ def write_to_csv(self, output_dict, output_path="./receiving"):
246
249
if self .write_se :
247
250
assert val > 0 and se > 0 , "p=0, std_err=0 invalid"
248
251
outfile .write (
249
- "%s,%f,%s,%s,%s\n " % (geo_id , val , se , "NA" , "NA" ))
252
+ "%s,%f,%s,%s,%s,%d,%d,%d\n " % (
253
+ geo_id , val , se , "NA" , "NA" ,
254
+ Nans .NOT_MISSING .value ,
255
+ Nans .NOT_MISSING .value ,
256
+ Nans .NOT_APPLICABLE .value
257
+ )
258
+ )
250
259
else :
251
260
# for privacy reasons we will not report the standard error
252
261
outfile .write (
253
- "%s,%f,%s,%s,%s\n " % (geo_id , val , "NA" , "NA" , "NA" ))
262
+ "%s,%f,%s,%s,%s,%d,%d,%d\n " % (
263
+ geo_id , val , "NA" , "NA" , "NA" ,
264
+ Nans .NOT_MISSING .value ,
265
+ Nans .PRIVACY .value ,
266
+ Nans .NOT_APPLICABLE .value
267
+ )
268
+ )
254
269
out_n += 1
270
+ else :
271
+ # Write nans out anyway for versioning
272
+ logging .warning ("writing insufficient data for geo_id {0}, {1}" .format (
273
+ geo_id , i
274
+ ))
275
+ outfile .write (
276
+ "%s,%s,%s,%s,%s,%d,%d,%d\n " % (
277
+ geo_id , "NA" , "NA" , "NA" , "NA" ,
278
+ Nans .PRIVACY .value ,
279
+ Nans .PRIVACY .value ,
280
+ Nans .NOT_APPLICABLE .value
281
+ )
282
+ )
283
+ out_i += 1
255
284
256
- logging .debug ("wrote %d rows for %d %s" , out_n , len (geo_ids ), geo_level )
285
+ logging .debug ("wrote %d valued csvs for %d %s" , out_n , len (geo_ids ), geo_level )
286
+ logging .debug ("wrote %d nan-valued csvs for %d %s" , out_i , len (geo_ids ), geo_level )
0 commit comments