6
6
7
7
from .config import Config
8
8
9
def format_df(df: pd.DataFrame, geo_id: str, se: bool, logger):
    """Scale sensor values to percentages and drop rows failing sanity checks.

    Rows are removed, and a message is logged, when the sensor value is
    missing, when the scaled standard error is 5 or more, or when the scaled
    sensor value is 90 or more. The input frame is left untouched; all work
    happens on a copy.

    Parameters
    ----------
    df
        Frame with at least a ``val`` and an ``se`` column, both expressed as
        proportions (they are multiplied by 100 here).
    geo_id
        Identifier interpolated into the log messages to help investigation.
    se
        If True, keep the ``se`` column and additionally drop rows whose
        ``val`` or ``se`` is not strictly positive. If False, the ``se``
        column is removed from the result.
    logger
        Object exposing ``info(msg)``; receives one message per failed check.

    Returns
    -------
    pd.DataFrame
        The filtered frame with ``val`` (and ``se`` when kept) in percent.
    """
    # Work on a copy so the caller's frame is not rescaled as a side effect.
    scaled = df.copy()

    # report in percentage
    scaled["val"] = scaled["val"] * 100
    scaled["se"] = scaled["se"] * 100

    val_isnull = scaled["val"].isnull()
    if val_isnull.any():
        logger.info("sensor value is nan, check pipeline")
    # Filter unconditionally so filtered_df is always bound.
    filtered_df = scaled[~val_isnull]

    # A NaN standard error compares False here, so such rows pass this check.
    se_too_high = filtered_df["se"] >= 5
    if se_too_high.any():
        logger.info(f"standard error suspiciously high! investigate {geo_id}")
    filtered_df = filtered_df[~se_too_high]

    sensor_too_high = filtered_df["val"] >= 90
    if sensor_too_high.any():
        logger.info(f"strangely high percentage! investigate {geo_id}")
    filtered_df = filtered_df[~sensor_too_high]

    if se:
        # Parentheses are required: `&` binds tighter than `>`.
        valid_cond = (filtered_df["se"] > 0) & (filtered_df["val"] > 0)
        if not valid_cond.all():
            logger.info("p=0, std_err=0 invalid")
        filtered_df = filtered_df[valid_cond]
    else:
        # Non-inplace drop avoids mutating a slice of another frame.
        filtered_df = filtered_df.drop(columns=["se"])

    return filtered_df
53
+
54
+
9
55
10
56
def write_to_csv (output_df : pd .DataFrame , geo_level : str , se :bool , out_name : str , logger , output_path = "." ):
11
57
"""Write sensor values to csv.
@@ -27,27 +73,7 @@ def write_to_csv(output_df: pd.DataFrame, geo_level: str, se:bool, out_name: str
27
73
geo_level ,
28
74
out_name )
29
75
single_date_df = output_df [output_df ["date" ] == d ]
30
- with open (filename , "w" ) as outfile :
31
- outfile .write ("geo_id,val,se,direction,sample_size\n " )
32
-
33
- for line in single_date_df .itertuples ():
34
- geo_id = line .geo_id
35
- sensor = 100 * line .val # report percentages
36
- se_val = 100 * line .se
37
- assert not np .isnan (sensor ), "sensor value is nan, check pipeline"
38
- assert sensor < 90 , f"strangely high percentage { geo_id , sensor } "
39
- if not np .isnan (se_val ):
40
- assert se_val < 5 , f"standard error suspiciously high! investigate { geo_id } "
41
-
42
- if se :
43
- assert sensor > 0 and se_val > 0 , "p=0, std_err=0 invalid"
44
- outfile .write (
45
- "%s,%f,%s,%s,%s\n " % (geo_id , sensor , se_val , "NA" , "NA" ))
46
- else :
47
- # for privacy reasons we will not report the standard error
48
- outfile .write (
49
- "%s,%f,%s,%s,%s\n " % (geo_id , sensor , "NA" , "NA" , "NA" ))
50
- out_n += 1
76
+
51
77
logger .debug (f"wrote { out_n } rows for { geo_level } " )
52
78
53
79
0 commit comments