
Commit d10a58f (parent: fcc3a25)

Nancodes for claims_hosp:
* add missing columns, allow nan values
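The commit adds three missingness columns (missing_val, missing_se, missing_sample_size) to every output CSV, encoded as integer codes from the delphi_utils Nans enum. As a rough consumer-side sketch (not part of the commit; the filename is hypothetical, and it assumes Nans is the IntEnum whose members appear in the diff below):

import pandas as pd
from delphi_utils import Nans  # IntEnum of missingness codes used by this commit

# Hypothetical output file; real files are named <date>_<geo>_<signal>.csv
df = pd.read_csv("receiving/20200502_geography_signal.csv")
for col in ["missing_val", "missing_se", "missing_sample_size"]:
    # Map each integer code back to its enum name, e.g. Nans.NOT_MISSING
    df[col + "_reason"] = df[col].map(lambda code: Nans(code).name)
print(df[["geo_id", "val", "missing_val_reason"]])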

File tree

3 files changed: +45 -14 lines

claims_hosp/delphi_claims_hosp/update_indicator.py (+36 -6)
@@ -13,7 +13,7 @@
 # third party
 import numpy as np
 import pandas as pd
-from delphi_utils import GeoMapper
+from delphi_utils import GeoMapper, Nans
 
 # first party
 from .config import Config, GeoConstants
@@ -224,7 +224,7 @@ def write_to_csv(self, output_dict, output_path="./receiving"):
         all_rates = output_dict["rates"]
         all_se = output_dict["se"]
         all_include = output_dict["include"]
-        out_n = 0
+        out_n, out_i = 0, 0
         for i, date in enumerate(dates):
             filename = "%s/%s_%s_%s.csv" % (
                 output_path,
@@ -233,7 +233,10 @@ def write_to_csv(self, output_dict, output_path="./receiving"):
                 self.signal_name,
             )
             with open(filename, "w") as outfile:
-                outfile.write("geo_id,val,se,direction,sample_size\n")
+                outfile.write(
+                    "geo_id,val,se,direction,sample_size," +
+                    "missing_val,missing_se,missing_sample_size\n"
+                )
                 for geo_id in geo_ids:
                     val = all_rates[geo_id][i]
                     se = all_se[geo_id][i]
@@ -246,11 +249,38 @@ def write_to_csv(self, output_dict, output_path="./receiving"):
                         if self.write_se:
                             assert val > 0 and se > 0, "p=0, std_err=0 invalid"
                             outfile.write(
-                                "%s,%f,%s,%s,%s\n" % (geo_id, val, se, "NA", "NA"))
+                                "%s,%f,%s,%s,%s,%d,%d,%d\n" % (
+                                    geo_id, val, se, "NA", "NA",
+                                    Nans.NOT_MISSING.value,
+                                    Nans.NOT_MISSING.value,
+                                    Nans.NOT_APPLICABLE.value
+                                )
+                            )
                         else:
                             # for privacy reasons we will not report the standard error
                             outfile.write(
-                                "%s,%f,%s,%s,%s\n" % (geo_id, val, "NA", "NA", "NA"))
+                                "%s,%f,%s,%s,%s,%d,%d,%d\n" % (
+                                    geo_id, val, "NA", "NA", "NA",
+                                    Nans.NOT_MISSING.value,
+                                    Nans.PRIVACY.value,
+                                    Nans.NOT_APPLICABLE.value
+                                )
+                            )
                         out_n += 1
+                    else:
+                        # Write nans out anyway for versioning
+                        logging.warning("writing insufficient data for geo_id {0}, {1}".format(
+                            geo_id, i
+                        ))
+                        outfile.write(
+                            "%s,%s,%s,%s,%s,%d,%d,%d\n" % (
+                                geo_id, "NA", "NA", "NA", "NA",
+                                Nans.PRIVACY.value,
+                                Nans.PRIVACY.value,
+                                Nans.NOT_APPLICABLE.value
+                            )
+                        )
+                        out_i += 1
 
-        logging.debug("wrote %d rows for %d %s", out_n, len(geo_ids), geo_level)
+        logging.debug("wrote %d valued rows for %d %s", out_n, len(geo_ids), geo_level)
+        logging.debug("wrote %d nan-valued rows for %d %s", out_i, len(geo_ids), geo_level)
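To make the three write branches above concrete, here is a minimal sketch (mine, not code from the commit; make_row is a hypothetical helper) of the row shape each branch now emits:

from delphi_utils import Nans

def make_row(geo_id, val, se, include, write_se):
    """Mirror the three cases in write_to_csv above."""
    if include and write_se:
        # value and SE both reported; sample size is never applicable here
        return "%s,%f,%s,%s,%s,%d,%d,%d\n" % (
            geo_id, val, se, "NA", "NA",
            Nans.NOT_MISSING.value, Nans.NOT_MISSING.value, Nans.NOT_APPLICABLE.value)
    if include:
        # SE suppressed for privacy
        return "%s,%f,%s,%s,%s,%d,%d,%d\n" % (
            geo_id, val, "NA", "NA", "NA",
            Nans.NOT_MISSING.value, Nans.PRIVACY.value, Nans.NOT_APPLICABLE.value)
    # insufficient data: an all-NA row is still written so the file versions cleanly
    return "%s,%s,%s,%s,%s,%d,%d,%d\n" % (
        geo_id, "NA", "NA", "NA", "NA",
        Nans.PRIVACY.value, Nans.PRIVACY.value, Nans.NOT_APPLICABLE.value)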

claims_hosp/tests/test_indicator.py (+1 -2)
@@ -57,13 +57,12 @@ def test_fit_fips(self):
         date_range = pd.date_range("2020-05-01", "2020-05-20")
         all_fips = self.fips_data.fips.unique()
         loc_index_fips_data = self.fips_data.set_index(["fips", "date"])
-        sample_fips = nr.choice(all_fips, 10)
+        sample_fips = all_fips[:50]
 
         for fips in sample_fips:
             sub_data = loc_index_fips_data.loc[fips]
             sub_data = sub_data.reindex(date_range, fill_value=0)
             res0 = ClaimsHospIndicator.fit(sub_data, date_range[0], fips)
-            # first value is burn-in
             assert np.min(res0["rate"][1:]) > 0
             assert np.max(res0["rate"][1:]) <= 100
 
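The test change above swaps an unseeded nr.choice for a fixed slice; the point is determinism, since an unseeded random draw picks different FIPS codes on every run and makes failures hard to reproduce. A toy illustration (array contents are made up):

import numpy as np
import numpy.random as nr

all_fips = np.array(["01001", "01003", "01005", "01007"])
print(nr.choice(all_fips, 2))  # unseeded: a different subset on every run
print(all_fips[:2])            # fixed slice: the same subset on every run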
claims_hosp/tests/test_update_indicator.py (+8 -6)
@@ -141,8 +141,9 @@ def test_write_to_csv_results(self):
         expected_name = f"20200502_geography_{Config.signal_name}.csv"
         assert exists(join(td.name, expected_name))
         output_data = pd.read_csv(join(td.name, expected_name))
+        expected_columns = ["geo_id", "val", "se", "direction", "sample_size", "missing_val", "missing_se", "missing_sample_size"]
         assert (
-            output_data.columns == ["geo_id", "val", "se", "direction", "sample_size"]
+            output_data.columns == expected_columns
         ).all()
         assert (output_data.geo_id == ["a", "b"]).all()
         assert np.array_equal(output_data.val.values, np.array([0.1, 1]))
@@ -156,10 +157,10 @@ def test_write_to_csv_results(self):
         assert exists(join(td.name, expected_name))
         output_data = pd.read_csv(join(td.name, expected_name))
         assert (
-            output_data.columns == ["geo_id", "val", "se", "direction", "sample_size"]
+            output_data.columns == expected_columns
         ).all()
-        assert (output_data.geo_id == ["a"]).all()
-        assert np.array_equal(output_data.val.values, np.array([0.5]))
+        assert (output_data.geo_id == ["a", "b"]).all()
+        assert np.array_equal(output_data.val.values, np.array([0.5, np.nan]), equal_nan=True)
         assert np.isnan(output_data.se.values).all()
         assert np.isnan(output_data.direction.values).all()
         assert np.isnan(output_data.sample_size.values).all()
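The new equal_nan=True keyword matters here because NaN never compares equal to itself, so np.array_equal would otherwise report False whenever a NaN is expected; note the keyword only exists in NumPy >= 1.19. A quick demonstration:

import numpy as np

a = np.array([0.5, np.nan])
print(np.array_equal(a, a))                  # False: nan != nan element-wise
print(np.array_equal(a, a, equal_nan=True))  # True: matching nans count as equal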
@@ -168,7 +169,7 @@ def test_write_to_csv_results(self):
         assert exists(join(td.name, expected_name))
         output_data = pd.read_csv(join(td.name, expected_name))
         assert (
-            output_data.columns == ["geo_id", "val", "se", "direction", "sample_size"]
+            output_data.columns == expected_columns
         ).all()
         assert (output_data.geo_id == ["a", "b"]).all()
         assert np.array_equal(output_data.val.values, np.array([1.5, 3]))
@@ -221,8 +222,9 @@ def test_write_to_csv_with_se_results(self):
         expected_name = f"20200502_geography_{signal_name}.csv"
         assert exists(join(td.name, expected_name))
         output_data = pd.read_csv(join(td.name, expected_name))
+        expected_columns = ["geo_id", "val", "se", "direction", "sample_size", "missing_val", "missing_se", "missing_sample_size"]
         assert (
-            output_data.columns == ["geo_id", "val", "se", "direction", "sample_size"]
+            output_data.columns == expected_columns
         ).all()
         assert (output_data.geo_id == ["a", "b"]).all()
         assert np.array_equal(output_data.val.values, np.array([0.1, 1]))
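These column and NaN assertions lean on pandas parsing the literal string "NA" in the CSV as a missing value, which pd.read_csv does by default ("NA" is in its default na_values set). A minimal check:

import io
import pandas as pd

df = pd.read_csv(io.StringIO("geo_id,val,se\na,NA,NA\n"))
assert df["val"].isna().all() and df["se"].isna().all()  # "NA" -> NaN by default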
