Skip to content

Add NAN code support to CAN #989

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion covid_act_now/delphi_covid_act_now/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,34 @@

from delphi_utils import (
create_export_csv,
get_structured_logger
get_structured_logger,
Nans
)

from .constants import GEO_RESOLUTIONS, SIGNALS
from .geo import geo_map
from .pull import load_data, extract_testing_metrics

def add_nancodes(df, signal):
    """Attach missingness-code columns to ``df`` and return it.

    Three columns are written directly onto the passed-in dataframe:
    ``missing_val``, ``missing_se`` and ``missing_sample_size``.

    Parameters
    ----------
    df
        Dataframe with a ``val`` column; when ``signal`` is
        ``"pcr_tests_positive"`` it must also have ``se`` and
        ``sample_size`` columns.
    signal
        Name of the signal being exported. Only ``"pcr_tests_positive"``
        is treated as carrying a standard error and sample size.

    Returns
    -------
    The same dataframe object, with the three code columns filled in.
    """
    reports_se_and_sample_size = signal == "pcr_tests_positive"

    # se / sample_size share one default: they are either expected to be
    # present (NOT_MISSING) or do not apply to this signal at all.
    if reports_se_and_sample_size:
        auxiliary_default = Nans.NOT_MISSING
    else:
        auxiliary_default = Nans.NOT_APPLICABLE

    df["missing_val"] = Nans.NOT_MISSING
    df["missing_se"] = auxiliary_default
    df["missing_sample_size"] = auxiliary_default

    # Rows whose value is null get the OTHER code.
    df.loc[df["val"].isnull(), "missing_val"] = Nans.OTHER

    if reports_se_and_sample_size:
        # Same treatment for the auxiliary columns, only where they apply.
        column_pairs = (
            ("se", "missing_se"),
            ("sample_size", "missing_sample_size"),
        )
        for value_column, code_column in column_pairs:
            df.loc[df[value_column].isnull(), code_column] = Nans.OTHER

    return df

def run_module(params):
"""
Run the CAN testing metrics indicator.
Expand Down Expand Up @@ -59,6 +80,7 @@ def run_module(params):
df = geo_map(df_county_testing, geo_res)

# Export 'pcr_specimen_positivity_rate'
df = add_nancodes(df, "pcr_tests_positive")
exported_csv_dates = create_export_csv(
df,
export_dir=export_dir,
Expand All @@ -69,6 +91,7 @@ def run_module(params):
df["val"] = df["sample_size"]
df["sample_size"] = np.nan
df["se"] = np.nan
df = add_nancodes(df, "pcr_tests_total")
exported_csv_dates = create_export_csv(
df,
export_dir=export_dir,
Expand Down
7 changes: 6 additions & 1 deletion covid_act_now/tests/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def test_output_files(self, clean_receiving_dir):
run_module(self.PARAMS)
csv_files = set(listdir("receiving"))
csv_files.discard(".gitignore")
today = pd.Timestamp.today().date().strftime("%Y%m%d")

expected_files = set()
for signal in SIGNALS:
Expand All @@ -30,7 +31,11 @@ def test_output_files(self, clean_receiving_dir):
# All output files exist
assert csv_files == expected_files

expected_columns = [
"geo_id", "val", "se", "sample_size",
"missing_val", "missing_se", "missing_sample_size"
]
# All output files have correct columns
for csv_file in csv_files:
df = pd.read_csv(join("receiving", csv_file))
assert (df.columns.values == ["geo_id", "val", "se", "sample_size"]).all()
assert (df.columns.values == expected_columns).all()