From 81bd84a06c7f5d5a68ad29f396c67fa92c174214 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 7 Sep 2021 22:15:31 -0400 Subject: [PATCH 01/95] First pass of the CDC Indicator --- .github/workflows/python-ci.yml | 2 +- cdc_vaccines/.pylintrc | 22 +++ cdc_vaccines/Makefile | 25 +++ cdc_vaccines/README.md | 70 ++++++++ cdc_vaccines/REVIEW.md | 38 ++++ cdc_vaccines/cache/.gitignore | 0 cdc_vaccines/delphi_cdc_vaccines/__init__.py | 13 ++ cdc_vaccines/delphi_cdc_vaccines/__main__.py | 12 ++ cdc_vaccines/delphi_cdc_vaccines/constants.py | 54 ++++++ cdc_vaccines/delphi_cdc_vaccines/pull.py | 162 ++++++++++++++++++ cdc_vaccines/delphi_cdc_vaccines/run.py | 84 +++++++++ cdc_vaccines/params.json.template | 35 ++++ cdc_vaccines/setup.py | 29 ++++ cdc_vaccines/static/.gitignore | 0 .../tests/test_data/bad_extra_cols.csv | 8 + .../tests/test_data/bad_missing_cols.csv | 8 + .../tests/test_data/bad_missing_days.csv | 31 ++++ cdc_vaccines/tests/test_data/small.csv | 109 ++++++++++++ .../tests/test_data/small_confirmed.csv | 0 cdc_vaccines/tests/test_data/small_pull.csv | 4 + cdc_vaccines/tests/test_pull.py | 71 ++++++++ cdc_vaccines/tests/test_run.py | 80 +++++++++ 22 files changed, 856 insertions(+), 1 deletion(-) create mode 100644 cdc_vaccines/.pylintrc create mode 100644 cdc_vaccines/Makefile create mode 100644 cdc_vaccines/README.md create mode 100644 cdc_vaccines/REVIEW.md create mode 100644 cdc_vaccines/cache/.gitignore create mode 100644 cdc_vaccines/delphi_cdc_vaccines/__init__.py create mode 100644 cdc_vaccines/delphi_cdc_vaccines/__main__.py create mode 100644 cdc_vaccines/delphi_cdc_vaccines/constants.py create mode 100644 cdc_vaccines/delphi_cdc_vaccines/pull.py create mode 100644 cdc_vaccines/delphi_cdc_vaccines/run.py create mode 100644 cdc_vaccines/params.json.template create mode 100644 cdc_vaccines/setup.py create mode 100644 cdc_vaccines/static/.gitignore create mode 100644 cdc_vaccines/tests/test_data/bad_extra_cols.csv create mode 100644 
cdc_vaccines/tests/test_data/bad_missing_cols.csv create mode 100644 cdc_vaccines/tests/test_data/bad_missing_days.csv create mode 100644 cdc_vaccines/tests/test_data/small.csv create mode 100644 cdc_vaccines/tests/test_data/small_confirmed.csv create mode 100644 cdc_vaccines/tests/test_data/small_pull.csv create mode 100644 cdc_vaccines/tests/test_pull.py create mode 100644 cdc_vaccines/tests/test_run.py diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index adeb011a6..45b8aea32 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -16,7 +16,7 @@ jobs: if: github.event.pull_request.draft == false strategy: matrix: - packages: [_delphi_utils_python, changehc, claims_hosp, combo_cases_and_deaths, covid_act_now, doctor_visits, google_symptoms, hhs_hosp, hhs_facilities, jhu, nchs_mortality, nowcast, quidel, quidel_covidtest, safegraph_patterns, sir_complainsalot, usafacts] + packages: [_delphi_utils_python, changehc, claims_hosp, combo_cases_and_deaths, covid_act_now, doctor_visits, google_symptoms, hhs_hosp, hhs_facilities, jhu, nchs_mortality, nowcast, quidel, quidel_covidtest, safegraph_patterns, sir_complainsalot, usafacts, cdc_vaccines] defaults: run: working-directory: ${{ matrix.packages }} diff --git a/cdc_vaccines/.pylintrc b/cdc_vaccines/.pylintrc new file mode 100644 index 000000000..f30837c7e --- /dev/null +++ b/cdc_vaccines/.pylintrc @@ -0,0 +1,22 @@ + +[MESSAGES CONTROL] + +disable=logging-format-interpolation, + too-many-locals, + too-many-arguments, + # Allow pytest functions to be part of a class. + no-self-use, + # Allow pytest classes to have one test. + too-few-public-methods + +[BASIC] + +# Allow arbitrarily short-named variables. +variable-rgx=[a-z_][a-z0-9_]* +argument-rgx=[a-z_][a-z0-9_]* +attr-rgx=[a-z_][a-z0-9_]* + +[DESIGN] + +# Don't complain about pytest "unused" arguments. 
+ignored-argument-names=(_.*|run_as_module) \ No newline at end of file diff --git a/cdc_vaccines/Makefile b/cdc_vaccines/Makefile new file mode 100644 index 000000000..ca3013248 --- /dev/null +++ b/cdc_vaccines/Makefile @@ -0,0 +1,25 @@ +.PHONY = venv, lint, test, clean + +dir = $(shell find ./delphi_* -name __init__.py | grep -o 'delphi_[_[:alnum:]]*') + +venv: + python -m venv env + +install: venv + . env/bin/activate; \ + pip install wheel ; \ + pip install -e ../_delphi_utils_python ;\ + pip install -e . + +lint: + . env/bin/activate; pylint $(dir) + . env/bin/activate; pydocstyle $(dir) + +test: + . env/bin/activate ;\ + (cd tests && ../env/bin/pytest --cov=$(dir) --cov-report=term-missing) + +clean: + rm -rf env + rm -f params.json + diff --git a/cdc_vaccines/README.md b/cdc_vaccines/README.md new file mode 100644 index 000000000..b56682d0f --- /dev/null +++ b/cdc_vaccines/README.md @@ -0,0 +1,70 @@ +# CDC Vaccinations + +This indicator provides the number of official vaccinations in the US. We export the county-level +daily vaccination rates data as-is, and publish the result as a COVIDcast signal. +We also aggregate the data to the HHS, State, and Nation levels. +For detailed information see the file DETAILS.md contained in this directory. + +Note that individuals could be vaccinated outside of the US. Additionally, +there is no county-level data for counties in Texas and Hawaii. There are unknown +counties in each state and a row for unknown county and unknown state. + + +## Running the Indicator + +The indicator is run by directly executing the Python module contained in this +directory. The safest way to do this is to create a virtual environment, +install the common DELPHI tools, and then install the module and its +dependencies.
To do this, run the following command from this directory: + +``` +make install +``` + +This command will install the package in editable mode, so you can make changes that +will automatically propagate to the installed package. + +All of the user-changeable parameters are stored in `params.json`. To execute +the module and produce the output datasets (by default, in `receiving`), run +the following: + +``` +env/bin/python -m delphi_cdc_vaccines +``` + +If you want to enter the virtual environment in your shell, +you can run `source env/bin/activate`. Run `deactivate` to leave the virtual environment. + +Once you are finished, you can remove the virtual environment and +params file with the following: + +``` +make clean +``` + +## Testing the code + +To run static tests of the code style, run the following command: + +``` +make lint +``` + +Unit tests are also included in the module. To execute these, run the following +command from this directory: + +``` +make test +``` + +To run individual tests, run the following: + +``` +(cd tests && ../env/bin/pytest test_run.py --cov=delphi_cdc_vaccines --cov-report=term-missing) +``` + +The output will show the number of unit tests that passed and failed, along +with the percentage of code covered by the tests. + +None of the linting or unit tests should fail, and the code lines that are not covered by unit tests should be small and +should not include critical sub-routines. diff --git a/cdc_vaccines/REVIEW.md b/cdc_vaccines/REVIEW.md new file mode 100644 index 000000000..03f87b17a --- /dev/null +++ b/cdc_vaccines/REVIEW.md @@ -0,0 +1,38 @@ +## Code Review (Python) + +A code review of this module should include a careful look at the code and the +output. To assist in the process, but certainly not in place of it, please +check the following items.
+ +**Documentation** + +- [ ] the README.md file template is filled out and currently accurate; it is +possible to load and test the code using only the instructions given +- [ ] minimal docstrings (one line describing what the function does) are +included for all functions; full docstrings describing the inputs and expected +outputs should be given for non-trivial functions + +**Structure** + +- [ ] code should pass lint checks (`make lint`) +- [ ] any required metadata files are checked into the repository and placed +within the directory `static` +- [ ] any intermediate files that are created and stored by the module should +be placed in the directory `cache` +- [ ] final expected output files to be uploaded to the API are placed in the +`receiving` directory; output files should not be committed to the repository +- [ ] all options and API keys are passed through the file `params.json` +- [ ] template parameter file (`params.json.template`) is checked into the +code; no personal (e.g., usernames) or private (e.g., API keys) information is +included in this template file + +**Testing** + +- [ ] module can be installed in a new virtual environment (`make install`) +- [ ] reasonably high level of unit test coverage covering all of the main logic +of the code (e.g., missing coverage for raised errors that do not currently seem +possible to reach are okay; missing coverage for options that will be needed are +not) +- [ ] all unit tests run without errors (`make test`) +- [ ] indicator directory has been added to GitHub CI +(`covidcast-indicators/.github/workflows/python-ci.yml`) diff --git a/cdc_vaccines/cache/.gitignore b/cdc_vaccines/cache/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/cdc_vaccines/delphi_cdc_vaccines/__init__.py b/cdc_vaccines/delphi_cdc_vaccines/__init__.py new file mode 100644 index 000000000..6813595b4 --- /dev/null +++ b/cdc_vaccines/delphi_cdc_vaccines/__init__.py @@ -0,0 +1,13 @@ +# -*- coding: utf-8 -*- +"""Module
to pull and clean indicators from the CDC source. + +This file defines the functions that are made public by the module. As the +module is intended to be executed though the main method, these are primarily +for testing. +""" + +from __future__ import absolute_import +from . import pull +from . import run + +__version__ = "0.1.0" diff --git a/cdc_vaccines/delphi_cdc_vaccines/__main__.py b/cdc_vaccines/delphi_cdc_vaccines/__main__.py new file mode 100644 index 000000000..32fc0eecc --- /dev/null +++ b/cdc_vaccines/delphi_cdc_vaccines/__main__.py @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- +"""Call the function run_module when executed. + +This file indicates that calling the module (`python -m MODULE_NAME`) will +call the function `run_module` found within the run.py file. There should be +no need to change this template. +""" + +from delphi_utils import read_params +from .run import run_module # pragma: no cover + +run_module(read_params()) # pragma: no cover diff --git a/cdc_vaccines/delphi_cdc_vaccines/constants.py b/cdc_vaccines/delphi_cdc_vaccines/constants.py new file mode 100644 index 000000000..113ecb69e --- /dev/null +++ b/cdc_vaccines/delphi_cdc_vaccines/constants.py @@ -0,0 +1,54 @@ +"""Registry for variations.""" + + +from delphi_utils import Smoother + +FULL_VAC_SUM = "cumulative_counts_tot_vaccine" +FULL_VAC = "counts_tot_vaccine" +FULL_VAC_SUM_12P = "cumulative_counts_tot_vaccine_12P" +FULL_VAC_12P = "counts_tot_vaccine_12P" +FULL_VAC_SUM_18P = "cumulative_counts_tot_vaccine_18P" +FULL_VAC_18P = "counts_tot_vaccine_18P" +FULL_VAC_SUM_65P = "cumulative_counts_tot_vaccine_65P" +FULL_VAC_65P = "counts_tot_vaccine_65P" +PART_VAC_SUM = "cumulative_counts_part_vaccine" +PART_VAC = "counts_part_vaccine" +PART_VAC_SUM_12P = "cumulative_counts_part_vaccine_12P" +PART_VAC_12P = "counts_part_vaccine_12P" +PART_VAC_SUM_18P = "cumulative_counts_part_vaccine_18P" +PART_VAC_18P = "counts_part_vaccine_18P" +PART_VAC_SUM_65P = "cumulative_counts_part_vaccine_65P" 
# Cumulative signal columns, listed in the positional order the raw CDC columns
# take once the bookkeeping, percentage and SVI columns have been dropped.
_CUMULATIVE_COLUMNS = [
    "cumulative_counts_tot_vaccine",
    "cumulative_counts_tot_vaccine_12P",
    "cumulative_counts_tot_vaccine_18P",
    "cumulative_counts_tot_vaccine_65P",
    "cumulative_counts_part_vaccine",
    "cumulative_counts_part_vaccine_12P",
    "cumulative_counts_part_vaccine_18P",
    "cumulative_counts_part_vaccine_65P",
]

# Raw (lower-cased) columns that are never exported.
_BOOKKEEPING_COLUMNS = [
    "date",
    "recip_state",
    "series_complete_pop_pct",
    "mmwr_week",
    "recip_county",
    "state_id",
]


def _counts_name(cumulative_column: str) -> str:
    """Return the daily-count column name paired with a cumulative column."""
    return cumulative_column.replace("cumulative_", "", 1)


def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame:
    """Pull the latest data from the CDC on vaccines and conform it into a dataset.

    The output dataset has:
    - Each row corresponds to (County, Date), denoted (FIPS, timestamp)
    - Each row additionally has columns that correspond to the counts or
      cumulative counts of vaccination status (fully vaccinated,
      partially vaccinated) of various age groups (all, 12+, 18+, 65+)
      from December 13th 2020 until the latest date

    Note that the raw dataset gives the `cumulative` metrics, from which
    we compute `counts` by taking first differences. Hence, `counts`
    may be negative. This is wholly dependent on the quality of the raw
    dataset.

    Rows whose county is "UNK" are re-coded to the pseudo-FIPS XX000, where XX
    is the numeric state code (0 when the state itself is unknown, giving
    "00000").

    Parameters
    ----------
    base_url: str
        Base URL (or path) of the CDC vaccination CSV; passed to `pd.read_csv`.
    logger: Logger
        Logger that accepts keyword arguments on `info`.

    Returns
    -------
    pd.DataFrame
        Dataframe as described above, sorted by (fips, timestamp).

    Raises
    ------
    ValueError
        If an expected column is missing, if the columns left after dropping
        do not match the expected schema (length mismatch on rename), or if
        some day between the first and last timestamp has no rows at all.
    """
    # Read data
    df = pd.read_csv(base_url)
    # NOTE(review): "Date" is read without date parsing, so min/max below look
    # lexicographic rather than chronological -- confirm this is acceptable
    # for the log record.
    logger.info("data retrieved from source",
                num_rows=df.shape[0],
                num_cols=df.shape[1],
                min_date=min(df['Date']),
                max_date=max(df['Date']),
                checksum=hashlib.sha256(pd.util.hash_pandas_object(df).values).hexdigest())
    df.columns = [column.lower() for column in df.columns]
    df["recip_state"] = df["recip_state"].str.lower()

    # Every percentage / SVI column is dropped in addition to the bookkeeping
    # ones; only the raw cumulative counts are kept.
    drop_columns = set(_BOOKKEEPING_COLUMNS)
    drop_columns.update(
        column for column in df.columns if "pct" in column or "svi" in column
    )

    # Attach the numeric state code; states the mapper does not know become 0.
    df = GeoMapper().add_geocode(df, "state_id", "state_code",
                                 from_col="recip_state", new_col="state_id", dropna=False)
    df["state_id"] = df["state_id"].fillna("0").astype(int)

    # Change FIPS from "UNK" to XX000 for statewise unallocated rows.
    unassigned = df["fips"] == "UNK"
    df.loc[unassigned, "fips"] = df.loc[unassigned, "state_id"].values * 1000

    # Conform FIPS to a zero-padded 5-character string.
    df["fips"] = df["fips"].apply(lambda x: f"{int(x):05d}")
    # timestamp: str -> datetime
    df["timestamp"] = pd.to_datetime(df["date"])

    # Drop unnecessary columns (state is pre-encoded in fips)
    try:
        df = df.drop(columns=sorted(drop_columns))
    except KeyError as e:
        raise ValueError(
            "Tried to drop non-existent columns. The dataset "
            "schema may have changed. Please investigate and "
            "amend drop_columns."
        ) from e

    # Positional rename: pandas raises ValueError on a length mismatch, which
    # is how unexpected extra/missing count columns are detected.
    df.columns = ["fips", *_CUMULATIVE_COLUMNS, "timestamp"]

    # Prepend an all-zero "day before" row so that the first real day's count
    # equals its cumulative value after differencing. FIPS 00000 is handled
    # separately because its first day may differ from the global first day.
    # Series.min() (rather than builtin min) yields NaT instead of raising
    # when there are no 00000 rows; the comparison then selects nothing.
    is_unallocated = df["fips"] == "00000"
    first_day = df["timestamp"].min()
    first_unallocated_day = df.loc[is_unallocated, "timestamp"].min()
    baseline = pd.concat([
        df.loc[~is_unallocated & (df["timestamp"] == first_day)],
        df.loc[is_unallocated & (df["timestamp"] == first_unallocated_day)],
    ])
    baseline["timestamp"] = baseline["timestamp"] - pd.Timedelta(days=1)
    baseline[_CUMULATIVE_COLUMNS] = 0

    df = pd.concat([baseline, df])
    df = df.sort_values(["fips", "timestamp"]).reset_index(drop=True)

    # Obtain new counts as the first discrete difference of each cumulative.
    for cumulative_column in _CUMULATIVE_COLUMNS:
        df[_counts_name(cumulative_column)] = df[cumulative_column].diff()

    # Handle edge cases where we diffed across fips: the first row of every
    # FIPS has all signal columns (cumulative and counts) blanked.
    # NOTE(review): for a FIPS without a baseline row this blanks a real
    # cumulative value -- confirm this is intended.
    signal_columns = [c for c in df.columns if c not in ("timestamp", "fips")]
    starts_new_fips = df["fips"] != df["fips"].shift(1)
    df.loc[starts_new_fips, signal_columns] = np.nan

    # Final sanity check: every calendar day in the range must be present.
    unique_days = df["timestamp"].unique()
    min_timestamp = min(unique_days)
    max_timestamp = max(unique_days)
    n_days = (max_timestamp - min_timestamp) / np.timedelta64(1, "D") + 1
    if n_days != len(unique_days):
        raise ValueError(
            f"Not every day between {min_timestamp} and "
            f"{max_timestamp} is represented."
        )

    # Reorder: fips, timestamp, then each cumulative next to its daily count.
    output_columns = ["fips", "timestamp"]
    for cumulative_column in _CUMULATIVE_COLUMNS:
        output_columns += [cumulative_column, _counts_name(cumulative_column)]

    # NOTE(review): this filter is true for every row, so the baseline rows
    # are kept in the output -- confirm whether they were meant to be removed.
    return df.loc[
        df["timestamp"] >= min(df["timestamp"]),
        output_columns,
    ].reset_index(drop=True)
def run_module(params):
    """
    Run the indicator.

    Pulls the county-level CDC vaccination data once, then for every
    (signal, smoother, geo) combination aggregates it to the target geography,
    smooths it per geo_id and exports one CSV per day. Combinations that are
    both cumulative and 7-day averaged are not exported.

    Parameters
    ----------
    params: Dict[str, Any]
        Nested dictionary of parameters. Reads `params["common"]["export_dir"]`,
        the optional `params["common"]["log_filename"]` and
        `params["common"]["log_exceptions"]`, and
        `params["indicator"]["base_url"]`.
    """
    start_time = tm.time()
    common_params = params["common"]
    logger = get_structured_logger(
        __name__, filename=common_params.get("log_filename"),
        log_exceptions=common_params.get("log_exceptions", True))
    base_url = params["indicator"]["base_url"]

    ## build the base version of the signal at the most detailed geo level you can get.
    all_data = pull_cdcvacc_data(base_url, logger)

    # One mapper for the whole run instead of a new one per loop iteration.
    geo_mapper = GeoMapper()
    run_stats = []

    ## aggregate & smooth
    for sensor, smoother, geo in product(SIGNALS, SMOOTHERS, GEOS):
        logger.info("Running on ",
                    sensor=sensor,
                    smoother=smoother,
                    geo=geo)
        # Each SMOOTHERS entry is a (Smoother, signal-name suffix) pair.
        smoother_obj, suffix = smoother
        sensor_name = sensor + suffix

        # Smoothed cumulative signals are never exported, so skip them before
        # doing any geocoding or smoothing work.
        if "cumulative" in sensor_name and "7dav" in sensor_name:
            continue

        df = geo_mapper.replace_geocode(
            all_data[["timestamp", "fips", sensor]],
            from_col="fips",
            from_code="fips",
            new_col="geo_id",
            new_code=geo,
            date_col="timestamp")
        df["val"] = df[["geo_id", sensor]].groupby("geo_id")[sensor].transform(
            smoother_obj.smooth
        )
        df["se"] = np.nan
        df["sample_size"] = np.nan

        # don't export first 6 days for smoothed signals since they'll be nan.
        start_date = min(df.timestamp)
        if suffix:
            start_date += timedelta(6)
        exported_csv_dates = create_export_csv(
            df,
            common_params["export_dir"],
            geo,
            sensor_name,
            start_date=start_date)
        if len(exported_csv_dates) > 0:
            run_stats.append((max(exported_csv_dates), len(exported_csv_dates)))

    ## log this indicator run
    elapsed_time_in_seconds = round(tm.time() - start_time, 2)
    csv_export_count = sum(count for _, count in run_stats)
    # Oldest "latest exported date" across all signals; None when nothing was
    # exported, so the log carries explicit nulls.
    min_max_date = min(latest for latest, _ in run_stats) if run_stats else None
    if min_max_date is None:
        max_lag_in_days = None
        formatted_min_max_date = None
    else:
        max_lag_in_days = (datetime.now() - min_max_date).days
        formatted_min_max_date = min_max_date.strftime("%Y-%m-%d")
    logger.info("Completed indicator run",
                elapsed_time_in_seconds=elapsed_time_in_seconds,
                csv_export_count=csv_export_count,
                max_lag_in_days=max_lag_in_days,
                oldest_final_export_date=formatted_min_max_date)
"https://data.cdc.gov/api/views/8xkx-amqh/rows.csv", + "export_start_date": "2020-12-13" + }, + "validation": { + "common": { + "data_source": "cdc", + "span_length": 14, + "min_expected_lag": {"all": "1"}, + "max_expected_lag": {"all": "7"}, + "dry_run": true, + "suppressed_errors": [] + }, + "static": { + "minimum_sample_size": 0, + "missing_se_allowed": true, + "missing_sample_size_allowed": true + }, + "dynamic": {} + }, + "archive": { + "aws_credentials": { + "aws_access_key_id": "", + "aws_secret_access_key": "" + }, + "bucket_name": "", + "indicator_prefix": "usafacts", + "cache_dir": "./cache" + } +} diff --git a/cdc_vaccines/setup.py b/cdc_vaccines/setup.py new file mode 100644 index 000000000..8802dfd45 --- /dev/null +++ b/cdc_vaccines/setup.py @@ -0,0 +1,29 @@ +from setuptools import setup +from setuptools import find_packages + +required = [ + "numpy", + "pandas", + "pydocstyle", + "pytest", + "pytest-cov", + "pylint==2.8.3", + "delphi-utils", + "covidcast" +] + +setup( + name="delphi_cdc_vaccines", + version="0.0.1", + description="The number of people who are vaccinated per county.", + author="Ananya Joshi", + author_email="aajoshi@andrew.cmu.edu", + url="https://github.com/cmu-delphi/covidcast-indicators", + install_requires=required, + classifiers=[ + "Development Status :: 0 - Attempt", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3.8", + ], + packages=find_packages(), +) diff --git a/cdc_vaccines/static/.gitignore b/cdc_vaccines/static/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/cdc_vaccines/tests/test_data/bad_extra_cols.csv b/cdc_vaccines/tests/test_data/bad_extra_cols.csv new file mode 100644 index 000000000..6642296a1 --- /dev/null +++ b/cdc_vaccines/tests/test_data/bad_extra_cols.csv @@ -0,0 +1,8 @@ 
+Date,FIPS,MMWR_week,Recip_County,Recip_State,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_12Plus,Series_Complete_12PlusPop_Pct,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Recip,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_12Plus,Administered_Dose1_Recip_12PlusPop_Pct,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65Plus,Administered_Dose1_Recip_65PlusPop_Pct,SVI_CTGY,Series_Complete_Pop_Pct_SVI,Series_Complete_12PlusPop_Pct_SVI,Series_Complete_18PlusPop_Pct_SVI,Series_Complete_65PlusPop_Pct_SVI,Extra_Administered_Dose1_Recip_12PlusPop_Pct,Extra_Administered_Dose1_Recip_18Plus,Extra_Administered_Dose1_Recip_18PlusPop_Pct,Extra_Administered_Dose1_Recip_65Plus,Extra_Administered_Dose1_Recip_65PlusPop_Pct,Extra_SVI_CTGY +8/26/21,UNK,34,Unknown County,UNK,0,789625,789591,0,733809,0,55620,0,0,1119266,0,1119203,0,1035082,0,75596,0,,,,,,0,1035082,0,75596,0, +8/26/21,32013,34,Humboldt County,NV,32.9,5537,5535,40.2,5368,43.6,1696,69.9,94.9,6293,37.4,6290,45.6,6014,48.9,1877,77.3,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,45.6,6014,48.9,1877,77.3,Mod-High +8/26/21,47131,34,Obion County,TN,28.4,8529,8529,33.1,8412,35.7,4114,66,97.8,10758,35.8,10755,41.7,10520,44.6,4625,74.2,High,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI,Mod-High VC/High SVI,41.7,10520,44.6,4625,74.2,High +8/26/21,48305,34,Lynn County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,,,0,0,0,0,0,High +8/26/21,UNK,34,Unknown County,VA,0,2358403,2352494,0,2206696,0,603704,0,51.3,2705300,0,2696267,0,2516857,0,660454,0,,,,,,0,2516857,0,660454,0, +8/26/21,51678,34,Lexington city,VA,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Low-Mod,,,,,0,0,0,0,0,Low-Mod +8/26/21,50025,34,Windham 
County,VT,56.8,23963,23963,64,22620,65.1,7718,76.5,73.7,27598,65.4,27588,73.7,25830,74.3,8588,85.1,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,73.7,25830,74.3,8588,85.1,Low-Mod \ No newline at end of file diff --git a/cdc_vaccines/tests/test_data/bad_missing_cols.csv b/cdc_vaccines/tests/test_data/bad_missing_cols.csv new file mode 100644 index 000000000..4efd5c1bd --- /dev/null +++ b/cdc_vaccines/tests/test_data/bad_missing_cols.csv @@ -0,0 +1,8 @@ +Date,FIPS,MMWR_week,Recip_County,Recip_State,Series_Complete_Pop_Pct,Series_Complete_65PlusPop_Pct,Administered_Dose1_Recip_12Plus,Administered_Dose1_Recip_12PlusPop_Pct,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65Plus +8/26/21,UNK,34,Unknown County,UNK,0,0,1119203,0,1035082,0,75596 +8/26/21,32013,34,Humboldt County,NV,32.9,69.9,6290,45.6,6014,48.9,1877 +8/26/21,47131,34,Obion County,TN,28.4,66,10755,41.7,10520,44.6,4625 +8/26/21,48305,34,Lynn County,TX,0,0,0,0,0,0,0 +8/26/21,UNK,34,Unknown County,VA,0,0,2696267,0,2516857,0,660454 +8/26/21,51678,34,Lexington city,VA,0,0,0,0,0,0,0 +8/26/21,50025,34,Windham County,VT,56.8,76.5,27588,73.7,25830,74.3,8588 \ No newline at end of file diff --git a/cdc_vaccines/tests/test_data/bad_missing_days.csv b/cdc_vaccines/tests/test_data/bad_missing_days.csv new file mode 100644 index 000000000..75a15510c --- /dev/null +++ b/cdc_vaccines/tests/test_data/bad_missing_days.csv @@ -0,0 +1,31 @@ 
+Date,FIPS,MMWR_week,Recip_County,Recip_State,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_12Plus,Series_Complete_12PlusPop_Pct,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Recip,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_12Plus,Administered_Dose1_Recip_12PlusPop_Pct,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65Plus,Administered_Dose1_Recip_65PlusPop_Pct,SVI_CTGY,Series_Complete_Pop_Pct_SVI,Series_Complete_12PlusPop_Pct_SVI,Series_Complete_18PlusPop_Pct_SVI,Series_Complete_65PlusPop_Pct_SVI +8/25/21,26007,34,Alpena County,MI,44.8,12730,12730,51,12344,53.5,5137,75.8,93.8,13426,47.3,13425,53.8,13293,57.6,5593,82.5,Low-Mod,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/25/21,30071,34,Phillips County,MT,33.8,1337,1337,40.1,1295,43,618,67.5,95.6,1513,38.3,1511,45.4,1450,48.1,665,72.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/25/21,41029,34,Jackson County,OR,46,101653,101502,53.2,97081,55.2,37628,75.5,97.7,115107,52.1,114881,60.2,109038,62,40450,81.1,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/25/21,UNK,34,Unknown County,UNK,0,787349,787315,0,731890,0,55518,0,0,1116357,0,1116294,0,1032597,0,75307,0,,,,, +8/25/21,49041,34,Sevier County,UT,30.7,6630,6630,37.5,6350,41.3,2602,72.9,97.2,8092,37.4,8089,45.8,7606,49.5,2928,82.1,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/25/21,26013,34,Baraga County,MI,48.8,4002,4001,54.4,3927,57.7,1440,77.1,93.8,4225,51.5,4224,57.5,4156,61,1481,79.3,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/24/21,30071,34,Phillips 
County,MT,33.8,1337,1337,40.1,1295,43,618,67.5,95.6,1513,38.3,1511,45.4,1450,48.1,665,72.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/24/21,26007,34,Alpena County,MI,44.8,12719,12719,50.9,12338,53.4,5137,75.8,93.8,13422,47.3,13421,53.8,13289,57.6,5593,82.5,Low-Mod,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/24/21,26013,34,Baraga County,MI,48.8,4002,4001,54.4,3927,57.7,1440,77.1,93.8,4225,51.5,4224,57.5,4156,61,1481,79.3,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/24/21,41029,34,Jackson County,OR,46,101651,101500,53.2,97079,55.2,37627,75.5,97.7,115101,52.1,114875,60.2,109032,62,40448,81.1,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/24/21,49041,34,Sevier County,UT,30.6,6607,6607,37.4,6328,41.2,2597,72.8,97.2,8050,37.2,8047,45.6,7567,49.2,2921,81.9,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/24/21,UNK,34,Unknown County,UNK,0,785133,785100,0,730087,0,55397,0,0,1113646,0,1113584,0,1030330,0,75003,0,,,,, +8/23/21,41029,34,Jackson County,OR,45.9,101501,101350,53.1,96953,55.1,37621,75.5,97.7,114861,52,114635,60,108835,61.9,40432,81.1,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/23/21,49041,34,Sevier County,UT,30.3,6557,6557,37.1,6295,41,2586,72.5,97.2,7920,36.6,7918,44.8,7478,48.7,2913,81.7,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/23/21,26007,34,Alpena County,MI,44.8,12714,12714,50.9,12334,53.4,5137,75.8,93.8,13422,47.3,13421,53.8,13289,57.6,5593,82.5,Low-Mod,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/23/21,30071,34,Phillips County,MT,33.8,1336,1336,40.1,1294,42.9,617,67.4,95.6,1513,38.3,1511,45.4,1450,48.1,665,72.6,Low-Mod,Low-Mod VC/Low-Mod 
SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/23/21,26013,34,Baraga County,MI,48.7,4001,4000,54.4,3926,57.6,1440,77.1,93.8,4225,51.5,4224,57.5,4156,61,1481,79.3,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/23/21,UNK,34,Unknown County,UNK,0,783092,783059,0,728591,0,55262,0,0,1111277,0,1111215,0,1028562,0,74699,0,,,,, +8/21/21,49041,33,Sevier County,UT,30.3,6557,6557,37.1,6295,41,2586,72.5,97.2,7919,36.6,7917,44.8,7477,48.7,2913,81.7,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/21/21,26013,33,Baraga County,MI,48.7,3997,3996,54.4,3922,57.6,1438,77,93.8,4224,51.5,4223,57.5,4155,61,1480,79.3,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/21/21,30071,33,Phillips County,MT,33.8,1335,1335,40.1,1293,42.9,617,67.4,95.6,1513,38.3,1511,45.4,1450,48.1,665,72.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/21/21,41029,33,Jackson County,OR,45.7,101069,100918,52.9,96575,54.9,37554,75.3,97.7,114032,51.6,113806,59.6,108142,61.5,40324,80.9,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/21/21,26007,33,Alpena County,MI,44.7,12697,12697,50.9,12320,53.4,5133,75.7,93.8,13418,47.2,13417,53.7,13285,57.5,5592,82.5,Low-Mod,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/21/21,UNK,33,Unknown County,UNK,0,777853,777820,0,724853,0,55045,0,0,1103847,0,1103785,0,1023256,0,74303,0,,,,, +8/19/21,26013,33,Baraga County,MI,48.6,3991,3990,54.3,3916,57.5,1437,77,93.8,4220,51.4,4219,57.4,4151,60.9,1480,79.3,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/19/21,41029,33,Jackson County,OR,45.6,100654,100503,52.6,96215,54.7,37479,75.2,97.7,113175,51.2,112949,59.2,107380,61.1,40201,80.6,Mod-High,Mod-High 
VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/19/21,UNK,33,Unknown County,UNK,0,772959,772926,0,720903,0,54789,0,0,1096323,0,1096261,0,1017125,0,73976,0,,,,, +8/19/21,30071,33,Phillips County,MT,33.5,1326,1326,39.8,1284,42.6,615,67.1,95.6,1501,38,1499,45,1438,47.7,664,72.5,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/19/21,49041,33,Sevier County,UT,30.2,6526,6526,37,6267,40.8,2576,72.2,97.1,7827,36.2,7825,44.3,7395,48.1,2904,81.4,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/19/21,26007,33,Alpena County,MI,44.6,12662,12662,50.7,12296,53.3,5130,75.7,93.8,13406,47.2,13405,53.7,13276,57.5,5591,82.5,Low-Mod,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI \ No newline at end of file diff --git a/cdc_vaccines/tests/test_data/small.csv b/cdc_vaccines/tests/test_data/small.csv new file mode 100644 index 000000000..d4243a035 --- /dev/null +++ b/cdc_vaccines/tests/test_data/small.csv @@ -0,0 +1,109 @@ +Date,FIPS,MMWR_week,Recip_County,Recip_State,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_12Plus,Series_Complete_12PlusPop_Pct,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Recip,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_12Plus,Administered_Dose1_Recip_12PlusPop_Pct,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65Plus,Administered_Dose1_Recip_65PlusPop_Pct,SVI_CTGY,Series_Complete_Pop_Pct_SVI,Series_Complete_12PlusPop_Pct_SVI,Series_Complete_18PlusPop_Pct_SVI,Series_Complete_65PlusPop_Pct_SVI +8/19/21,72121,33,Sabana Grande Municipio,PR,66.1,14360,14360,74.2,13402,75.2,4236,99.9,96,16305,75.1,16305,84.2,15052,84.5,4587,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI 
+8/19/21,72129,33,San Lorenzo Municipio,PR,56.9,20489,20481,63.8,18994,64.3,4771,78,96,24048,66.8,24031,74.9,22111,74.9,5805,94.9,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/19/21,55031,33,Douglas County,WI,35.2,15169,15169,40.3,14557,41.9,5178,62.5,97,18005,41.7,18005,47.9,17155,49.4,5778,69.7,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/19/21,47067,33,Hancock County,TN,24.7,1634,1634,28.6,1613,30.8,676,47,97.8,1818,27.5,1818,31.8,1789,34.2,726,50.5,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/19/21,UNK,33,Unknown County,UNK,0,772959,772926,0,720903,0,54789,0,0,1096323,0,1096261,0,1017125,0,73976,0,,,,, +8/19/21,48153,33,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/19/21,49031,33,Piute County,UT,29.7,439,439,33.7,435,38.7,244,58.7,97.1,527,35.6,527,40.5,518,46.1,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/19/21,1043,33,Cullman County,AL,27.7,23211,23210,32.5,22877,35.2,9906,63,91.9,29475,35.2,29470,41.3,28641,44.1,11151,70.9,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/19/21,UNK,33,Unknown County,TN,0,59660,59642,0,58874,0,15125,0,97.8,74682,0,74626,0,72349,0,17334,0,,,,, +8/19/21,18109,33,Morgan County,IN,44.5,31398,31397,51.9,30057,55,10392,85.2,98.6,33772,47.9,33771,55.8,32057,58.6,10663,87.4,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/19/21,56009,33,Converse County,WY,25.1,3466,3466,29.9,3397,32.5,1436,60.4,96.4,3971,28.7,3970,34.2,3864,37,1604,67.4,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/19/21,18099,33,Marshall County,IN,36.7,16967,16967,43.6,16139,46.5,6135,72.6,98.6,18507,40,18505,47.6,17320,49.9,6355,75.2,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI 
+8/18/21,48153,33,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/18/21,49031,33,Piute County,UT,29.7,439,439,33.7,435,38.7,244,58.7,97.1,527,35.6,527,40.5,518,46.1,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/18/21,55031,33,Douglas County,WI,35.1,15136,15136,40.2,14529,41.9,5171,62.4,97,17954,41.6,17954,47.7,17116,49.3,5773,69.7,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/18/21,72129,33,San Lorenzo Municipio,PR,56.9,20465,20457,63.7,18973,64.2,4768,77.9,96,23962,66.6,23945,74.6,22040,74.6,5801,94.8,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/18/21,56009,33,Converse County,WY,25.1,3465,3465,29.8,3396,32.5,1436,60.4,96.4,3963,28.7,3962,34.1,3856,36.9,1604,67.4,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/18/21,1043,33,Cullman County,AL,27.6,23088,23087,32.4,22765,35,9883,62.8,91.9,29328,35,29323,41.1,28502,43.9,11133,70.8,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/18/21,18109,33,Morgan County,IN,44.5,31348,31347,51.8,30021,54.9,10388,85.1,98.6,33713,47.8,33712,55.7,32002,58.5,10656,87.3,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/18/21,72121,33,Sabana Grande Municipio,PR,65.9,14319,14319,73.9,13369,75,4233,99.9,96,16232,74.8,16232,83.8,14988,84.1,4574,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/18/21,18099,33,Marshall County,IN,36.6,16939,16939,43.5,16112,46.4,6128,72.5,98.6,18474,39.9,18472,47.5,17293,49.8,6355,75.2,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/18/21,UNK,33,Unknown County,UNK,0,770751,770718,0,719193,0,54698,0,0,1092878,0,1092816,0,1014393,0,73827,0,,,,, +8/18/21,UNK,33,Unknown 
County,TN,0,59506,59488,0,58728,0,15089,0,97.8,74350,0,74295,0,72040,0,17267,0,,,,, +8/18/21,47067,33,Hancock County,TN,24.6,1631,1631,28.5,1610,30.7,675,46.9,97.8,1810,27.3,1810,31.6,1783,34,724,50.3,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/17/21,UNK,33,Unknown County,UNK,0,768634,768601,0,717447,0,54645,0,0,1089178,0,1089116,0,1011387,0,73711,0,,,,, +8/17/21,18099,33,Marshall County,IN,36.5,16891,16891,43.4,16078,46.3,6123,72.4,98.6,18392,39.8,18390,47.3,17259,49.7,6354,75.2,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/17/21,55031,33,Douglas County,WI,35,15121,15121,40.2,14515,41.8,5168,62.4,97,17936,41.6,17936,47.7,17098,49.3,5771,69.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/17/21,49031,33,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/17/21,72129,33,San Lorenzo Municipio,PR,56.9,20465,20457,63.7,18973,64.2,4768,77.9,96,23962,66.6,23945,74.6,22040,74.6,5801,94.8,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/17/21,UNK,33,Unknown County,TN,0,59345,59327,0,58576,0,15053,0,97.8,74078,0,74023,0,71776,0,17214,0,,,,, +8/17/21,56009,33,Converse County,WY,25.1,3463,3463,29.8,3394,32.5,1436,60.4,96.4,3961,28.7,3960,34.1,3854,36.9,1604,67.4,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/17/21,47067,33,Hancock County,TN,24.6,1629,1629,28.5,1608,30.7,675,46.9,97.8,1806,27.3,1806,31.6,1779,34,724,50.3,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/17/21,18109,33,Morgan County,IN,44.4,31316,31315,51.7,29996,54.9,10387,85.1,98.6,33662,47.8,33661,55.6,31960,58.4,10651,87.3,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/17/21,48153,33,Floyd 
County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/17/21,1043,33,Cullman County,AL,27.5,23032,23031,32.3,22712,35,9875,62.8,91.9,29254,34.9,29249,41,28431,43.8,11126,70.7,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/17/21,72121,33,Sabana Grande Municipio,PR,65.9,14319,14319,73.9,13369,75,4233,99.9,96,16232,74.8,16232,83.8,14988,84.1,4574,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/16/21,UNK,33,Unknown County,TN,0,59321,59303,0,58555,0,15048,0,97.8,73946,0,73891,0,71657,0,17203,0,,,,, +8/16/21,18099,33,Marshall County,IN,36.5,16879,16879,43.4,16068,46.3,6122,72.4,98.6,18369,39.7,18367,47.2,17239,49.7,6351,75.1,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/16/21,1043,33,Cullman County,AL,27.4,22971,22970,32.2,22657,34.9,9869,62.7,91.9,29137,34.8,29132,40.9,28333,43.6,11111,70.6,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/16/21,72129,33,San Lorenzo Municipio,PR,56.8,20426,20418,63.6,18941,64.1,4761,77.8,96,23869,66.3,23852,74.3,21971,74.4,5797,94.8,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/16/21,55031,33,Douglas County,WI,35,15120,15120,40.2,14515,41.8,5168,62.4,97,17928,41.5,17928,47.7,17090,49.2,5771,69.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/16/21,72121,33,Sabana Grande Municipio,PR,65.9,14305,14305,73.9,13359,75,4231,99.9,96,16159,74.4,16159,83.4,14924,83.8,4561,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/16/21,56009,33,Converse County,WY,24.8,3433,3433,29.6,3369,32.2,1431,60.2,96.4,3902,28.2,3901,33.6,3809,36.5,1590,66.8,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/16/21,18109,33,Morgan 
County,IN,44.4,31271,31270,51.6,29963,54.8,10384,85.1,98.6,33622,47.7,33621,55.5,31929,58.4,10648,87.3,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/16/21,UNK,33,Unknown County,UNK,0,766588,766555,0,716121,0,54544,0,0,1085417,0,1085355,0,1008952,0,73510,0,,,,, +8/16/21,47067,33,Hancock County,TN,24.6,1629,1629,28.5,1608,30.7,676,47,97.8,1805,27.3,1805,31.6,1778,34,724,50.3,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/16/21,48153,33,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/16/21,49031,33,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/15/21,72129,33,San Lorenzo Municipio,PR,56.7,20413,20405,63.6,18928,64.1,4755,77.7,96,23830,66.2,23813,74.2,21945,74.3,5792,94.7,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/15/21,48153,33,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/15/21,47067,33,Hancock County,TN,24.6,1628,1628,28.5,1607,30.7,676,47,97.8,1804,27.3,1804,31.5,1777,33.9,723,50.2,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/15/21,1043,33,Cullman County,AL,27.3,22861,22860,32.1,22570,34.7,9861,62.7,91.9,28943,34.6,28938,40.6,28169,43.4,11095,70.5,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/15/21,18109,33,Morgan County,IN,44.3,31220,31219,51.6,29919,54.7,10381,85.1,98.6,33573,47.6,33572,55.5,31893,58.3,10645,87.2,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/15/21,55031,33,Douglas County,WI,34.9,15080,15080,40.1,14483,41.7,5162,62.3,97,17875,41.4,17875,47.5,17057,49.1,5765,69.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/15/21,72121,33,Sabana Grande 
Municipio,PR,65.9,14299,14299,73.8,13355,75,4230,99.9,96,16119,74.2,16119,83.2,14893,83.6,4555,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/15/21,56009,33,Converse County,WY,24.8,3433,3433,29.6,3369,32.2,1431,60.2,96.4,3901,28.2,3900,33.6,3808,36.4,1590,66.8,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/15/21,49031,33,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/15/21,UNK,33,Unknown County,UNK,0,764575,764542,0,714649,0,54426,0,0,1081257,0,1081195,0,1005831,0,73305,0,,,,, +8/15/21,UNK,33,Unknown County,TN,0,59136,59118,0,58379,0,15004,0,97.8,73626,0,73571,0,71349,0,17149,0,,,,, +8/15/21,18099,33,Marshall County,IN,36.5,16868,16868,43.3,16059,46.3,6122,72.4,98.6,18335,39.6,18333,47.1,17209,49.6,6350,75.1,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/14/21,47067,32,Hancock County,TN,24.6,1626,1626,28.4,1605,30.6,676,47,97.8,1797,27.1,1797,31.4,1771,33.8,721,50.1,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/14/21,18099,32,Marshall County,IN,36.4,16854,16854,43.3,16047,46.2,6120,72.4,98.6,18308,39.6,18306,47,17186,49.5,6347,75.1,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/14/21,72121,32,Sabana Grande Municipio,PR,65.8,14282,14282,73.8,13340,74.9,4230,99.9,96,16094,74.1,16094,83.1,14873,83.5,4554,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/14/21,55031,32,Douglas County,WI,34.9,15080,15080,40.1,14483,41.7,5162,62.3,97,17875,41.4,17875,47.5,17057,49.1,5765,69.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/14/21,18109,32,Morgan 
County,IN,44.2,31191,31190,51.5,29898,54.7,10380,85.1,98.6,33520,47.6,33519,55.4,31853,58.3,10643,87.2,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/14/21,72129,32,San Lorenzo Municipio,PR,56.6,20380,20372,63.5,18902,64,4751,77.7,96,23744,66,23727,73.9,21886,74.1,5789,94.6,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/14/21,UNK,32,Unknown County,UNK,0,762688,762656,0,713187,0,54336,0,0,1077619,0,1077559,0,1002992,0,73097,0,,,,, +8/14/21,48153,32,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/14/21,56009,32,Converse County,WY,24.8,3433,3433,29.6,3369,32.2,1431,60.2,96.4,3901,28.2,3900,33.6,3808,36.4,1590,66.8,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/14/21,1043,32,Cullman County,AL,27.2,22777,22776,31.9,22497,34.6,9848,62.6,91.8,28740,34.3,28735,40.3,27994,43.1,11077,70.4,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/14/21,UNK,32,Unknown County,TN,0,59055,59037,0,58308,0,14997,0,97.8,73414,0,73359,0,71149,0,17126,0,,,,, +8/14/21,49031,32,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/13/21,18099,32,Marshall County,IN,36.4,16832,16832,43.3,16028,46.2,6117,72.3,98.6,18263,39.5,18261,46.9,17147,49.4,6343,75,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/13/21,1043,32,Cullman County,AL,27.2,22758,22757,31.9,22480,34.6,9843,62.6,91.8,28693,34.3,28688,40.2,27952,43,11073,70.4,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/13/21,47067,32,Hancock County,TN,24.5,1624,1624,28.4,1605,30.6,676,47,97.8,1797,27.1,1797,31.4,1771,33.8,721,50.1,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/13/21,56009,32,Converse 
County,WY,24.8,3433,3433,29.6,3369,32.2,1431,60.2,96.4,3900,28.2,3899,33.6,3807,36.4,1590,66.8,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/13/21,UNK,32,Unknown County,TN,0,58888,58870,0,58148,0,14972,0,97.8,73059,0,73004,0,70807,0,17061,0,,,,, +8/13/21,18109,32,Morgan County,IN,44.2,31134,31133,51.4,29852,54.6,10377,85.1,98.6,33443,47.4,33442,55.2,31787,58.1,10635,87.2,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/13/21,49031,32,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/13/21,72129,32,San Lorenzo Municipio,PR,56.5,20335,20327,63.3,18865,63.9,4746,77.6,96,23666,65.8,23649,73.7,21826,73.9,5788,94.6,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/13/21,48153,32,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/13/21,55031,32,Douglas County,WI,34.9,15058,15058,40,14466,41.7,5158,62.2,97,17847,41.4,17847,47.5,17031,49.1,5762,69.5,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/13/21,72121,32,Sabana Grande Municipio,PR,65.6,14253,14253,73.6,13316,74.7,4225,99.9,96,16071,74,16071,83,14852,83.4,4550,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/13/21,UNK,32,Unknown County,UNK,0,760315,760283,0,711222,0,54262,0,0,1072813,0,1072753,0,998950,0,72929,0,,,,, +8/12/21,UNK,32,Unknown County,UNK,0,758305,758273,0,709538,0,54190,0,0,1068845,0,1068785,0,995682,0,72750,0,,,,, +8/12/21,18099,32,Marshall County,IN,36.4,16817,16817,43.2,16013,46.1,6116,72.3,98.6,18242,39.4,18240,46.9,17129,49.4,6341,75,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/12/21,49031,32,Piute 
County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/12/21,72129,32,San Lorenzo Municipio,PR,56.4,20296,20288,63.2,18833,63.8,4740,77.5,96,23615,65.6,23598,73.5,21786,73.8,5784,94.5,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/12/21,72121,32,Sabana Grande Municipio,PR,65.6,14234,14234,73.5,13303,74.7,4220,99.9,96,16057,74,16057,82.9,14840,83.3,4548,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/12/21,48153,32,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/12/21,55031,32,Douglas County,WI,34.9,15040,15040,40,14452,41.6,5153,62.2,97,17820,41.3,17820,47.4,17007,49,5756,69.4,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/12/21,18109,32,Morgan County,IN,44.1,31079,31078,51.3,29808,54.5,10372,85,98.6,33362,47.3,33361,55.1,31719,58,10630,87.1,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/12/21,56009,32,Converse County,WY,24.8,3433,3433,29.6,3369,32.2,1431,60.2,96.4,3899,28.2,3898,33.6,3806,36.4,1589,66.8,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/12/21,UNK,32,Unknown County,TN,0,58763,58745,0,58026,0,14940,0,97.8,72726,0,72672,0,70489,0,17016,0,,,,, +8/12/21,1043,32,Cullman County,AL,27.1,22693,22692,31.8,22421,34.5,9833,62.5,91.8,28535,34.1,28530,40,27817,42.8,11062,70.3,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/12/21,47067,32,Hancock County,TN,24.5,1623,1623,28.4,1604,30.6,676,47,97.8,1795,27.1,1795,31.4,1769,33.8,720,50,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/11/21,18099,32,Marshall County,IN,36.3,16794,16794,43.2,15996,46.1,6113,72.3,98.6,18199,39.3,18197,46.8,17097,49.3,6338,75,Mod-High,Low-Mod VC/Mod-High 
SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/11/21,72121,32,Sabana Grande Municipio,PR,65.2,14166,14166,73.2,13266,74.5,4214,99.9,96,16023,73.8,16023,82.7,14818,83.2,4545,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/11/21,72129,32,San Lorenzo Municipio,PR,56.3,20279,20271,63.2,18821,63.7,4738,77.4,96,23592,65.6,23575,73.4,21767,73.7,5783,94.5,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/11/21,UNK,32,Unknown County,TN,0,58601,58583,0,57870,0,14895,0,97.8,72472,0,72418,0,70251,0,16956,0,,,,, +8/11/21,47067,32,Hancock County,TN,24.5,1622,1622,28.4,1603,30.6,675,46.9,97.8,1794,27.1,1794,31.4,1768,33.8,720,50,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/11/21,48153,32,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/11/21,UNK,32,Unknown County,UNK,0,756368,756338,0,707922,0,54140,0,0,1065083,0,1065026,0,992587,0,72616,0,,,,, +8/11/21,55031,32,Douglas County,WI,34.8,15014,15014,39.9,14434,41.6,5147,62.1,97,17792,41.2,17792,47.3,16986,48.9,5754,69.4,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/11/21,56009,32,Converse County,WY,24.8,3429,3429,29.5,3365,32.2,1429,60.1,96.4,3895,28.2,3894,33.5,3802,36.4,1587,66.7,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/11/21,18109,32,Morgan County,IN,44,31047,31046,51.3,29782,54.5,10371,85,98.6,33318,47.3,33317,55,31682,57.9,10627,87.1,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/11/21,49031,32,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/11/21,1043,32,Cullman County,AL,27,22603,22602,31.7,22338,34.4,9821,62.4,91.8,28324,33.8,28319,39.7,27622,42.5,11036,70.1,Mod-High,Low VC/Mod-High SVI,Low-Mod 
VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI \ No newline at end of file diff --git a/cdc_vaccines/tests/test_data/small_confirmed.csv b/cdc_vaccines/tests/test_data/small_confirmed.csv new file mode 100644 index 000000000..e69de29bb diff --git a/cdc_vaccines/tests/test_data/small_pull.csv b/cdc_vaccines/tests/test_data/small_pull.csv new file mode 100644 index 000000000..812835e37 --- /dev/null +++ b/cdc_vaccines/tests/test_data/small_pull.csv @@ -0,0 +1,4 @@ +Date,FIPS,MMWR_week,Recip_County,Recip_State,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_12Plus,Series_Complete_12PlusPop_Pct,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Recip,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_12Plus,Administered_Dose1_Recip_12PlusPop_Pct,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65Plus,Administered_Dose1_Recip_65PlusPop_Pct,SVI_CTGY,Series_Complete_Pop_Pct_SVI,Series_Complete_12PlusPop_Pct_SVI,Series_Complete_18PlusPop_Pct_SVI,Series_Complete_65PlusPop_Pct_SVI +8/26/21,UNK,34,Unknown County,UNK,0,789625,789591,0,733809,0,55620,0,0,1119266,0,1119203,0,1035082,0,75596,0,,,,, +8/26/21,32013,34,Humboldt County,NV,32.9,5537,5535,40.2,5368,43.6,1696,69.9,94.9,6293,37.4,6290,45.6,6014,48.9,1877,77.3,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/26/21,48305,34,Lynn County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, \ No newline at end of file diff --git a/cdc_vaccines/tests/test_pull.py b/cdc_vaccines/tests/test_pull.py new file mode 100644 index 000000000..f49d0eff6 --- /dev/null +++ b/cdc_vaccines/tests/test_pull.py @@ -0,0 +1,71 @@ +"""Tests for running the CDC Vaccine indicator.""" +import pytest +import logging +from unittest.mock import patch + +import pandas as pd +import numpy as np +from delphi_cdc_vaccines.pull import 
pull_cdcvacc_data + +from test_run import local_fetch + +BASE_URL_GOOD = "test_data/small_pull.csv" + +BASE_URL_BAD = { + "missing_days": "test_data/bad_missing_days.csv", + "missing_cols": "test_data/bad_missing_cols.csv", + "extra_cols": "test_data/bad_extra_cols.csv" +} + +TEST_LOGGER = logging.getLogger() + +class TestPullCDCVaccines: + """Tests for the `pull_cdcvacc_data()` function.""" + def test_good_file(self): + """Test the expected output from a smaller file.""" + df = pull_cdcvacc_data(BASE_URL_GOOD, TEST_LOGGER) + expected_df = pd.DataFrame({ + "fips": ["00000","00000","32013","32013","48305","48305"], + "timestamp": [pd.Timestamp("2021-08-25"), pd.Timestamp("2021-08-26"), + pd.Timestamp("2021-08-25"), pd.Timestamp("2021-08-26"), + pd.Timestamp("2021-08-25"), pd.Timestamp("2021-08-26")], + "cumulative_counts_tot_vaccine": [np.nan,789625.0,np.nan,5537.0,np.nan,0.0], + "counts_tot_vaccine": [np.nan,789625.0,np.nan,5537.0,np.nan,0.0], + "cumulative_counts_tot_vaccine_12P": [np.nan,789591.0,np.nan,5535.0,np.nan,0.0], + "counts_tot_vaccine_12P": [np.nan,789591.0,np.nan,5535.0,np.nan,0.0], + "cumulative_counts_tot_vaccine_18P": [np.nan,733809.0,np.nan,5368.0,np.nan,0.0], + "counts_tot_vaccine_18P": [np.nan,733809.0,np.nan,5368.0,np.nan,0.0], + "cumulative_counts_tot_vaccine_65P": [np.nan,55620.0,np.nan,1696.0,np.nan,0.0], + "counts_tot_vaccine_65P": [np.nan,55620.0,np.nan,1696.0,np.nan,0.0], + "cumulative_counts_part_vaccine": [np.nan,1119266.0,np.nan,6293.0,np.nan,0.0], + "counts_part_vaccine": [np.nan,1119266.0,np.nan,6293.0,np.nan,0.0], + "cumulative_counts_part_vaccine_12P": [np.nan,1119203.0,np.nan,6290.0,np.nan,0.0], + "counts_part_vaccine_12P": [np.nan,1119203.0,np.nan,6290.0,np.nan,0.0], + "cumulative_counts_part_vaccine_18P": [np.nan,1035082.0,np.nan,6014.0,np.nan,0.0], + "counts_part_vaccine_18P": [np.nan,1035082.0,np.nan,6014.0,np.nan,0.0], + "cumulative_counts_part_vaccine_65P": [np.nan,75596.0,np.nan,1877.0,np.nan,0.0], + 
"counts_part_vaccine_65P": [np.nan,75596.0,np.nan,1877.0,np.nan,0.0]}, + index=[0, 1, 2, 3, 4, 5]) + # sort since rows order doesn't matter + pd.testing.assert_frame_equal(df.sort_index(), expected_df.sort_index()) + + def test_missing_days(self): + """Test if error is raised when there are missing days.""" + with pytest.raises(ValueError): + pull_cdcvacc_data( + BASE_URL_BAD["missing_days"], TEST_LOGGER + ) + + def test_missing_cols(self): + """Test if error is raised when there are missing columns.""" + with pytest.raises(ValueError): + pull_cdcvacc_data( + BASE_URL_BAD["missing_cols"],TEST_LOGGER + ) + + def test_extra_cols(self): + """Test if error is raised when there are extra columns.""" + with pytest.raises(ValueError): + pull_cdcvacc_data( + BASE_URL_BAD["extra_cols"], TEST_LOGGER + ) \ No newline at end of file diff --git a/cdc_vaccines/tests/test_run.py b/cdc_vaccines/tests/test_run.py new file mode 100644 index 000000000..f65f00764 --- /dev/null +++ b/cdc_vaccines/tests/test_run.py @@ -0,0 +1,80 @@ +"""Tests for running the CDC Vaccine indicator.""" +from itertools import product +from os import listdir +from os.path import join +from unittest.mock import patch + +import pandas as pd + +from delphi_cdc_vaccines.run import run_module + +def local_fetch(url, cache): + return pd.read_csv(url) + +class TestRun: + """Tests for the `run_module()` function.""" + PARAMS = { + "common": { + "export_dir": "./receiving", + "input_dir": "./input_cache" + }, + "indicator": { + "base_url": "./test_data/small.csv", + "export_start_date": "2021-05-01" + } + } + + def test_output_files_exist(self): + """Test that the expected output files exist.""" + run_module(self.PARAMS) + + csv_files = [f for f in listdir("receiving") if f.endswith(".csv")] + + dates = [ + "20210810", + "20210811", + "20210812", + "20210813", + "20210814", + "20210815", + "20210816", + "20210817", + "20210818", + "20210819", + ] + geos = ["state_code", "hrr", "hhs", "nation"] + + expected_files = [] 
+ for metric in ["cumulative_counts_tot_vaccine", + "counts_tot_vaccine", + "cumulative_counts_tot_vaccine_12P", + "counts_tot_vaccine_12P", + "cumulative_counts_tot_vaccine_18P", + "counts_tot_vaccine_18P", + "cumulative_counts_tot_vaccine_65P", + "counts_tot_vaccine_65P", + "cumulative_counts_part_vaccine", + "counts_part_vaccine", + "cumulative_counts_part_vaccine_12P", + "counts_part_vaccine_12P", + "cumulative_counts_part_vaccine_18P", + "counts_part_vaccine_18P", + "cumulative_counts_part_vaccine_65P", + "counts_part_vaccine_65P"]: + for date in dates: + for geo in geos: + expected_files += [date + "_" + geo + "_" + metric + ".csv"] + if not("cumulative" in metric) and not (date in dates[:6]): + expected_files += [date + "_" + geo + "_" + metric + "_7dav.csv"] + + + assert set(csv_files) == set(expected_files) + + def test_output_file_format(self): + """Test that the output files have the proper format.""" + run_module(self.PARAMS) + + df = pd.read_csv( + join("receiving", "20210819_state_code_counts_tot_vaccine.csv") + ) + assert (df.columns.values == ["geo_id", "val", "se", "sample_size"]).all() \ No newline at end of file From e459efc7393b87a4bf6ffb7622fdba85f8b2807b Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Thu, 9 Sep 2021 11:31:42 -0400 Subject: [PATCH 02/95] added explicit dictionary creation --- cdc_vaccines/delphi_cdc_vaccines/run.py | 6 ++++++ cdc_vaccines/tests/test_run.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 455ca44d2..93ee75886 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -43,6 +43,11 @@ def run_module(params): all_data = pull_cdcvacc_data(base_url, logger) run_stats = [] ## aggregate & smooth + + + if not os.path.exists(params["common"]["export_dir"]): + os.makedirs(params["common"]["export_dir"]) + for (sensor, smoother, geo) in product(SIGNALS, SMOOTHERS, GEOS): 
logger.info("Running on ", sensor=sensor, @@ -60,6 +65,7 @@ def run_module(params): df["se"] = np.nan df["sample_size"] = np.nan sensor_name = sensor + smoother[1] + if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): # don't export first 6 days for smoothed signals since they'll be nan. start_date = min(df.timestamp) + timedelta(6) if smoother[1] else min(df.timestamp) diff --git a/cdc_vaccines/tests/test_run.py b/cdc_vaccines/tests/test_run.py index f65f00764..e1ecbb17f 100644 --- a/cdc_vaccines/tests/test_run.py +++ b/cdc_vaccines/tests/test_run.py @@ -20,7 +20,7 @@ class TestRun: }, "indicator": { "base_url": "./test_data/small.csv", - "export_start_date": "2021-05-01" + "export_start_date": "2021-08-10" } } From f9a63292615055d41298948f238629d4613b969e Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Thu, 9 Sep 2021 11:36:00 -0400 Subject: [PATCH 03/95] added os import --- cdc_vaccines/delphi_cdc_vaccines/run.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 93ee75886..9a6f9cdae 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -18,6 +18,7 @@ from itertools import product import time as tm import numpy as np +import os from delphi_utils.export import create_export_csv from delphi_utils.geomap import GeoMapper from delphi_utils import get_structured_logger From 22303faa9015dd8d8a3e8aa91529bb3c51cd0969 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Thu, 9 Sep 2021 11:43:08 -0400 Subject: [PATCH 04/95] Minor changes for the linter - tests pass locally --- cdc_vaccines/Makefile | 6 +++++- cdc_vaccines/delphi_cdc_vaccines/run.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/cdc_vaccines/Makefile b/cdc_vaccines/Makefile index ca3013248..5d1101c53 100644 --- a/cdc_vaccines/Makefile +++ b/cdc_vaccines/Makefile @@ -3,7 +3,7 @@ dir = $(shell find ./delphi_* -name __init__.py | grep -o 
'delphi_[_[:alnum:]]*') venv: - python -m venv env + python3.8 -m venv env install: venv . env/bin/activate; \ @@ -23,3 +23,7 @@ clean: rm -rf env rm -f params.json +run: + env/bin/python -m $(dir) + env/bin/python -m delphi_utils.validator --dry_run + env/bin/python -m delphi_utils.archive \ No newline at end of file diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 9a6f9cdae..c82ee3b16 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -17,8 +17,8 @@ from datetime import timedelta, datetime from itertools import product import time as tm -import numpy as np import os +import numpy as np from delphi_utils.export import create_export_csv from delphi_utils.geomap import GeoMapper from delphi_utils import get_structured_logger From 93a998295e390ba524f9ec72bf486504d35ee1e8 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 15:17:55 -0400 Subject: [PATCH 05/95] minor changes --- cdc_vaccines/delphi_cdc_vaccines/constants.py | 9 ++++++++- cdc_vaccines/delphi_cdc_vaccines/pull.py | 11 ++--------- cdc_vaccines/delphi_cdc_vaccines/run.py | 7 ++++++- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/constants.py b/cdc_vaccines/delphi_cdc_vaccines/constants.py index 113ecb69e..2896a1f8a 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/constants.py +++ b/cdc_vaccines/delphi_cdc_vaccines/constants.py @@ -43,7 +43,7 @@ GEOS = [ "nation", - "state_code", + "state", "hrr", "hhs" ] @@ -52,3 +52,10 @@ (Smoother("identity", impute_method=None), ""), (Smoother("moving_average", window_length=7), "_7dav"), ] + + +DIFFERENCE_MAPPING = { + f"{INCIDENCE}_counts_{completeness}_vaccine{age}": f"{CUMULATIVE}_counts_{completeness}_vaccine{age}" + for completeness, age in product(COMPLETENESS, AGE) +} +SIGNALS = list(DIFFERENCE_MAPPING.keys()) + list(DIFFERENCE_MAPPING.values()) \ No newline at end of file diff --git 
a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py index e19043163..32a53a4d7 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/pull.py +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py @@ -112,20 +112,13 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: df =pd.concat([df_dummy, df]) # Obtain new_counts df.sort_values(["fips", "timestamp"], inplace=True) - df["counts_tot_vaccine"] = df["cumulative_counts_tot_vaccine"].diff() # 1st discrete difference - df["counts_tot_vaccine_12P"] = df["cumulative_counts_tot_vaccine_12P"].diff() - df["counts_tot_vaccine_18P"] = df["cumulative_counts_tot_vaccine_18P"].diff() - df["counts_tot_vaccine_65P"] = df["cumulative_counts_tot_vaccine_65P"].diff() - df["counts_part_vaccine"] = df["cumulative_counts_part_vaccine"].diff() - df["counts_part_vaccine_12P"] = df["cumulative_counts_part_vaccine_12P"].diff() - df["counts_part_vaccine_18P"] = df["cumulative_counts_part_vaccine_18P"].diff() - df["counts_part_vaccine_65P"] = df["cumulative_counts_part_vaccine_65P"].diff() + for to, from in DIFFERENCE_MAPPING.items(): + df[to] = df[from].diff() rem_list = [ x for x in list(df.columns) if x not in ['timestamp', 'fips'] ] # Handle edge cases where we diffed across fips mask = df["fips"] != df["fips"].shift(1) df.loc[mask, rem_list] = np.nan - print(rem_list) df.reset_index(inplace=True, drop=True) # Final sanity checks unique_days = df["timestamp"].unique() diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index c82ee3b16..f5e4751ac 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -50,15 +50,20 @@ def run_module(params): os.makedirs(params["common"]["export_dir"]) for (sensor, smoother, geo) in product(SIGNALS, SMOOTHERS, GEOS): + logger.info("Running on ", sensor=sensor, smoother=smoother, geo=geo) + geo_map = geo + if geo=='state': + geo_map='state_code' + df = GeoMapper().replace_geocode( 
all_data[['timestamp','fips', sensor]],from_col='fips', from_code="fips", new_col="geo_id", - new_code=geo, + new_code=geo_map, date_col="timestamp") df["val"] = df[["geo_id", sensor]].groupby("geo_id")[sensor].transform( smoother[0].smooth From 014da6d32579d28b914872d60ad354588fcfa3f9 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 13:49:39 -0400 Subject: [PATCH 06/95] Update cdc_vaccines/delphi_cdc_vaccines/__main__.py Co-authored-by: Katie Mazaitis --- cdc_vaccines/delphi_cdc_vaccines/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/__main__.py b/cdc_vaccines/delphi_cdc_vaccines/__main__.py index 32fc0eecc..0aa3f6ac1 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/__main__.py +++ b/cdc_vaccines/delphi_cdc_vaccines/__main__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """Call the function run_module when executed. -This file indicates that calling the module (`python -m MODULE_NAME`) will +This file indicates that calling the module (`python -m delphi_cdc_vaccines`) will call the function `run_module` found within the run.py file. There should be no need to change this template. 
""" From 33fce0ca58b88c2aa8946ff8f8586b5f2f54e203 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 13:49:51 -0400 Subject: [PATCH 07/95] Update cdc_vaccines/delphi_cdc_vaccines/constants.py Co-authored-by: Katie Mazaitis --- cdc_vaccines/delphi_cdc_vaccines/constants.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/constants.py b/cdc_vaccines/delphi_cdc_vaccines/constants.py index 2896a1f8a..6e8a9620c 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/constants.py +++ b/cdc_vaccines/delphi_cdc_vaccines/constants.py @@ -45,7 +45,8 @@ "nation", "state", "hrr", - "hhs" + "hhs", + "msa" ] SMOOTHERS = [ From cfc4a3d775e9e76284e078e312a15c91d51f353d Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 13:50:41 -0400 Subject: [PATCH 08/95] Update cdc_vaccines/README.md Co-authored-by: Katie Mazaitis --- cdc_vaccines/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/README.md b/cdc_vaccines/README.md index b56682d0f..bcbcee972 100644 --- a/cdc_vaccines/README.md +++ b/cdc_vaccines/README.md @@ -2,7 +2,7 @@ This indicator provides the number official vaccinations in the US. We export the county-level daily vaccination rates data as-is, and publishes the result as a COVIDcast signal. -We also aggregate the data to the HHS, State, and Nation levels. +We also aggregate the data to the MSA, HRR, State, HHS Region, and Nation levels. For detailed information see the files DETAILS.md contained in this directory. Note that individuals could be vaccinated outside of the US. 
Additionally, From 76843dd7bdf065da01bdf78b30071a282aefb46f Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 13:51:55 -0400 Subject: [PATCH 09/95] Update cdc_vaccines/README.md Co-authored-by: Katie Mazaitis --- cdc_vaccines/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/README.md b/cdc_vaccines/README.md index bcbcee972..3423ca851 100644 --- a/cdc_vaccines/README.md +++ b/cdc_vaccines/README.md @@ -1,7 +1,7 @@ # CDC Vaccinations This indicator provides the number official vaccinations in the US. We export the county-level -daily vaccination rates data as-is, and publishes the result as a COVIDcast signal. +daily vaccination rates data as-is, and publish the result as a COVIDcast signal. We also aggregate the data to the MSA, HRR, State, HHS Region, and Nation levels. For detailed information see the files DETAILS.md contained in this directory. From 09382c7cb5b4516e33826e7815f667e6a67067d9 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 13:52:19 -0400 Subject: [PATCH 10/95] Update cdc_vaccines/params.json.template Co-authored-by: Katie Mazaitis --- cdc_vaccines/params.json.template | 9 --------- 1 file changed, 9 deletions(-) diff --git a/cdc_vaccines/params.json.template b/cdc_vaccines/params.json.template index 3d5337846..bfc654514 100644 --- a/cdc_vaccines/params.json.template +++ b/cdc_vaccines/params.json.template @@ -23,13 +23,4 @@ }, "dynamic": {} }, - "archive": { - "aws_credentials": { - "aws_access_key_id": "", - "aws_secret_access_key": "" - }, - "bucket_name": "", - "indicator_prefix": "usafacts", - "cache_dir": "./cache" - } } From 7f903bd2f2658de75926939afb9befe1f672eaf1 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 15:03:37 -0400 Subject: [PATCH 11/95] Update cdc_vaccines/README.md Co-authored-by: Katie Mazaitis --- cdc_vaccines/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/README.md b/cdc_vaccines/README.md 
index 3423ca851..ea076dc1e 100644 --- a/cdc_vaccines/README.md +++ b/cdc_vaccines/README.md @@ -1,6 +1,6 @@ # CDC Vaccinations -This indicator provides the number official vaccinations in the US. We export the county-level +This indicator provides the official vaccination counts in the US. We export the county-level daily vaccination rates data as-is, and publish the result as a COVIDcast signal. We also aggregate the data to the MSA, HRR, State, HHS Region, and Nation levels. For detailed information see the files DETAILS.md contained in this directory. From d05d5e292533fc9f8b4256ce6b13c1817402ca8c Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 15:10:39 -0400 Subject: [PATCH 12/95] Update cdc_vaccines/delphi_cdc_vaccines/run.py Co-authored-by: Katie Mazaitis --- cdc_vaccines/delphi_cdc_vaccines/run.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index f5e4751ac..0e52a9584 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -60,7 +60,8 @@ def run_module(params): geo_map='state_code' df = GeoMapper().replace_geocode( - all_data[['timestamp','fips', sensor]],from_col='fips', + all_data[['timestamp','fips', sensor]], + from_col='fips', from_code="fips", new_col="geo_id", new_code=geo_map, From 8618e55bd0e689e8be1a8d8b7163abdfc21e0ebd Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 15:12:35 -0400 Subject: [PATCH 13/95] Update cdc_vaccines/delphi_cdc_vaccines/pull.py Co-authored-by: Katie Mazaitis --- cdc_vaccines/delphi_cdc_vaccines/pull.py | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py index 32a53a4d7..e158a0b8c 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/pull.py +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py @@ -132,24 +132,6 @@ def 
pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: ) return df.loc[ df["timestamp"] >= min(df["timestamp"]), - [ # Reorder - "fips", - "timestamp", - "cumulative_counts_tot_vaccine", - "counts_tot_vaccine", - "cumulative_counts_tot_vaccine_12P", - "counts_tot_vaccine_12P", - "cumulative_counts_tot_vaccine_18P", - "counts_tot_vaccine_18P", - "cumulative_counts_tot_vaccine_65P", - "counts_tot_vaccine_65P", - "cumulative_counts_part_vaccine", - "counts_part_vaccine", - "cumulative_counts_part_vaccine_12P", - "counts_part_vaccine_12P", - "cumulative_counts_part_vaccine_18P", - "counts_part_vaccine_18P", - "cumulative_counts_part_vaccine_65P", - "counts_part_vaccine_65P" - ], + # Reorder + ["fips", "timestamp"] + SIGNALS, ].reset_index(drop=True) From e537b3597e0f4a61be224fd97a7eef2005707c29 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 15:22:05 -0400 Subject: [PATCH 14/95] changes to the json file --- cdc_vaccines/params.json.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/params.json.template b/cdc_vaccines/params.json.template index bfc654514..3e6af1c23 100644 --- a/cdc_vaccines/params.json.template +++ b/cdc_vaccines/params.json.template @@ -22,5 +22,5 @@ "missing_sample_size_allowed": true }, "dynamic": {} - }, + } } From 6054dcde4455e008ee82e5fccf964a2400e5d72c Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Sat, 11 Sep 2021 13:22:43 -0400 Subject: [PATCH 15/95] changed the signal name generation --- cdc_vaccines/delphi_cdc_vaccines/pull.py | 7 +++---- cdc_vaccines/delphi_cdc_vaccines/run.py | 2 +- cdc_vaccines/tests/test_pull.py | 23 ++++++++++++++--------- cdc_vaccines/tests/test_run.py | 21 ++++++++++----------- 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py index e158a0b8c..b8e8c6387 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/pull.py +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py 
@@ -5,8 +5,7 @@ from delphi_utils.geomap import GeoMapper import numpy as np import pandas as pd - - +from .constants import SIGNALS, DIFFERENCE_MAPPING @@ -112,8 +111,8 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: df =pd.concat([df_dummy, df]) # Obtain new_counts df.sort_values(["fips", "timestamp"], inplace=True) - for to, from in DIFFERENCE_MAPPING.items(): - df[to] = df[from].diff() + for to, from_d in DIFFERENCE_MAPPING.items(): + df[to] = df[from_d].diff() rem_list = [ x for x in list(df.columns) if x not in ['timestamp', 'fips'] ] # Handle edge cases where we diffed across fips diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 0e52a9584..48c0f24f9 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -72,7 +72,7 @@ def run_module(params): df["se"] = np.nan df["sample_size"] = np.nan sensor_name = sensor + smoother[1] - + print(sensor_name) if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): # don't export first 6 days for smoothed signals since they'll be nan. 
start_date = min(df.timestamp) + timedelta(6) if smoother[1] else min(df.timestamp) diff --git a/cdc_vaccines/tests/test_pull.py b/cdc_vaccines/tests/test_pull.py index f49d0eff6..644f1226c 100644 --- a/cdc_vaccines/tests/test_pull.py +++ b/cdc_vaccines/tests/test_pull.py @@ -29,22 +29,27 @@ def test_good_file(self): "timestamp": [pd.Timestamp("2021-08-25"), pd.Timestamp("2021-08-26"), pd.Timestamp("2021-08-25"), pd.Timestamp("2021-08-26"), pd.Timestamp("2021-08-25"), pd.Timestamp("2021-08-26")], + + "incidence_counts_tot_vaccine": [np.nan,789625.0,np.nan,5537.0,np.nan,0.0], + "incidence_counts_tot_vaccine_12P": [np.nan,789591.0,np.nan,5535.0,np.nan,0.0], + "incidence_counts_tot_vaccine_18P": [np.nan,733809.0,np.nan,5368.0,np.nan,0.0], + "incidence_counts_tot_vaccine_65P": [np.nan,55620.0,np.nan,1696.0,np.nan,0.0], + "incidence_counts_part_vaccine": [np.nan,1119266.0,np.nan,6293.0,np.nan,0.0], + "incidence_counts_part_vaccine_12P": [np.nan,1119203.0,np.nan,6290.0,np.nan,0.0], + "incidence_counts_part_vaccine_18P": [np.nan,1035082.0,np.nan,6014.0,np.nan,0.0], + "incidence_counts_part_vaccine_65P": [np.nan,75596.0,np.nan,1877.0,np.nan,0.0], + + + "cumulative_counts_tot_vaccine": [np.nan,789625.0,np.nan,5537.0,np.nan,0.0], - "counts_tot_vaccine": [np.nan,789625.0,np.nan,5537.0,np.nan,0.0], "cumulative_counts_tot_vaccine_12P": [np.nan,789591.0,np.nan,5535.0,np.nan,0.0], - "counts_tot_vaccine_12P": [np.nan,789591.0,np.nan,5535.0,np.nan,0.0], "cumulative_counts_tot_vaccine_18P": [np.nan,733809.0,np.nan,5368.0,np.nan,0.0], - "counts_tot_vaccine_18P": [np.nan,733809.0,np.nan,5368.0,np.nan,0.0], "cumulative_counts_tot_vaccine_65P": [np.nan,55620.0,np.nan,1696.0,np.nan,0.0], - "counts_tot_vaccine_65P": [np.nan,55620.0,np.nan,1696.0,np.nan,0.0], "cumulative_counts_part_vaccine": [np.nan,1119266.0,np.nan,6293.0,np.nan,0.0], - "counts_part_vaccine": [np.nan,1119266.0,np.nan,6293.0,np.nan,0.0], "cumulative_counts_part_vaccine_12P": [np.nan,1119203.0,np.nan,6290.0,np.nan,0.0], - 
"counts_part_vaccine_12P": [np.nan,1119203.0,np.nan,6290.0,np.nan,0.0], "cumulative_counts_part_vaccine_18P": [np.nan,1035082.0,np.nan,6014.0,np.nan,0.0], - "counts_part_vaccine_18P": [np.nan,1035082.0,np.nan,6014.0,np.nan,0.0], - "cumulative_counts_part_vaccine_65P": [np.nan,75596.0,np.nan,1877.0,np.nan,0.0], - "counts_part_vaccine_65P": [np.nan,75596.0,np.nan,1877.0,np.nan,0.0]}, + "cumulative_counts_part_vaccine_65P": [np.nan,75596.0,np.nan,1877.0,np.nan,0.0]}, + index=[0, 1, 2, 3, 4, 5]) # sort since rows order doesn't matter pd.testing.assert_frame_equal(df.sort_index(), expected_df.sort_index()) diff --git a/cdc_vaccines/tests/test_run.py b/cdc_vaccines/tests/test_run.py index e1ecbb17f..abc966c25 100644 --- a/cdc_vaccines/tests/test_run.py +++ b/cdc_vaccines/tests/test_run.py @@ -42,32 +42,31 @@ def test_output_files_exist(self): "20210818", "20210819", ] - geos = ["state_code", "hrr", "hhs", "nation"] + geos = ["state", "hrr", "hhs", "nation", "msa"] expected_files = [] for metric in ["cumulative_counts_tot_vaccine", - "counts_tot_vaccine", + "incidence_counts_tot_vaccine", "cumulative_counts_tot_vaccine_12P", - "counts_tot_vaccine_12P", + "incidence_counts_tot_vaccine_12P", "cumulative_counts_tot_vaccine_18P", - "counts_tot_vaccine_18P", + "incidence_counts_tot_vaccine_18P", "cumulative_counts_tot_vaccine_65P", - "counts_tot_vaccine_65P", + "incidence_counts_tot_vaccine_65P", "cumulative_counts_part_vaccine", - "counts_part_vaccine", + "incidence_counts_part_vaccine", "cumulative_counts_part_vaccine_12P", - "counts_part_vaccine_12P", + "incidence_counts_part_vaccine_12P", "cumulative_counts_part_vaccine_18P", - "counts_part_vaccine_18P", + "incidence_counts_part_vaccine_18P", "cumulative_counts_part_vaccine_65P", - "counts_part_vaccine_65P"]: + "incidence_counts_part_vaccine_65P"]: for date in dates: for geo in geos: expected_files += [date + "_" + geo + "_" + metric + ".csv"] if not("cumulative" in metric) and not (date in dates[:6]): expected_files += 
[date + "_" + geo + "_" + metric + "_7dav.csv"] - assert set(csv_files) == set(expected_files) def test_output_file_format(self): @@ -75,6 +74,6 @@ def test_output_file_format(self): run_module(self.PARAMS) df = pd.read_csv( - join("receiving", "20210819_state_code_counts_tot_vaccine.csv") + join("receiving", "20210819_state_cumulative_counts_tot_vaccine.csv") ) assert (df.columns.values == ["geo_id", "val", "se", "sample_size"]).all() \ No newline at end of file From c8474318185d0d86e1a41b76fc186fee3b40b0d1 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Sat, 11 Sep 2021 13:25:47 -0400 Subject: [PATCH 16/95] committed constants --- cdc_vaccines/delphi_cdc_vaccines/constants.py | 55 +++++-------------- 1 file changed, 13 insertions(+), 42 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/constants.py b/cdc_vaccines/delphi_cdc_vaccines/constants.py index 6e8a9620c..b8ff83d06 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/constants.py +++ b/cdc_vaccines/delphi_cdc_vaccines/constants.py @@ -1,45 +1,23 @@ """Registry for variations.""" - +from itertools import product from delphi_utils import Smoother -FULL_VAC_SUM = "cumulative_counts_tot_vaccine" -FULL_VAC = "counts_tot_vaccine" -FULL_VAC_SUM_12P = "cumulative_counts_tot_vaccine_12P" -FULL_VAC_12P = "counts_tot_vaccine_12P" -FULL_VAC_SUM_18P = "cumulative_counts_tot_vaccine_18P" -FULL_VAC_18P = "counts_tot_vaccine_18P" -FULL_VAC_SUM_65P = "cumulative_counts_tot_vaccine_65P" -FULL_VAC_65P = "counts_tot_vaccine_65P" -PART_VAC_SUM = "cumulative_counts_part_vaccine" -PART_VAC = "counts_part_vaccine" -PART_VAC_SUM_12P = "cumulative_counts_part_vaccine_12P" -PART_VAC_12P = "counts_part_vaccine_12P" -PART_VAC_SUM_18P = "cumulative_counts_part_vaccine_18P" -PART_VAC_18P = "counts_part_vaccine_18P" -PART_VAC_SUM_65P = "cumulative_counts_part_vaccine_65P" -PART_VAC_65P = "counts_part_vaccine_65P" +CUMULATIVE = 'cumulative' +INCIDENCE ='incidence' +FREQUENCY = [CUMULATIVE, INCIDENCE] +STATUS = ["tot", "part"] +AGE = 
["", "_12P", "_18P", "_65P"] +SIGNALS = [f"{frequency}_counts_{status}_vaccine{AGE}" for + frequency, status, age in product(FREQUENCY, STATUS, AGE)] +DIFFERENCE_MAPPING = { + f"{INCIDENCE}_counts_{status}_vaccine{age}": f"{CUMULATIVE}_counts_{status}_vaccine{age}" + for status, age in product(STATUS, AGE) +} +SIGNALS = list(DIFFERENCE_MAPPING.keys()) + list(DIFFERENCE_MAPPING.values()) -SIGNALS = [ - FULL_VAC_SUM , - FULL_VAC , - FULL_VAC_SUM_12P , - FULL_VAC_12P , - FULL_VAC_SUM_18P , - FULL_VAC_18P , - FULL_VAC_SUM_65P , - FULL_VAC_65P , - PART_VAC_SUM , - PART_VAC , - PART_VAC_SUM_12P , - PART_VAC_12P , - PART_VAC_SUM_18P , - PART_VAC_18P , - PART_VAC_SUM_65P , - PART_VAC_65P -] GEOS = [ "nation", @@ -53,10 +31,3 @@ (Smoother("identity", impute_method=None), ""), (Smoother("moving_average", window_length=7), "_7dav"), ] - - -DIFFERENCE_MAPPING = { - f"{INCIDENCE}_counts_{completeness}_vaccine{age}": f"{CUMULATIVE}_counts_{completeness}_vaccine{age}" - for completeness, age in product(COMPLETENESS, AGE) -} -SIGNALS = list(DIFFERENCE_MAPPING.keys()) + list(DIFFERENCE_MAPPING.values()) \ No newline at end of file From f4399d6ba01277670cbb9529da8488168e4a7cd2 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 17 Sep 2021 06:30:41 -0400 Subject: [PATCH 17/95] Modified run.py to have the right NaN codes --- cdc_vaccines/delphi_cdc_vaccines/run.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 48c0f24f9..3013c2c5a 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -21,7 +21,7 @@ import numpy as np from delphi_utils.export import create_export_csv from delphi_utils.geomap import GeoMapper -from delphi_utils import get_structured_logger +from delphi_utils import get_structured_logger, nancodes from .constants import GEOS, SIGNALS, SMOOTHERS from .pull import pull_cdcvacc_data @@ -69,8 +69,8 @@ def 
run_module(params): df["val"] = df[["geo_id", sensor]].groupby("geo_id")[sensor].transform( smoother[0].smooth ) - df["se"] = np.nan - df["sample_size"] = np.nan + df["se"] = nancodes.Nans.NOT_APPLICABLE + df["sample_size"] = nancodes.Nans.NOT_APPLICABLE sensor_name = sensor + smoother[1] print(sensor_name) if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): From e5db4270cb18452a2beb38ff41089012167f3a74 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Mon, 13 Sep 2021 14:08:57 -0400 Subject: [PATCH 18/95] Update cdc_vaccines/README.md Co-authored-by: Katie Mazaitis --- cdc_vaccines/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cdc_vaccines/README.md b/cdc_vaccines/README.md index ea076dc1e..792f9070c 100644 --- a/cdc_vaccines/README.md +++ b/cdc_vaccines/README.md @@ -6,8 +6,7 @@ We also aggregate the data to the MSA, HRR, State, HHS Region, and Nation levels For detailed information see the files DETAILS.md contained in this directory. Note that individuals could be vaccinated outside of the US. Additionally, -there is no county level data for counties in Texas and Hawaii. There are unknown -counties in each state and a row for unknown county and unknown state. +there is no county level data for counties in Texas and Hawaii. Each state has some vaccination counts assigned to "unknown county". Some vaccination counts are assigned to "unknown state, unknown county". 
## Running the Indicator From 8f13b6af660ac1be50758cf73587377c06605b10 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Sun, 19 Sep 2021 11:28:00 -0400 Subject: [PATCH 19/95] Added appropriate NaN codes --- cdc_vaccines/delphi_cdc_vaccines/run.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 3013c2c5a..43dee11f5 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -18,7 +18,6 @@ from itertools import product import time as tm import os -import numpy as np from delphi_utils.export import create_export_csv from delphi_utils.geomap import GeoMapper from delphi_utils import get_structured_logger, nancodes From ded675f5f4d3ff6911daacb7bc59a8535ef7b19a Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 21 Sep 2021 10:12:13 -0400 Subject: [PATCH 20/95] Update cdc_vaccines/delphi_cdc_vaccines/run.py Co-authored-by: Dmitry Shemetov --- cdc_vaccines/delphi_cdc_vaccines/run.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 43dee11f5..d22dfd76d 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -69,7 +69,10 @@ def run_module(params): smoother[0].smooth ) df["se"] = nancodes.Nans.NOT_APPLICABLE - df["sample_size"] = nancodes.Nans.NOT_APPLICABLE + df["se"] = np.nan + df["sample_size"] = np.nan + df["missing_se"] = nancodes.Nans.NOT_APPLICABLE + df["missing_sample_size"] = nancodes.Nans.NOT_APPLICABLE sensor_name = sensor + smoother[1] print(sensor_name) if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): From 85d47172dd31f71c8e690007b310b9984816e2c3 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 21 Sep 2021 10:24:48 -0400 Subject: [PATCH 21/95] added back appropriate nan codes --- cdc_vaccines/delphi_cdc_vaccines/run.py | 5 +---- 1 file changed, 1 
insertion(+), 4 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index d22dfd76d..43dee11f5 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -69,10 +69,7 @@ def run_module(params): smoother[0].smooth ) df["se"] = nancodes.Nans.NOT_APPLICABLE - df["se"] = np.nan - df["sample_size"] = np.nan - df["missing_se"] = nancodes.Nans.NOT_APPLICABLE - df["missing_sample_size"] = nancodes.Nans.NOT_APPLICABLE + df["sample_size"] = nancodes.Nans.NOT_APPLICABLE sensor_name = sensor + smoother[1] print(sensor_name) if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): From 211c3172e7497926936c071f233e628eaa714cdd Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 21 Sep 2021 18:15:41 -0400 Subject: [PATCH 22/95] changes to run.py --- cdc_vaccines/delphi_cdc_vaccines/run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 43dee11f5..51ab3688b 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -68,8 +68,8 @@ def run_module(params): df["val"] = df[["geo_id", sensor]].groupby("geo_id")[sensor].transform( smoother[0].smooth ) - df["se"] = nancodes.Nans.NOT_APPLICABLE - df["sample_size"] = nancodes.Nans.NOT_APPLICABLE + df["missing_se"] = nancodes.Nans.NOT_APPLICABLE + df["missing_sample_size"] = nancodes.Nans.NOT_APPLICABLE sensor_name = sensor + smoother[1] print(sensor_name) if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): From 0df9b81c0349dabd351f91c4cf1145c43714fce2 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Mon, 11 Oct 2021 17:48:06 -0400 Subject: [PATCH 23/95] adding test_run changes with new col names --- cdc_vaccines/tests/test_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/tests/test_run.py b/cdc_vaccines/tests/test_run.py index 
abc966c25..c96e90112 100644 --- a/cdc_vaccines/tests/test_run.py +++ b/cdc_vaccines/tests/test_run.py @@ -76,4 +76,4 @@ def test_output_file_format(self): df = pd.read_csv( join("receiving", "20210819_state_cumulative_counts_tot_vaccine.csv") ) - assert (df.columns.values == ["geo_id", "val", "se", "sample_size"]).all() \ No newline at end of file + assert (df.columns.values == ["geo_id", "val", "missing_se", "missing_sample_size"]).all() \ No newline at end of file From 7f9956598524795b10dd42a43d5b9a36c0178ebe Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 12 Oct 2021 16:02:48 -0400 Subject: [PATCH 24/95] Update cdc_vaccines/delphi_cdc_vaccines/run.py Co-authored-by: Katie Mazaitis --- cdc_vaccines/delphi_cdc_vaccines/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 51ab3688b..a567f0097 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -72,7 +72,7 @@ def run_module(params): df["missing_sample_size"] = nancodes.Nans.NOT_APPLICABLE sensor_name = sensor + smoother[1] print(sensor_name) - if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): + if not (("cumulative" in sensor_name) and ("7dav" in sensor_name)): # don't export first 6 days for smoothed signals since they'll be nan. 
start_date = min(df.timestamp) + timedelta(6) if smoother[1] else min(df.timestamp) exported_csv_dates = create_export_csv( From 1bd6fac87192050e24072a4099e946fae31a076b Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 12 Oct 2021 16:02:53 -0400 Subject: [PATCH 25/95] Update cdc_vaccines/delphi_cdc_vaccines/run.py Co-authored-by: Katie Mazaitis --- cdc_vaccines/delphi_cdc_vaccines/run.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index a567f0097..57dd3a403 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -71,7 +71,6 @@ def run_module(params): df["missing_se"] = nancodes.Nans.NOT_APPLICABLE df["missing_sample_size"] = nancodes.Nans.NOT_APPLICABLE sensor_name = sensor + smoother[1] - print(sensor_name) if not (("cumulative" in sensor_name) and ("7dav" in sensor_name)): # don't export first 6 days for smoothed signals since they'll be nan. start_date = min(df.timestamp) + timedelta(6) if smoother[1] else min(df.timestamp) From 05c3cbf5b26f757ace1207876c9cabd871ae0cf2 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 12 Oct 2021 16:03:00 -0400 Subject: [PATCH 26/95] Update cdc_vaccines/delphi_cdc_vaccines/run.py Co-authored-by: Katie Mazaitis --- cdc_vaccines/delphi_cdc_vaccines/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 57dd3a403..a8777e433 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -2,7 +2,7 @@ """Functions to call when running the function. This module should contain a function called `run_module`, that is executed -when the module is run with `python -m MODULE_NAME`. `run_module`'s lone argument should be a +when the module is run with `python -m delphi_cdc_vaccines`. 
`run_module`'s lone argument should be a nested dictionary of parameters loaded from the params.json file. We expect the `params` to have the following structure: - "common": From a14f23c0a21b02dfd1858084f04755d0051ef5bd Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 12 Oct 2021 16:05:20 -0400 Subject: [PATCH 27/95] lint nit --- cdc_vaccines/delphi_cdc_vaccines/run.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index a8777e433..cc633f62a 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -2,9 +2,10 @@ """Functions to call when running the function. This module should contain a function called `run_module`, that is executed -when the module is run with `python -m delphi_cdc_vaccines`. `run_module`'s lone argument should be a -nested dictionary of parameters loaded from the params.json file. We expect the `params` to have -the following structure: +when the module is run with `python -m delphi_cdc_vaccines`. +`run_module`'s lone argument should be a nested dictionary of +parameters loaded from the params.json file. 
+We expect the `params` to have the following structure: - "common": - "export_dir": str, directory to which the results are exported - "log_filename": (optional) str, path to log file From 17352cff5248c330d17a47d4da1477d7cb143582 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 12 Oct 2021 16:29:20 -0400 Subject: [PATCH 28/95] Modifying for the changes in the base csv file from the CDC --- cdc_vaccines/delphi_cdc_vaccines/pull.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py index b8e8c6387..9b7d8cf89 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/pull.py +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py @@ -45,7 +45,8 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: "series_complete_pop_pct", "mmwr_week", "recip_county", - "state_id" + "state_id", + "metro_status" ] From 3d66880ed6bf8823b8086d3d9e742d09cb6e3a6b Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 12 Oct 2021 16:53:04 -0400 Subject: [PATCH 29/95] Changes to the CDC Files and respective changes to tests --- cdc_vaccines/delphi_cdc_vaccines/pull.py | 5 +++-- cdc_vaccines/tests/test_data/small_confirmed.csv | 0 cdc_vaccines/tests/test_pull.py | 7 ------- 3 files changed, 3 insertions(+), 9 deletions(-) delete mode 100644 cdc_vaccines/tests/test_data/small_confirmed.csv diff --git a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py index 9b7d8cf89..f309064b7 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/pull.py +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py @@ -45,8 +45,7 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: "series_complete_pop_pct", "mmwr_week", "recip_county", - "state_id", - "metro_status" + "state_id" ] @@ -62,6 +61,7 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: df['recip_state'] = df['recip_state'].str.lower() drop_columns.extend([x for x in df.columns if ("pct" in x) 
| ("svi" in x)]) + drop_columns.extend(df.columns[22:]) drop_columns = list(set(drop_columns)) df = GeoMapper().add_geocode(df, "state_id", "state_code", from_col="recip_state", new_col="state_id", dropna=False) @@ -82,6 +82,7 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: "schema may have changed. Please investigate and " "amend drop_columns." ) from e + # timestamp: str -> datetime df.columns = ["fips", "cumulative_counts_tot_vaccine", diff --git a/cdc_vaccines/tests/test_data/small_confirmed.csv b/cdc_vaccines/tests/test_data/small_confirmed.csv deleted file mode 100644 index e69de29bb..000000000 diff --git a/cdc_vaccines/tests/test_pull.py b/cdc_vaccines/tests/test_pull.py index 644f1226c..b5f8e3447 100644 --- a/cdc_vaccines/tests/test_pull.py +++ b/cdc_vaccines/tests/test_pull.py @@ -67,10 +67,3 @@ def test_missing_cols(self): pull_cdcvacc_data( BASE_URL_BAD["missing_cols"],TEST_LOGGER ) - - def test_extra_cols(self): - """Test if error is raised when there are extra columns.""" - with pytest.raises(ValueError): - pull_cdcvacc_data( - BASE_URL_BAD["extra_cols"], TEST_LOGGER - ) \ No newline at end of file From 3184ed29da431017c59037eae07fd1e4fd1ce66b Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 7 Sep 2021 22:15:31 -0400 Subject: [PATCH 30/95] First pass of the CDC Indicator --- .github/workflows/python-ci.yml | 2 +- cdc_vaccines/.pylintrc | 22 +++ cdc_vaccines/Makefile | 25 +++ cdc_vaccines/README.md | 70 ++++++++ cdc_vaccines/REVIEW.md | 38 ++++ cdc_vaccines/cache/.gitignore | 0 cdc_vaccines/delphi_cdc_vaccines/__init__.py | 13 ++ cdc_vaccines/delphi_cdc_vaccines/__main__.py | 12 ++ cdc_vaccines/delphi_cdc_vaccines/constants.py | 54 ++++++ cdc_vaccines/delphi_cdc_vaccines/pull.py | 162 ++++++++++++++++++ cdc_vaccines/delphi_cdc_vaccines/run.py | 84 +++++++++ cdc_vaccines/params.json.template | 35 ++++ cdc_vaccines/setup.py | 29 ++++ cdc_vaccines/static/.gitignore | 0 .../tests/test_data/bad_extra_cols.csv | 8 + 
.../tests/test_data/bad_missing_cols.csv | 8 + .../tests/test_data/bad_missing_days.csv | 31 ++++ cdc_vaccines/tests/test_data/small.csv | 109 ++++++++++++ .../tests/test_data/small_confirmed.csv | 0 cdc_vaccines/tests/test_data/small_pull.csv | 4 + cdc_vaccines/tests/test_pull.py | 71 ++++++++ cdc_vaccines/tests/test_run.py | 80 +++++++++ 22 files changed, 856 insertions(+), 1 deletion(-) create mode 100644 cdc_vaccines/.pylintrc create mode 100644 cdc_vaccines/Makefile create mode 100644 cdc_vaccines/README.md create mode 100644 cdc_vaccines/REVIEW.md create mode 100644 cdc_vaccines/cache/.gitignore create mode 100644 cdc_vaccines/delphi_cdc_vaccines/__init__.py create mode 100644 cdc_vaccines/delphi_cdc_vaccines/__main__.py create mode 100644 cdc_vaccines/delphi_cdc_vaccines/constants.py create mode 100644 cdc_vaccines/delphi_cdc_vaccines/pull.py create mode 100644 cdc_vaccines/delphi_cdc_vaccines/run.py create mode 100644 cdc_vaccines/params.json.template create mode 100644 cdc_vaccines/setup.py create mode 100644 cdc_vaccines/static/.gitignore create mode 100644 cdc_vaccines/tests/test_data/bad_extra_cols.csv create mode 100644 cdc_vaccines/tests/test_data/bad_missing_cols.csv create mode 100644 cdc_vaccines/tests/test_data/bad_missing_days.csv create mode 100644 cdc_vaccines/tests/test_data/small.csv create mode 100644 cdc_vaccines/tests/test_data/small_confirmed.csv create mode 100644 cdc_vaccines/tests/test_data/small_pull.csv create mode 100644 cdc_vaccines/tests/test_pull.py create mode 100644 cdc_vaccines/tests/test_run.py diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index adeb011a6..45b8aea32 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -16,7 +16,7 @@ jobs: if: github.event.pull_request.draft == false strategy: matrix: - packages: [_delphi_utils_python, changehc, claims_hosp, combo_cases_and_deaths, covid_act_now, doctor_visits, google_symptoms, hhs_hosp, hhs_facilities, jhu, 
nchs_mortality, nowcast, quidel, quidel_covidtest, safegraph_patterns, sir_complainsalot, usafacts] + packages: [_delphi_utils_python, changehc, claims_hosp, combo_cases_and_deaths, covid_act_now, doctor_visits, google_symptoms, hhs_hosp, hhs_facilities, jhu, nchs_mortality, nowcast, quidel, quidel_covidtest, safegraph_patterns, sir_complainsalot, usafacts, cdc_vaccines] defaults: run: working-directory: ${{ matrix.packages }} diff --git a/cdc_vaccines/.pylintrc b/cdc_vaccines/.pylintrc new file mode 100644 index 000000000..f30837c7e --- /dev/null +++ b/cdc_vaccines/.pylintrc @@ -0,0 +1,22 @@ + +[MESSAGES CONTROL] + +disable=logging-format-interpolation, + too-many-locals, + too-many-arguments, + # Allow pytest functions to be part of a class. + no-self-use, + # Allow pytest classes to have one test. + too-few-public-methods + +[BASIC] + +# Allow arbitrarily short-named variables. +variable-rgx=[a-z_][a-z0-9_]* +argument-rgx=[a-z_][a-z0-9_]* +attr-rgx=[a-z_][a-z0-9_]* + +[DESIGN] + +# Don't complain about pytest "unused" arguments. +ignored-argument-names=(_.*|run_as_module) \ No newline at end of file diff --git a/cdc_vaccines/Makefile b/cdc_vaccines/Makefile new file mode 100644 index 000000000..ca3013248 --- /dev/null +++ b/cdc_vaccines/Makefile @@ -0,0 +1,25 @@ +.PHONY = venv, lint, test, clean + +dir = $(shell find ./delphi_* -name __init__.py | grep -o 'delphi_[_[:alnum:]]*') + +venv: + python -m venv env + +install: venv + . env/bin/activate; \ + pip install wheel ; \ + pip install -e ../_delphi_utils_python ;\ + pip install -e . + +lint: + . env/bin/activate; pylint $(dir) + . env/bin/activate; pydocstyle $(dir) + +test: + . 
env/bin/activate ;\ + (cd tests && ../env/bin/pytest --cov=$(dir) --cov-report=term-missing) + +clean: + rm -rf env + rm -f params.json + diff --git a/cdc_vaccines/README.md b/cdc_vaccines/README.md new file mode 100644 index 000000000..b56682d0f --- /dev/null +++ b/cdc_vaccines/README.md @@ -0,0 +1,70 @@ +# CDC Vaccinations + +This indicator provides the number official vaccinations in the US. We export the county-level +daily vaccination rates data as-is, and publishes the result as a COVIDcast signal. +We also aggregate the data to the HHS, State, and Nation levels. +For detailed information see the files DETAILS.md contained in this directory. + +Note that individuals could be vaccinated outside of the US. Additionally, +there is no county level data for counties in Texas and Hawaii. There are unknown +counties in each state and a row for unknown county and unknown state. + + +## Running the Indicator + +The indicator is run by directly executing the Python module contained in this +directory. The safest way to do this is to create a virtual environment, +installed the common DELPHI tools, and then install the module and its +dependencies. To do this, run the following command from this directory: + +``` +make install +``` + +This command will install the package in editable mode, so you can make changes that +will automatically propagate to the installed package. + +All of the user-changable parameters are stored in `params.json`. To execute +the module and produce the output datasets (by default, in `receiving`), run +the following: + +``` +env/bin/python -m delphi_cdc_vaccines +``` + +If you want to enter the virtual environment in your shell, +you can run `source env/bin/activate`. Run `deactivate` to leave the virtual environment. 
+ +Once you are finished, you can remove the virtual environment and +params file with the following: + +``` +make clean +``` + +## Testing the code + +To run static tests of the code style, run the following command: + +``` +make lint +``` + +Unit tests are also included in the module. To execute these, run the following +command from this directory: + +``` +make test +``` + +To run individual tests, run the following: + +``` +(cd tests && ../env/bin/pytest test_run.py --cov=delphi_ --cov-report=term-missing) +``` + +The output will show the number of unit tests that passed and failed, along +with the percentage of code covered by the tests. + +None of the linting or unit tests should fail, and the code lines that are not covered by unit tests should be small and +should not include critical sub-routines. diff --git a/cdc_vaccines/REVIEW.md b/cdc_vaccines/REVIEW.md new file mode 100644 index 000000000..03f87b17a --- /dev/null +++ b/cdc_vaccines/REVIEW.md @@ -0,0 +1,38 @@ +## Code Review (Python) + +A code review of this module should include a careful look at the code and the +output. To assist in the process, but certainly not in replace of it, please +check the following items. 
+ +**Documentation** + +- [ ] the README.md file template is filled out and currently accurate; it is +possible to load and test the code using only the instructions given +- [ ] minimal docstrings (one line describing what the function does) are +included for all functions; full docstrings describing the inputs and expected +outputs should be given for non-trivial functions + +**Structure** + +- [ ] code should pass lint checks (`make lint`) +- [ ] any required metadata files are checked into the repository and placed +within the directory `static` +- [ ] any intermediate files that are created and stored by the module should +be placed in the directory `cache` +- [ ] final expected output files to be uploaded to the API are placed in the +`receiving` directory; output files should not be committed to the respository +- [ ] all options and API keys are passed through the file `params.json` +- [ ] template parameter file (`params.json.template`) is checked into the +code; no personal (i.e., usernames) or private (i.e., API keys) information is +included in this template file + +**Testing** + +- [ ] module can be installed in a new virtual environment (`make install`) +- [ ] reasonably high level of unit test coverage covering all of the main logic +of the code (e.g., missing coverage for raised errors that do not currently seem +possible to reach are okay; missing coverage for options that will be needed are +not) +- [ ] all unit tests run without errors (`make test`) +- [ ] indicator directory has been added to GitHub CI +(`covidcast-indicators/.github/workflows/python-ci.yml`) diff --git a/cdc_vaccines/cache/.gitignore b/cdc_vaccines/cache/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/cdc_vaccines/delphi_cdc_vaccines/__init__.py b/cdc_vaccines/delphi_cdc_vaccines/__init__.py new file mode 100644 index 000000000..6813595b4 --- /dev/null +++ b/cdc_vaccines/delphi_cdc_vaccines/__init__.py @@ -0,0 +1,13 @@ +# -*- coding: utf-8 -*- +"""Module 
to pull and clean indicators from the CDC source. + +This file defines the functions that are made public by the module. As the +module is intended to be executed though the main method, these are primarily +for testing. +""" + +from __future__ import absolute_import +from . import pull +from . import run + +__version__ = "0.1.0" diff --git a/cdc_vaccines/delphi_cdc_vaccines/__main__.py b/cdc_vaccines/delphi_cdc_vaccines/__main__.py new file mode 100644 index 000000000..32fc0eecc --- /dev/null +++ b/cdc_vaccines/delphi_cdc_vaccines/__main__.py @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- +"""Call the function run_module when executed. + +This file indicates that calling the module (`python -m MODULE_NAME`) will +call the function `run_module` found within the run.py file. There should be +no need to change this template. +""" + +from delphi_utils import read_params +from .run import run_module # pragma: no cover + +run_module(read_params()) # pragma: no cover diff --git a/cdc_vaccines/delphi_cdc_vaccines/constants.py b/cdc_vaccines/delphi_cdc_vaccines/constants.py new file mode 100644 index 000000000..113ecb69e --- /dev/null +++ b/cdc_vaccines/delphi_cdc_vaccines/constants.py @@ -0,0 +1,54 @@ +"""Registry for variations.""" + + +from delphi_utils import Smoother + +FULL_VAC_SUM = "cumulative_counts_tot_vaccine" +FULL_VAC = "counts_tot_vaccine" +FULL_VAC_SUM_12P = "cumulative_counts_tot_vaccine_12P" +FULL_VAC_12P = "counts_tot_vaccine_12P" +FULL_VAC_SUM_18P = "cumulative_counts_tot_vaccine_18P" +FULL_VAC_18P = "counts_tot_vaccine_18P" +FULL_VAC_SUM_65P = "cumulative_counts_tot_vaccine_65P" +FULL_VAC_65P = "counts_tot_vaccine_65P" +PART_VAC_SUM = "cumulative_counts_part_vaccine" +PART_VAC = "counts_part_vaccine" +PART_VAC_SUM_12P = "cumulative_counts_part_vaccine_12P" +PART_VAC_12P = "counts_part_vaccine_12P" +PART_VAC_SUM_18P = "cumulative_counts_part_vaccine_18P" +PART_VAC_18P = "counts_part_vaccine_18P" +PART_VAC_SUM_65P = "cumulative_counts_part_vaccine_65P" 
+PART_VAC_65P = "counts_part_vaccine_65P" + + + +SIGNALS = [ + FULL_VAC_SUM , + FULL_VAC , + FULL_VAC_SUM_12P , + FULL_VAC_12P , + FULL_VAC_SUM_18P , + FULL_VAC_18P , + FULL_VAC_SUM_65P , + FULL_VAC_65P , + PART_VAC_SUM , + PART_VAC , + PART_VAC_SUM_12P , + PART_VAC_12P , + PART_VAC_SUM_18P , + PART_VAC_18P , + PART_VAC_SUM_65P , + PART_VAC_65P +] + +GEOS = [ + "nation", + "state_code", + "hrr", + "hhs" +] + +SMOOTHERS = [ + (Smoother("identity", impute_method=None), ""), + (Smoother("moving_average", window_length=7), "_7dav"), +] diff --git a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py new file mode 100644 index 000000000..e19043163 --- /dev/null +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py @@ -0,0 +1,162 @@ +# -*- coding: utf-8 -*- +"""Functions for pulling data from the CDC data website for vaccines.""" +import hashlib +from logging import Logger +from delphi_utils.geomap import GeoMapper +import numpy as np +import pandas as pd + + + + + +def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: + """Pull the latest data from the CDC on vaccines and conform it into a dataset. + + The output dataset has: + - Each row corresponds to (County, Date), denoted (FIPS, timestamp) + - Each row additionally has columns that correspond to the counts or + cumulative counts of vaccination status (fully vaccinated, + partially vaccinated) of various age groups (all, 12+, 18+, 65+) + from December 13th 2020 until the latest date + + Note that the raw dataset gives the `cumulative` metrics, from which + we compute `counts` by taking first differences. Hence, `counts` + may be negative. This is wholly dependent on the quality of the raw + dataset. + + We filter the data such that we only keep rows with valid FIPS, or "FIPS" + codes defined under the exceptions of the README. 
The current exceptions + include: + # - 0: statewise unallocated + Parameters + ---------- + base_url: str + Base URL for pulling the CDC Vaccination Data + logger: Logger + Returns + ------- + pd.DataFrame + Dataframe as described above. + """ + # Columns to drop the the data frame. + drop_columns = [ + "date", + "recip_state", + "series_complete_pop_pct", + "mmwr_week", + "recip_county", + "state_id" + ] + + + # Read data + df = pd.read_csv(base_url) + logger.info("data retrieved from source", + num_rows=df.shape[0], + num_cols=df.shape[1], + min_date=min(df['Date']), + max_date=max(df['Date']), + checksum=hashlib.sha256(pd.util.hash_pandas_object(df).values).hexdigest()) + df.columns = [i.lower() for i in df.columns] + + df['recip_state'] = df['recip_state'].str.lower() + drop_columns.extend([x for x in df.columns if ("pct" in x) | ("svi" in x)]) + drop_columns = list(set(drop_columns)) + df = GeoMapper().add_geocode(df, "state_id", "state_code", + from_col="recip_state", new_col="state_id", dropna=False) + df['state_id'] = df['state_id'].fillna('0').astype(int) + # Change FIPS from 0 to XX000 for statewise unallocated cases/deaths + unassigned_index = (df["fips"] == "UNK") + df.loc[unassigned_index, "fips"] = df["state_id"].loc[unassigned_index].values * 1000 + + # Conform FIPS + df["fips"] = df["fips"].apply(lambda x: f"{int(x):05d}") + df["timestamp"] = pd.to_datetime(df["date"]) + # Drop unnecessary columns (state is pre-encoded in fips) + try: + df.drop(drop_columns, axis=1, inplace=True) + except KeyError as e: + raise ValueError( + "Tried to drop non-existent columns. The dataset " + "schema may have changed. Please investigate and " + "amend drop_columns." 
+ ) from e + # timestamp: str -> datetime + df.columns = ["fips", + "cumulative_counts_tot_vaccine", + "cumulative_counts_tot_vaccine_12P", + "cumulative_counts_tot_vaccine_18P", + "cumulative_counts_tot_vaccine_65P", + "cumulative_counts_part_vaccine", + "cumulative_counts_part_vaccine_12P", + "cumulative_counts_part_vaccine_18P", + "cumulative_counts_part_vaccine_65P", + "timestamp"] + df_dummy = df.loc[(df["fips"]!='00000') & (df["timestamp"] == min(df["timestamp"]))].copy() + #handle fips 00000 separately + df_oth = df.loc[((df["fips"]=='00000') & + (df["timestamp"]==min(df[df['fips'] == '00000']['timestamp'])))].copy() + df_dummy = pd.concat([df_dummy, df_oth]) + df_dummy.loc[:, "timestamp"] = df_dummy.loc[:, "timestamp"] - pd.Timedelta(days=1) + df_dummy.loc[:, ["cumulative_counts_tot_vaccine", + "cumulative_counts_tot_vaccine_12P", + "cumulative_counts_tot_vaccine_18P", + "cumulative_counts_tot_vaccine_65P", + "cumulative_counts_part_vaccine", + "cumulative_counts_part_vaccine_12P", + "cumulative_counts_part_vaccine_18P", + "cumulative_counts_part_vaccine_65P", + ]] = 0 + + df =pd.concat([df_dummy, df]) + # Obtain new_counts + df.sort_values(["fips", "timestamp"], inplace=True) + df["counts_tot_vaccine"] = df["cumulative_counts_tot_vaccine"].diff() # 1st discrete difference + df["counts_tot_vaccine_12P"] = df["cumulative_counts_tot_vaccine_12P"].diff() + df["counts_tot_vaccine_18P"] = df["cumulative_counts_tot_vaccine_18P"].diff() + df["counts_tot_vaccine_65P"] = df["cumulative_counts_tot_vaccine_65P"].diff() + df["counts_part_vaccine"] = df["cumulative_counts_part_vaccine"].diff() + df["counts_part_vaccine_12P"] = df["cumulative_counts_part_vaccine_12P"].diff() + df["counts_part_vaccine_18P"] = df["cumulative_counts_part_vaccine_18P"].diff() + df["counts_part_vaccine_65P"] = df["cumulative_counts_part_vaccine_65P"].diff() + + rem_list = [ x for x in list(df.columns) if x not in ['timestamp', 'fips'] ] + # Handle edge cases where we diffed across fips + mask 
= df["fips"] != df["fips"].shift(1) + df.loc[mask, rem_list] = np.nan + print(rem_list) + df.reset_index(inplace=True, drop=True) + # Final sanity checks + unique_days = df["timestamp"].unique() + min_timestamp = min(unique_days) + max_timestamp = max(unique_days) + n_days = (max_timestamp - min_timestamp) / np.timedelta64(1, "D") + 1 + if n_days != len(unique_days): + raise ValueError( + f"Not every day between {min_timestamp} and " + "{max_timestamp} is represented." + ) + return df.loc[ + df["timestamp"] >= min(df["timestamp"]), + [ # Reorder + "fips", + "timestamp", + "cumulative_counts_tot_vaccine", + "counts_tot_vaccine", + "cumulative_counts_tot_vaccine_12P", + "counts_tot_vaccine_12P", + "cumulative_counts_tot_vaccine_18P", + "counts_tot_vaccine_18P", + "cumulative_counts_tot_vaccine_65P", + "counts_tot_vaccine_65P", + "cumulative_counts_part_vaccine", + "counts_part_vaccine", + "cumulative_counts_part_vaccine_12P", + "counts_part_vaccine_12P", + "cumulative_counts_part_vaccine_18P", + "counts_part_vaccine_18P", + "cumulative_counts_part_vaccine_65P", + "counts_part_vaccine_65P" + ], + ].reset_index(drop=True) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py new file mode 100644 index 000000000..455ca44d2 --- /dev/null +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- +"""Functions to call when running the function. + +This module should contain a function called `run_module`, that is executed +when the module is run with `python -m MODULE_NAME`. `run_module`'s lone argument should be a +nested dictionary of parameters loaded from the params.json file. 
We expect the `params` to have +the following structure: + - "common": + - "export_dir": str, directory to which the results are exported + - "log_filename": (optional) str, path to log file + - "indicator": (optional) + - "wip_signal": (optional) Any[str, bool], list of signals that are works in progress, or + True if all signals in the registry are works in progress, or False if only + unpublished signals are. See `delphi_utils.add_prefix()` + - Any other indicator-specific settings +""" +from datetime import timedelta, datetime +from itertools import product +import time as tm +import numpy as np +from delphi_utils.export import create_export_csv +from delphi_utils.geomap import GeoMapper +from delphi_utils import get_structured_logger +from .constants import GEOS, SIGNALS, SMOOTHERS +from .pull import pull_cdcvacc_data + + +def run_module(params): + """ + Run the indicator. + + Arguments + -------- + params: Dict[str, Any] + Nested dictionary of parameters. + """ + start_time = tm.time() + logger = get_structured_logger( + __name__, filename=params["common"].get("log_filename"), + log_exceptions=params["common"].get("log_exceptions", True)) + base_url = params["indicator"]["base_url"] + ## build the base version of the signal at the most detailed geo level you can get. 
+ all_data = pull_cdcvacc_data(base_url, logger) + run_stats = [] + ## aggregate & smooth + for (sensor, smoother, geo) in product(SIGNALS, SMOOTHERS, GEOS): + logger.info("Running on ", + sensor=sensor, + smoother=smoother, + geo=geo) + df = GeoMapper().replace_geocode( + all_data[['timestamp','fips', sensor]],from_col='fips', + from_code="fips", + new_col="geo_id", + new_code=geo, + date_col="timestamp") + df["val"] = df[["geo_id", sensor]].groupby("geo_id")[sensor].transform( + smoother[0].smooth + ) + df["se"] = np.nan + df["sample_size"] = np.nan + sensor_name = sensor + smoother[1] + if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): + # don't export first 6 days for smoothed signals since they'll be nan. + start_date = min(df.timestamp) + timedelta(6) if smoother[1] else min(df.timestamp) + exported_csv_dates = create_export_csv( + df, + params["common"]["export_dir"], + geo, + sensor_name, + start_date=start_date) + if len(exported_csv_dates) > 0: + run_stats.append((max(exported_csv_dates), len(exported_csv_dates))) + ## log this indicator run + elapsed_time_in_seconds = round(tm.time() - start_time, 2) + min_max_date = run_stats and min(s[0] for s in run_stats) + csv_export_count = sum(s[-1] for s in run_stats) + max_lag_in_days = min_max_date and (datetime.now() - min_max_date).days + formatted_min_max_date = min_max_date and min_max_date.strftime("%Y-%m-%d") + logger.info("Completed indicator run", + elapsed_time_in_seconds = elapsed_time_in_seconds, + csv_export_count = csv_export_count, + max_lag_in_days = max_lag_in_days, + oldest_final_export_date = formatted_min_max_date) diff --git a/cdc_vaccines/params.json.template b/cdc_vaccines/params.json.template new file mode 100644 index 000000000..3d5337846 --- /dev/null +++ b/cdc_vaccines/params.json.template @@ -0,0 +1,35 @@ +{ + "common": { + "export_dir": "./receiving", + "log_filename": "cdc_vaccines.log" + }, + "indicator": { + "base_url": 
"https://data.cdc.gov/api/views/8xkx-amqh/rows.csv", + "export_start_date": "2020-12-13" + }, + "validation": { + "common": { + "data_source": "cdc", + "span_length": 14, + "min_expected_lag": {"all": "1"}, + "max_expected_lag": {"all": "7"}, + "dry_run": true, + "suppressed_errors": [] + }, + "static": { + "minimum_sample_size": 0, + "missing_se_allowed": true, + "missing_sample_size_allowed": true + }, + "dynamic": {} + }, + "archive": { + "aws_credentials": { + "aws_access_key_id": "", + "aws_secret_access_key": "" + }, + "bucket_name": "", + "indicator_prefix": "usafacts", + "cache_dir": "./cache" + } +} diff --git a/cdc_vaccines/setup.py b/cdc_vaccines/setup.py new file mode 100644 index 000000000..8802dfd45 --- /dev/null +++ b/cdc_vaccines/setup.py @@ -0,0 +1,29 @@ +from setuptools import setup +from setuptools import find_packages + +required = [ + "numpy", + "pandas", + "pydocstyle", + "pytest", + "pytest-cov", + "pylint==2.8.3", + "delphi-utils", + "covidcast" +] + +setup( + name="delphi_cdc_vaccines", + version="0.0.1", + description="The number of people who are vaccinated per county.", + author="Ananya Joshi", + author_email="aajoshi@andrew.cmu.edu", + url="https://github.com/cmu-delphi/covidcast-indicators", + install_requires=required, + classifiers=[ + "Development Status :: 0 - Attempt", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3.8", + ], + packages=find_packages(), +) diff --git a/cdc_vaccines/static/.gitignore b/cdc_vaccines/static/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/cdc_vaccines/tests/test_data/bad_extra_cols.csv b/cdc_vaccines/tests/test_data/bad_extra_cols.csv new file mode 100644 index 000000000..6642296a1 --- /dev/null +++ b/cdc_vaccines/tests/test_data/bad_extra_cols.csv @@ -0,0 +1,8 @@ 
+Date,FIPS,MMWR_week,Recip_County,Recip_State,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_12Plus,Series_Complete_12PlusPop_Pct,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Recip,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_12Plus,Administered_Dose1_Recip_12PlusPop_Pct,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65Plus,Administered_Dose1_Recip_65PlusPop_Pct,SVI_CTGY,Series_Complete_Pop_Pct_SVI,Series_Complete_12PlusPop_Pct_SVI,Series_Complete_18PlusPop_Pct_SVI,Series_Complete_65PlusPop_Pct_SVI,Extra_Administered_Dose1_Recip_12PlusPop_Pct,Extra_Administered_Dose1_Recip_18Plus,Extra_Administered_Dose1_Recip_18PlusPop_Pct,Extra_Administered_Dose1_Recip_65Plus,Extra_Administered_Dose1_Recip_65PlusPop_Pct,Extra_SVI_CTGY +8/26/21,UNK,34,Unknown County,UNK,0,789625,789591,0,733809,0,55620,0,0,1119266,0,1119203,0,1035082,0,75596,0,,,,,,0,1035082,0,75596,0, +8/26/21,32013,34,Humboldt County,NV,32.9,5537,5535,40.2,5368,43.6,1696,69.9,94.9,6293,37.4,6290,45.6,6014,48.9,1877,77.3,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,45.6,6014,48.9,1877,77.3,Mod-High +8/26/21,47131,34,Obion County,TN,28.4,8529,8529,33.1,8412,35.7,4114,66,97.8,10758,35.8,10755,41.7,10520,44.6,4625,74.2,High,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI,Mod-High VC/High SVI,41.7,10520,44.6,4625,74.2,High +8/26/21,48305,34,Lynn County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,,,0,0,0,0,0,High +8/26/21,UNK,34,Unknown County,VA,0,2358403,2352494,0,2206696,0,603704,0,51.3,2705300,0,2696267,0,2516857,0,660454,0,,,,,,0,2516857,0,660454,0, +8/26/21,51678,34,Lexington city,VA,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Low-Mod,,,,,0,0,0,0,0,Low-Mod +8/26/21,50025,34,Windham 
County,VT,56.8,23963,23963,64,22620,65.1,7718,76.5,73.7,27598,65.4,27588,73.7,25830,74.3,8588,85.1,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,73.7,25830,74.3,8588,85.1,Low-Mod \ No newline at end of file diff --git a/cdc_vaccines/tests/test_data/bad_missing_cols.csv b/cdc_vaccines/tests/test_data/bad_missing_cols.csv new file mode 100644 index 000000000..4efd5c1bd --- /dev/null +++ b/cdc_vaccines/tests/test_data/bad_missing_cols.csv @@ -0,0 +1,8 @@ +Date,FIPS,MMWR_week,Recip_County,Recip_State,Series_Complete_Pop_Pct,Series_Complete_65PlusPop_Pct,Administered_Dose1_Recip_12Plus,Administered_Dose1_Recip_12PlusPop_Pct,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65Plus +8/26/21,UNK,34,Unknown County,UNK,0,0,1119203,0,1035082,0,75596 +8/26/21,32013,34,Humboldt County,NV,32.9,69.9,6290,45.6,6014,48.9,1877 +8/26/21,47131,34,Obion County,TN,28.4,66,10755,41.7,10520,44.6,4625 +8/26/21,48305,34,Lynn County,TX,0,0,0,0,0,0,0 +8/26/21,UNK,34,Unknown County,VA,0,0,2696267,0,2516857,0,660454 +8/26/21,51678,34,Lexington city,VA,0,0,0,0,0,0,0 +8/26/21,50025,34,Windham County,VT,56.8,76.5,27588,73.7,25830,74.3,8588 \ No newline at end of file diff --git a/cdc_vaccines/tests/test_data/bad_missing_days.csv b/cdc_vaccines/tests/test_data/bad_missing_days.csv new file mode 100644 index 000000000..75a15510c --- /dev/null +++ b/cdc_vaccines/tests/test_data/bad_missing_days.csv @@ -0,0 +1,31 @@ 
+Date,FIPS,MMWR_week,Recip_County,Recip_State,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_12Plus,Series_Complete_12PlusPop_Pct,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Recip,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_12Plus,Administered_Dose1_Recip_12PlusPop_Pct,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65Plus,Administered_Dose1_Recip_65PlusPop_Pct,SVI_CTGY,Series_Complete_Pop_Pct_SVI,Series_Complete_12PlusPop_Pct_SVI,Series_Complete_18PlusPop_Pct_SVI,Series_Complete_65PlusPop_Pct_SVI +8/25/21,26007,34,Alpena County,MI,44.8,12730,12730,51,12344,53.5,5137,75.8,93.8,13426,47.3,13425,53.8,13293,57.6,5593,82.5,Low-Mod,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/25/21,30071,34,Phillips County,MT,33.8,1337,1337,40.1,1295,43,618,67.5,95.6,1513,38.3,1511,45.4,1450,48.1,665,72.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/25/21,41029,34,Jackson County,OR,46,101653,101502,53.2,97081,55.2,37628,75.5,97.7,115107,52.1,114881,60.2,109038,62,40450,81.1,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/25/21,UNK,34,Unknown County,UNK,0,787349,787315,0,731890,0,55518,0,0,1116357,0,1116294,0,1032597,0,75307,0,,,,, +8/25/21,49041,34,Sevier County,UT,30.7,6630,6630,37.5,6350,41.3,2602,72.9,97.2,8092,37.4,8089,45.8,7606,49.5,2928,82.1,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/25/21,26013,34,Baraga County,MI,48.8,4002,4001,54.4,3927,57.7,1440,77.1,93.8,4225,51.5,4224,57.5,4156,61,1481,79.3,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/24/21,30071,34,Phillips 
County,MT,33.8,1337,1337,40.1,1295,43,618,67.5,95.6,1513,38.3,1511,45.4,1450,48.1,665,72.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/24/21,26007,34,Alpena County,MI,44.8,12719,12719,50.9,12338,53.4,5137,75.8,93.8,13422,47.3,13421,53.8,13289,57.6,5593,82.5,Low-Mod,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/24/21,26013,34,Baraga County,MI,48.8,4002,4001,54.4,3927,57.7,1440,77.1,93.8,4225,51.5,4224,57.5,4156,61,1481,79.3,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/24/21,41029,34,Jackson County,OR,46,101651,101500,53.2,97079,55.2,37627,75.5,97.7,115101,52.1,114875,60.2,109032,62,40448,81.1,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/24/21,49041,34,Sevier County,UT,30.6,6607,6607,37.4,6328,41.2,2597,72.8,97.2,8050,37.2,8047,45.6,7567,49.2,2921,81.9,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/24/21,UNK,34,Unknown County,UNK,0,785133,785100,0,730087,0,55397,0,0,1113646,0,1113584,0,1030330,0,75003,0,,,,, +8/23/21,41029,34,Jackson County,OR,45.9,101501,101350,53.1,96953,55.1,37621,75.5,97.7,114861,52,114635,60,108835,61.9,40432,81.1,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/23/21,49041,34,Sevier County,UT,30.3,6557,6557,37.1,6295,41,2586,72.5,97.2,7920,36.6,7918,44.8,7478,48.7,2913,81.7,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/23/21,26007,34,Alpena County,MI,44.8,12714,12714,50.9,12334,53.4,5137,75.8,93.8,13422,47.3,13421,53.8,13289,57.6,5593,82.5,Low-Mod,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/23/21,30071,34,Phillips County,MT,33.8,1336,1336,40.1,1294,42.9,617,67.4,95.6,1513,38.3,1511,45.4,1450,48.1,665,72.6,Low-Mod,Low-Mod VC/Low-Mod 
SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/23/21,26013,34,Baraga County,MI,48.7,4001,4000,54.4,3926,57.6,1440,77.1,93.8,4225,51.5,4224,57.5,4156,61,1481,79.3,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/23/21,UNK,34,Unknown County,UNK,0,783092,783059,0,728591,0,55262,0,0,1111277,0,1111215,0,1028562,0,74699,0,,,,, +8/21/21,49041,33,Sevier County,UT,30.3,6557,6557,37.1,6295,41,2586,72.5,97.2,7919,36.6,7917,44.8,7477,48.7,2913,81.7,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/21/21,26013,33,Baraga County,MI,48.7,3997,3996,54.4,3922,57.6,1438,77,93.8,4224,51.5,4223,57.5,4155,61,1480,79.3,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/21/21,30071,33,Phillips County,MT,33.8,1335,1335,40.1,1293,42.9,617,67.4,95.6,1513,38.3,1511,45.4,1450,48.1,665,72.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/21/21,41029,33,Jackson County,OR,45.7,101069,100918,52.9,96575,54.9,37554,75.3,97.7,114032,51.6,113806,59.6,108142,61.5,40324,80.9,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/21/21,26007,33,Alpena County,MI,44.7,12697,12697,50.9,12320,53.4,5133,75.7,93.8,13418,47.2,13417,53.7,13285,57.5,5592,82.5,Low-Mod,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/21/21,UNK,33,Unknown County,UNK,0,777853,777820,0,724853,0,55045,0,0,1103847,0,1103785,0,1023256,0,74303,0,,,,, +8/19/21,26013,33,Baraga County,MI,48.6,3991,3990,54.3,3916,57.5,1437,77,93.8,4220,51.4,4219,57.4,4151,60.9,1480,79.3,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/19/21,41029,33,Jackson County,OR,45.6,100654,100503,52.6,96215,54.7,37479,75.2,97.7,113175,51.2,112949,59.2,107380,61.1,40201,80.6,Mod-High,Mod-High 
VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/19/21,UNK,33,Unknown County,UNK,0,772959,772926,0,720903,0,54789,0,0,1096323,0,1096261,0,1017125,0,73976,0,,,,, +8/19/21,30071,33,Phillips County,MT,33.5,1326,1326,39.8,1284,42.6,615,67.1,95.6,1501,38,1499,45,1438,47.7,664,72.5,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/19/21,49041,33,Sevier County,UT,30.2,6526,6526,37,6267,40.8,2576,72.2,97.1,7827,36.2,7825,44.3,7395,48.1,2904,81.4,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/19/21,26007,33,Alpena County,MI,44.6,12662,12662,50.7,12296,53.3,5130,75.7,93.8,13406,47.2,13405,53.7,13276,57.5,5591,82.5,Low-Mod,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI \ No newline at end of file diff --git a/cdc_vaccines/tests/test_data/small.csv b/cdc_vaccines/tests/test_data/small.csv new file mode 100644 index 000000000..d4243a035 --- /dev/null +++ b/cdc_vaccines/tests/test_data/small.csv @@ -0,0 +1,109 @@ +Date,FIPS,MMWR_week,Recip_County,Recip_State,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_12Plus,Series_Complete_12PlusPop_Pct,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Recip,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_12Plus,Administered_Dose1_Recip_12PlusPop_Pct,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65Plus,Administered_Dose1_Recip_65PlusPop_Pct,SVI_CTGY,Series_Complete_Pop_Pct_SVI,Series_Complete_12PlusPop_Pct_SVI,Series_Complete_18PlusPop_Pct_SVI,Series_Complete_65PlusPop_Pct_SVI +8/19/21,72121,33,Sabana Grande Municipio,PR,66.1,14360,14360,74.2,13402,75.2,4236,99.9,96,16305,75.1,16305,84.2,15052,84.5,4587,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI 
+8/19/21,72129,33,San Lorenzo Municipio,PR,56.9,20489,20481,63.8,18994,64.3,4771,78,96,24048,66.8,24031,74.9,22111,74.9,5805,94.9,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/19/21,55031,33,Douglas County,WI,35.2,15169,15169,40.3,14557,41.9,5178,62.5,97,18005,41.7,18005,47.9,17155,49.4,5778,69.7,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/19/21,47067,33,Hancock County,TN,24.7,1634,1634,28.6,1613,30.8,676,47,97.8,1818,27.5,1818,31.8,1789,34.2,726,50.5,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/19/21,UNK,33,Unknown County,UNK,0,772959,772926,0,720903,0,54789,0,0,1096323,0,1096261,0,1017125,0,73976,0,,,,, +8/19/21,48153,33,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/19/21,49031,33,Piute County,UT,29.7,439,439,33.7,435,38.7,244,58.7,97.1,527,35.6,527,40.5,518,46.1,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/19/21,1043,33,Cullman County,AL,27.7,23211,23210,32.5,22877,35.2,9906,63,91.9,29475,35.2,29470,41.3,28641,44.1,11151,70.9,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/19/21,UNK,33,Unknown County,TN,0,59660,59642,0,58874,0,15125,0,97.8,74682,0,74626,0,72349,0,17334,0,,,,, +8/19/21,18109,33,Morgan County,IN,44.5,31398,31397,51.9,30057,55,10392,85.2,98.6,33772,47.9,33771,55.8,32057,58.6,10663,87.4,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/19/21,56009,33,Converse County,WY,25.1,3466,3466,29.9,3397,32.5,1436,60.4,96.4,3971,28.7,3970,34.2,3864,37,1604,67.4,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/19/21,18099,33,Marshall County,IN,36.7,16967,16967,43.6,16139,46.5,6135,72.6,98.6,18507,40,18505,47.6,17320,49.9,6355,75.2,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI 
+8/18/21,48153,33,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/18/21,49031,33,Piute County,UT,29.7,439,439,33.7,435,38.7,244,58.7,97.1,527,35.6,527,40.5,518,46.1,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/18/21,55031,33,Douglas County,WI,35.1,15136,15136,40.2,14529,41.9,5171,62.4,97,17954,41.6,17954,47.7,17116,49.3,5773,69.7,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/18/21,72129,33,San Lorenzo Municipio,PR,56.9,20465,20457,63.7,18973,64.2,4768,77.9,96,23962,66.6,23945,74.6,22040,74.6,5801,94.8,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/18/21,56009,33,Converse County,WY,25.1,3465,3465,29.8,3396,32.5,1436,60.4,96.4,3963,28.7,3962,34.1,3856,36.9,1604,67.4,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/18/21,1043,33,Cullman County,AL,27.6,23088,23087,32.4,22765,35,9883,62.8,91.9,29328,35,29323,41.1,28502,43.9,11133,70.8,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/18/21,18109,33,Morgan County,IN,44.5,31348,31347,51.8,30021,54.9,10388,85.1,98.6,33713,47.8,33712,55.7,32002,58.5,10656,87.3,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/18/21,72121,33,Sabana Grande Municipio,PR,65.9,14319,14319,73.9,13369,75,4233,99.9,96,16232,74.8,16232,83.8,14988,84.1,4574,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/18/21,18099,33,Marshall County,IN,36.6,16939,16939,43.5,16112,46.4,6128,72.5,98.6,18474,39.9,18472,47.5,17293,49.8,6355,75.2,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/18/21,UNK,33,Unknown County,UNK,0,770751,770718,0,719193,0,54698,0,0,1092878,0,1092816,0,1014393,0,73827,0,,,,, +8/18/21,UNK,33,Unknown 
County,TN,0,59506,59488,0,58728,0,15089,0,97.8,74350,0,74295,0,72040,0,17267,0,,,,, +8/18/21,47067,33,Hancock County,TN,24.6,1631,1631,28.5,1610,30.7,675,46.9,97.8,1810,27.3,1810,31.6,1783,34,724,50.3,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/17/21,UNK,33,Unknown County,UNK,0,768634,768601,0,717447,0,54645,0,0,1089178,0,1089116,0,1011387,0,73711,0,,,,, +8/17/21,18099,33,Marshall County,IN,36.5,16891,16891,43.4,16078,46.3,6123,72.4,98.6,18392,39.8,18390,47.3,17259,49.7,6354,75.2,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/17/21,55031,33,Douglas County,WI,35,15121,15121,40.2,14515,41.8,5168,62.4,97,17936,41.6,17936,47.7,17098,49.3,5771,69.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/17/21,49031,33,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/17/21,72129,33,San Lorenzo Municipio,PR,56.9,20465,20457,63.7,18973,64.2,4768,77.9,96,23962,66.6,23945,74.6,22040,74.6,5801,94.8,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/17/21,UNK,33,Unknown County,TN,0,59345,59327,0,58576,0,15053,0,97.8,74078,0,74023,0,71776,0,17214,0,,,,, +8/17/21,56009,33,Converse County,WY,25.1,3463,3463,29.8,3394,32.5,1436,60.4,96.4,3961,28.7,3960,34.1,3854,36.9,1604,67.4,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/17/21,47067,33,Hancock County,TN,24.6,1629,1629,28.5,1608,30.7,675,46.9,97.8,1806,27.3,1806,31.6,1779,34,724,50.3,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/17/21,18109,33,Morgan County,IN,44.4,31316,31315,51.7,29996,54.9,10387,85.1,98.6,33662,47.8,33661,55.6,31960,58.4,10651,87.3,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/17/21,48153,33,Floyd 
County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/17/21,1043,33,Cullman County,AL,27.5,23032,23031,32.3,22712,35,9875,62.8,91.9,29254,34.9,29249,41,28431,43.8,11126,70.7,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/17/21,72121,33,Sabana Grande Municipio,PR,65.9,14319,14319,73.9,13369,75,4233,99.9,96,16232,74.8,16232,83.8,14988,84.1,4574,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/16/21,UNK,33,Unknown County,TN,0,59321,59303,0,58555,0,15048,0,97.8,73946,0,73891,0,71657,0,17203,0,,,,, +8/16/21,18099,33,Marshall County,IN,36.5,16879,16879,43.4,16068,46.3,6122,72.4,98.6,18369,39.7,18367,47.2,17239,49.7,6351,75.1,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/16/21,1043,33,Cullman County,AL,27.4,22971,22970,32.2,22657,34.9,9869,62.7,91.9,29137,34.8,29132,40.9,28333,43.6,11111,70.6,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/16/21,72129,33,San Lorenzo Municipio,PR,56.8,20426,20418,63.6,18941,64.1,4761,77.8,96,23869,66.3,23852,74.3,21971,74.4,5797,94.8,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/16/21,55031,33,Douglas County,WI,35,15120,15120,40.2,14515,41.8,5168,62.4,97,17928,41.5,17928,47.7,17090,49.2,5771,69.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/16/21,72121,33,Sabana Grande Municipio,PR,65.9,14305,14305,73.9,13359,75,4231,99.9,96,16159,74.4,16159,83.4,14924,83.8,4561,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/16/21,56009,33,Converse County,WY,24.8,3433,3433,29.6,3369,32.2,1431,60.2,96.4,3902,28.2,3901,33.6,3809,36.5,1590,66.8,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/16/21,18109,33,Morgan 
County,IN,44.4,31271,31270,51.6,29963,54.8,10384,85.1,98.6,33622,47.7,33621,55.5,31929,58.4,10648,87.3,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/16/21,UNK,33,Unknown County,UNK,0,766588,766555,0,716121,0,54544,0,0,1085417,0,1085355,0,1008952,0,73510,0,,,,, +8/16/21,47067,33,Hancock County,TN,24.6,1629,1629,28.5,1608,30.7,676,47,97.8,1805,27.3,1805,31.6,1778,34,724,50.3,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/16/21,48153,33,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/16/21,49031,33,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/15/21,72129,33,San Lorenzo Municipio,PR,56.7,20413,20405,63.6,18928,64.1,4755,77.7,96,23830,66.2,23813,74.2,21945,74.3,5792,94.7,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/15/21,48153,33,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/15/21,47067,33,Hancock County,TN,24.6,1628,1628,28.5,1607,30.7,676,47,97.8,1804,27.3,1804,31.5,1777,33.9,723,50.2,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/15/21,1043,33,Cullman County,AL,27.3,22861,22860,32.1,22570,34.7,9861,62.7,91.9,28943,34.6,28938,40.6,28169,43.4,11095,70.5,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/15/21,18109,33,Morgan County,IN,44.3,31220,31219,51.6,29919,54.7,10381,85.1,98.6,33573,47.6,33572,55.5,31893,58.3,10645,87.2,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/15/21,55031,33,Douglas County,WI,34.9,15080,15080,40.1,14483,41.7,5162,62.3,97,17875,41.4,17875,47.5,17057,49.1,5765,69.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/15/21,72121,33,Sabana Grande 
Municipio,PR,65.9,14299,14299,73.8,13355,75,4230,99.9,96,16119,74.2,16119,83.2,14893,83.6,4555,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/15/21,56009,33,Converse County,WY,24.8,3433,3433,29.6,3369,32.2,1431,60.2,96.4,3901,28.2,3900,33.6,3808,36.4,1590,66.8,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/15/21,49031,33,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/15/21,UNK,33,Unknown County,UNK,0,764575,764542,0,714649,0,54426,0,0,1081257,0,1081195,0,1005831,0,73305,0,,,,, +8/15/21,UNK,33,Unknown County,TN,0,59136,59118,0,58379,0,15004,0,97.8,73626,0,73571,0,71349,0,17149,0,,,,, +8/15/21,18099,33,Marshall County,IN,36.5,16868,16868,43.3,16059,46.3,6122,72.4,98.6,18335,39.6,18333,47.1,17209,49.6,6350,75.1,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/14/21,47067,32,Hancock County,TN,24.6,1626,1626,28.4,1605,30.6,676,47,97.8,1797,27.1,1797,31.4,1771,33.8,721,50.1,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/14/21,18099,32,Marshall County,IN,36.4,16854,16854,43.3,16047,46.2,6120,72.4,98.6,18308,39.6,18306,47,17186,49.5,6347,75.1,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/14/21,72121,32,Sabana Grande Municipio,PR,65.8,14282,14282,73.8,13340,74.9,4230,99.9,96,16094,74.1,16094,83.1,14873,83.5,4554,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/14/21,55031,32,Douglas County,WI,34.9,15080,15080,40.1,14483,41.7,5162,62.3,97,17875,41.4,17875,47.5,17057,49.1,5765,69.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/14/21,18109,32,Morgan 
County,IN,44.2,31191,31190,51.5,29898,54.7,10380,85.1,98.6,33520,47.6,33519,55.4,31853,58.3,10643,87.2,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/14/21,72129,32,San Lorenzo Municipio,PR,56.6,20380,20372,63.5,18902,64,4751,77.7,96,23744,66,23727,73.9,21886,74.1,5789,94.6,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/14/21,UNK,32,Unknown County,UNK,0,762688,762656,0,713187,0,54336,0,0,1077619,0,1077559,0,1002992,0,73097,0,,,,, +8/14/21,48153,32,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/14/21,56009,32,Converse County,WY,24.8,3433,3433,29.6,3369,32.2,1431,60.2,96.4,3901,28.2,3900,33.6,3808,36.4,1590,66.8,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/14/21,1043,32,Cullman County,AL,27.2,22777,22776,31.9,22497,34.6,9848,62.6,91.8,28740,34.3,28735,40.3,27994,43.1,11077,70.4,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/14/21,UNK,32,Unknown County,TN,0,59055,59037,0,58308,0,14997,0,97.8,73414,0,73359,0,71149,0,17126,0,,,,, +8/14/21,49031,32,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/13/21,18099,32,Marshall County,IN,36.4,16832,16832,43.3,16028,46.2,6117,72.3,98.6,18263,39.5,18261,46.9,17147,49.4,6343,75,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/13/21,1043,32,Cullman County,AL,27.2,22758,22757,31.9,22480,34.6,9843,62.6,91.8,28693,34.3,28688,40.2,27952,43,11073,70.4,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/13/21,47067,32,Hancock County,TN,24.5,1624,1624,28.4,1605,30.6,676,47,97.8,1797,27.1,1797,31.4,1771,33.8,721,50.1,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/13/21,56009,32,Converse 
County,WY,24.8,3433,3433,29.6,3369,32.2,1431,60.2,96.4,3900,28.2,3899,33.6,3807,36.4,1590,66.8,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/13/21,UNK,32,Unknown County,TN,0,58888,58870,0,58148,0,14972,0,97.8,73059,0,73004,0,70807,0,17061,0,,,,, +8/13/21,18109,32,Morgan County,IN,44.2,31134,31133,51.4,29852,54.6,10377,85.1,98.6,33443,47.4,33442,55.2,31787,58.1,10635,87.2,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/13/21,49031,32,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/13/21,72129,32,San Lorenzo Municipio,PR,56.5,20335,20327,63.3,18865,63.9,4746,77.6,96,23666,65.8,23649,73.7,21826,73.9,5788,94.6,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/13/21,48153,32,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/13/21,55031,32,Douglas County,WI,34.9,15058,15058,40,14466,41.7,5158,62.2,97,17847,41.4,17847,47.5,17031,49.1,5762,69.5,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/13/21,72121,32,Sabana Grande Municipio,PR,65.6,14253,14253,73.6,13316,74.7,4225,99.9,96,16071,74,16071,83,14852,83.4,4550,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/13/21,UNK,32,Unknown County,UNK,0,760315,760283,0,711222,0,54262,0,0,1072813,0,1072753,0,998950,0,72929,0,,,,, +8/12/21,UNK,32,Unknown County,UNK,0,758305,758273,0,709538,0,54190,0,0,1068845,0,1068785,0,995682,0,72750,0,,,,, +8/12/21,18099,32,Marshall County,IN,36.4,16817,16817,43.2,16013,46.1,6116,72.3,98.6,18242,39.4,18240,46.9,17129,49.4,6341,75,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/12/21,49031,32,Piute 
County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/12/21,72129,32,San Lorenzo Municipio,PR,56.4,20296,20288,63.2,18833,63.8,4740,77.5,96,23615,65.6,23598,73.5,21786,73.8,5784,94.5,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/12/21,72121,32,Sabana Grande Municipio,PR,65.6,14234,14234,73.5,13303,74.7,4220,99.9,96,16057,74,16057,82.9,14840,83.3,4548,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/12/21,48153,32,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/12/21,55031,32,Douglas County,WI,34.9,15040,15040,40,14452,41.6,5153,62.2,97,17820,41.3,17820,47.4,17007,49,5756,69.4,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/12/21,18109,32,Morgan County,IN,44.1,31079,31078,51.3,29808,54.5,10372,85,98.6,33362,47.3,33361,55.1,31719,58,10630,87.1,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/12/21,56009,32,Converse County,WY,24.8,3433,3433,29.6,3369,32.2,1431,60.2,96.4,3899,28.2,3898,33.6,3806,36.4,1589,66.8,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/12/21,UNK,32,Unknown County,TN,0,58763,58745,0,58026,0,14940,0,97.8,72726,0,72672,0,70489,0,17016,0,,,,, +8/12/21,1043,32,Cullman County,AL,27.1,22693,22692,31.8,22421,34.5,9833,62.5,91.8,28535,34.1,28530,40,27817,42.8,11062,70.3,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/12/21,47067,32,Hancock County,TN,24.5,1623,1623,28.4,1604,30.6,676,47,97.8,1795,27.1,1795,31.4,1769,33.8,720,50,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/11/21,18099,32,Marshall County,IN,36.3,16794,16794,43.2,15996,46.1,6113,72.3,98.6,18199,39.3,18197,46.8,17097,49.3,6338,75,Mod-High,Low-Mod VC/Mod-High 
SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/11/21,72121,32,Sabana Grande Municipio,PR,65.2,14166,14166,73.2,13266,74.5,4214,99.9,96,16023,73.8,16023,82.7,14818,83.2,4545,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/11/21,72129,32,San Lorenzo Municipio,PR,56.3,20279,20271,63.2,18821,63.7,4738,77.4,96,23592,65.6,23575,73.4,21767,73.7,5783,94.5,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/11/21,UNK,32,Unknown County,TN,0,58601,58583,0,57870,0,14895,0,97.8,72472,0,72418,0,70251,0,16956,0,,,,, +8/11/21,47067,32,Hancock County,TN,24.5,1622,1622,28.4,1603,30.6,675,46.9,97.8,1794,27.1,1794,31.4,1768,33.8,720,50,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/11/21,48153,32,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/11/21,UNK,32,Unknown County,UNK,0,756368,756338,0,707922,0,54140,0,0,1065083,0,1065026,0,992587,0,72616,0,,,,, +8/11/21,55031,32,Douglas County,WI,34.8,15014,15014,39.9,14434,41.6,5147,62.1,97,17792,41.2,17792,47.3,16986,48.9,5754,69.4,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/11/21,56009,32,Converse County,WY,24.8,3429,3429,29.5,3365,32.2,1429,60.1,96.4,3895,28.2,3894,33.5,3802,36.4,1587,66.7,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/11/21,18109,32,Morgan County,IN,44,31047,31046,51.3,29782,54.5,10371,85,98.6,33318,47.3,33317,55,31682,57.9,10627,87.1,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/11/21,49031,32,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/11/21,1043,32,Cullman County,AL,27,22603,22602,31.7,22338,34.4,9821,62.4,91.8,28324,33.8,28319,39.7,27622,42.5,11036,70.1,Mod-High,Low VC/Mod-High SVI,Low-Mod 
VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI \ No newline at end of file diff --git a/cdc_vaccines/tests/test_data/small_confirmed.csv b/cdc_vaccines/tests/test_data/small_confirmed.csv new file mode 100644 index 000000000..e69de29bb diff --git a/cdc_vaccines/tests/test_data/small_pull.csv b/cdc_vaccines/tests/test_data/small_pull.csv new file mode 100644 index 000000000..812835e37 --- /dev/null +++ b/cdc_vaccines/tests/test_data/small_pull.csv @@ -0,0 +1,4 @@ +Date,FIPS,MMWR_week,Recip_County,Recip_State,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_12Plus,Series_Complete_12PlusPop_Pct,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Recip,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_12Plus,Administered_Dose1_Recip_12PlusPop_Pct,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65Plus,Administered_Dose1_Recip_65PlusPop_Pct,SVI_CTGY,Series_Complete_Pop_Pct_SVI,Series_Complete_12PlusPop_Pct_SVI,Series_Complete_18PlusPop_Pct_SVI,Series_Complete_65PlusPop_Pct_SVI +8/26/21,UNK,34,Unknown County,UNK,0,789625,789591,0,733809,0,55620,0,0,1119266,0,1119203,0,1035082,0,75596,0,,,,, +8/26/21,32013,34,Humboldt County,NV,32.9,5537,5535,40.2,5368,43.6,1696,69.9,94.9,6293,37.4,6290,45.6,6014,48.9,1877,77.3,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/26/21,48305,34,Lynn County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, \ No newline at end of file diff --git a/cdc_vaccines/tests/test_pull.py b/cdc_vaccines/tests/test_pull.py new file mode 100644 index 000000000..f49d0eff6 --- /dev/null +++ b/cdc_vaccines/tests/test_pull.py @@ -0,0 +1,71 @@ +"""Tests for running the CDC Vaccine indicator.""" +import pytest +import logging +from unittest.mock import patch + +import pandas as pd +import numpy as np +from delphi_cdc_vaccines.pull import 
pull_cdcvacc_data + +from test_run import local_fetch + +BASE_URL_GOOD = "test_data/small_pull.csv" + +BASE_URL_BAD = { + "missing_days": "test_data/bad_missing_days.csv", + "missing_cols": "test_data/bad_missing_cols.csv", + "extra_cols": "test_data/bad_extra_cols.csv" +} + +TEST_LOGGER = logging.getLogger() + +class TestPullCDCVaccines: + """Tests for the `pull_cdcvacc_data()` function.""" + def test_good_file(self): + """Test the expected output from a smaller file.""" + df = pull_cdcvacc_data(BASE_URL_GOOD, TEST_LOGGER) + expected_df = pd.DataFrame({ + "fips": ["00000","00000","32013","32013","48305","48305"], + "timestamp": [pd.Timestamp("2021-08-25"), pd.Timestamp("2021-08-26"), + pd.Timestamp("2021-08-25"), pd.Timestamp("2021-08-26"), + pd.Timestamp("2021-08-25"), pd.Timestamp("2021-08-26")], + "cumulative_counts_tot_vaccine": [np.nan,789625.0,np.nan,5537.0,np.nan,0.0], + "counts_tot_vaccine": [np.nan,789625.0,np.nan,5537.0,np.nan,0.0], + "cumulative_counts_tot_vaccine_12P": [np.nan,789591.0,np.nan,5535.0,np.nan,0.0], + "counts_tot_vaccine_12P": [np.nan,789591.0,np.nan,5535.0,np.nan,0.0], + "cumulative_counts_tot_vaccine_18P": [np.nan,733809.0,np.nan,5368.0,np.nan,0.0], + "counts_tot_vaccine_18P": [np.nan,733809.0,np.nan,5368.0,np.nan,0.0], + "cumulative_counts_tot_vaccine_65P": [np.nan,55620.0,np.nan,1696.0,np.nan,0.0], + "counts_tot_vaccine_65P": [np.nan,55620.0,np.nan,1696.0,np.nan,0.0], + "cumulative_counts_part_vaccine": [np.nan,1119266.0,np.nan,6293.0,np.nan,0.0], + "counts_part_vaccine": [np.nan,1119266.0,np.nan,6293.0,np.nan,0.0], + "cumulative_counts_part_vaccine_12P": [np.nan,1119203.0,np.nan,6290.0,np.nan,0.0], + "counts_part_vaccine_12P": [np.nan,1119203.0,np.nan,6290.0,np.nan,0.0], + "cumulative_counts_part_vaccine_18P": [np.nan,1035082.0,np.nan,6014.0,np.nan,0.0], + "counts_part_vaccine_18P": [np.nan,1035082.0,np.nan,6014.0,np.nan,0.0], + "cumulative_counts_part_vaccine_65P": [np.nan,75596.0,np.nan,1877.0,np.nan,0.0], + 
"counts_part_vaccine_65P": [np.nan,75596.0,np.nan,1877.0,np.nan,0.0]}, + index=[0, 1, 2, 3, 4, 5]) + # sort since rows order doesn't matter + pd.testing.assert_frame_equal(df.sort_index(), expected_df.sort_index()) + + def test_missing_days(self): + """Test if error is raised when there are missing days.""" + with pytest.raises(ValueError): + pull_cdcvacc_data( + BASE_URL_BAD["missing_days"], TEST_LOGGER + ) + + def test_missing_cols(self): + """Test if error is raised when there are missing columns.""" + with pytest.raises(ValueError): + pull_cdcvacc_data( + BASE_URL_BAD["missing_cols"],TEST_LOGGER + ) + + def test_extra_cols(self): + """Test if error is raised when there are extra columns.""" + with pytest.raises(ValueError): + pull_cdcvacc_data( + BASE_URL_BAD["extra_cols"], TEST_LOGGER + ) \ No newline at end of file diff --git a/cdc_vaccines/tests/test_run.py b/cdc_vaccines/tests/test_run.py new file mode 100644 index 000000000..f65f00764 --- /dev/null +++ b/cdc_vaccines/tests/test_run.py @@ -0,0 +1,80 @@ +"""Tests for running the CDC Vaccine indicator.""" +from itertools import product +from os import listdir +from os.path import join +from unittest.mock import patch + +import pandas as pd + +from delphi_cdc_vaccines.run import run_module + +def local_fetch(url, cache): + return pd.read_csv(url) + +class TestRun: + """Tests for the `run_module()` function.""" + PARAMS = { + "common": { + "export_dir": "./receiving", + "input_dir": "./input_cache" + }, + "indicator": { + "base_url": "./test_data/small.csv", + "export_start_date": "2021-05-01" + } + } + + def test_output_files_exist(self): + """Test that the expected output files exist.""" + run_module(self.PARAMS) + + csv_files = [f for f in listdir("receiving") if f.endswith(".csv")] + + dates = [ + "20210810", + "20210811", + "20210812", + "20210813", + "20210814", + "20210815", + "20210816", + "20210817", + "20210818", + "20210819", + ] + geos = ["state_code", "hrr", "hhs", "nation"] + + expected_files = [] 
+ for metric in ["cumulative_counts_tot_vaccine", + "counts_tot_vaccine", + "cumulative_counts_tot_vaccine_12P", + "counts_tot_vaccine_12P", + "cumulative_counts_tot_vaccine_18P", + "counts_tot_vaccine_18P", + "cumulative_counts_tot_vaccine_65P", + "counts_tot_vaccine_65P", + "cumulative_counts_part_vaccine", + "counts_part_vaccine", + "cumulative_counts_part_vaccine_12P", + "counts_part_vaccine_12P", + "cumulative_counts_part_vaccine_18P", + "counts_part_vaccine_18P", + "cumulative_counts_part_vaccine_65P", + "counts_part_vaccine_65P"]: + for date in dates: + for geo in geos: + expected_files += [date + "_" + geo + "_" + metric + ".csv"] + if not("cumulative" in metric) and not (date in dates[:6]): + expected_files += [date + "_" + geo + "_" + metric + "_7dav.csv"] + + + assert set(csv_files) == set(expected_files) + + def test_output_file_format(self): + """Test that the output files have the proper format.""" + run_module(self.PARAMS) + + df = pd.read_csv( + join("receiving", "20210819_state_code_counts_tot_vaccine.csv") + ) + assert (df.columns.values == ["geo_id", "val", "se", "sample_size"]).all() \ No newline at end of file From 69071beff462a3c5410a192ce5f20e83828b3da9 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Thu, 9 Sep 2021 11:31:42 -0400 Subject: [PATCH 31/95] added explicit dictionary creation --- cdc_vaccines/delphi_cdc_vaccines/run.py | 6 ++++++ cdc_vaccines/tests/test_run.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 455ca44d2..93ee75886 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -43,6 +43,11 @@ def run_module(params): all_data = pull_cdcvacc_data(base_url, logger) run_stats = [] ## aggregate & smooth + + + if not os.path.exists(params["common"]["export_dir"]): + os.makedirs(params["common"]["export_dir"]) + for (sensor, smoother, geo) in product(SIGNALS, SMOOTHERS, GEOS): 
logger.info("Running on ", sensor=sensor, @@ -60,6 +65,7 @@ def run_module(params): df["se"] = np.nan df["sample_size"] = np.nan sensor_name = sensor + smoother[1] + if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): # don't export first 6 days for smoothed signals since they'll be nan. start_date = min(df.timestamp) + timedelta(6) if smoother[1] else min(df.timestamp) diff --git a/cdc_vaccines/tests/test_run.py b/cdc_vaccines/tests/test_run.py index f65f00764..e1ecbb17f 100644 --- a/cdc_vaccines/tests/test_run.py +++ b/cdc_vaccines/tests/test_run.py @@ -20,7 +20,7 @@ class TestRun: }, "indicator": { "base_url": "./test_data/small.csv", - "export_start_date": "2021-05-01" + "export_start_date": "2021-08-10" } } From b63e46d1f49c8ae478432b7f18220237229fa078 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Thu, 9 Sep 2021 11:36:00 -0400 Subject: [PATCH 32/95] added os import --- cdc_vaccines/delphi_cdc_vaccines/run.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 93ee75886..9a6f9cdae 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -18,6 +18,7 @@ from itertools import product import time as tm import numpy as np +import os from delphi_utils.export import create_export_csv from delphi_utils.geomap import GeoMapper from delphi_utils import get_structured_logger From fe67af3a5dba4b36d728091892db26875f682850 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Thu, 9 Sep 2021 11:43:08 -0400 Subject: [PATCH 33/95] Minor changes for the linter - tests pass locally --- cdc_vaccines/Makefile | 6 +++++- cdc_vaccines/delphi_cdc_vaccines/run.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/cdc_vaccines/Makefile b/cdc_vaccines/Makefile index ca3013248..5d1101c53 100644 --- a/cdc_vaccines/Makefile +++ b/cdc_vaccines/Makefile @@ -3,7 +3,7 @@ dir = $(shell find ./delphi_* -name __init__.py | grep -o 
'delphi_[_[:alnum:]]*') venv: - python -m venv env + python3.8 -m venv env install: venv . env/bin/activate; \ @@ -23,3 +23,7 @@ clean: rm -rf env rm -f params.json +run: + env/bin/python -m $(dir) + env/bin/python -m delphi_utils.validator --dry_run + env/bin/python -m delphi_utils.archive \ No newline at end of file diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 9a6f9cdae..c82ee3b16 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -17,8 +17,8 @@ from datetime import timedelta, datetime from itertools import product import time as tm -import numpy as np import os +import numpy as np from delphi_utils.export import create_export_csv from delphi_utils.geomap import GeoMapper from delphi_utils import get_structured_logger From 9284835ffcfbd74b11080cbc64cfee5979355f03 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 15:17:55 -0400 Subject: [PATCH 34/95] minor changes --- cdc_vaccines/delphi_cdc_vaccines/constants.py | 9 ++++++++- cdc_vaccines/delphi_cdc_vaccines/pull.py | 11 ++--------- cdc_vaccines/delphi_cdc_vaccines/run.py | 7 ++++++- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/constants.py b/cdc_vaccines/delphi_cdc_vaccines/constants.py index 113ecb69e..2896a1f8a 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/constants.py +++ b/cdc_vaccines/delphi_cdc_vaccines/constants.py @@ -43,7 +43,7 @@ GEOS = [ "nation", - "state_code", + "state", "hrr", "hhs" ] @@ -52,3 +52,10 @@ (Smoother("identity", impute_method=None), ""), (Smoother("moving_average", window_length=7), "_7dav"), ] + + +DIFFERENCE_MAPPING = { + f"{INCIDENCE}_counts_{completeness}_vaccine{age}": f"{CUMULATIVE}_counts_{completeness}_vaccine{age}" + for completeness, age in product(COMPLETENESS, AGE) +} +SIGNALS = list(DIFFERENCE_MAPPING.keys()) + list(DIFFERENCE_MAPPING.values()) \ No newline at end of file diff --git 
a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py index e19043163..32a53a4d7 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/pull.py +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py @@ -112,20 +112,13 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: df =pd.concat([df_dummy, df]) # Obtain new_counts df.sort_values(["fips", "timestamp"], inplace=True) - df["counts_tot_vaccine"] = df["cumulative_counts_tot_vaccine"].diff() # 1st discrete difference - df["counts_tot_vaccine_12P"] = df["cumulative_counts_tot_vaccine_12P"].diff() - df["counts_tot_vaccine_18P"] = df["cumulative_counts_tot_vaccine_18P"].diff() - df["counts_tot_vaccine_65P"] = df["cumulative_counts_tot_vaccine_65P"].diff() - df["counts_part_vaccine"] = df["cumulative_counts_part_vaccine"].diff() - df["counts_part_vaccine_12P"] = df["cumulative_counts_part_vaccine_12P"].diff() - df["counts_part_vaccine_18P"] = df["cumulative_counts_part_vaccine_18P"].diff() - df["counts_part_vaccine_65P"] = df["cumulative_counts_part_vaccine_65P"].diff() + for to, from in DIFFERENCE_MAPPING.items(): + df[to] = df[from].diff() rem_list = [ x for x in list(df.columns) if x not in ['timestamp', 'fips'] ] # Handle edge cases where we diffed across fips mask = df["fips"] != df["fips"].shift(1) df.loc[mask, rem_list] = np.nan - print(rem_list) df.reset_index(inplace=True, drop=True) # Final sanity checks unique_days = df["timestamp"].unique() diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index c82ee3b16..f5e4751ac 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -50,15 +50,20 @@ def run_module(params): os.makedirs(params["common"]["export_dir"]) for (sensor, smoother, geo) in product(SIGNALS, SMOOTHERS, GEOS): + logger.info("Running on ", sensor=sensor, smoother=smoother, geo=geo) + geo_map = geo + if geo=='state': + geo_map='state_code' + df = GeoMapper().replace_geocode( 
all_data[['timestamp','fips', sensor]],from_col='fips', from_code="fips", new_col="geo_id", - new_code=geo, + new_code=geo_map, date_col="timestamp") df["val"] = df[["geo_id", sensor]].groupby("geo_id")[sensor].transform( smoother[0].smooth From 15eb876002d124e38c5910a0d9722142698cdadc Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 13:49:39 -0400 Subject: [PATCH 35/95] Update cdc_vaccines/delphi_cdc_vaccines/__main__.py Co-authored-by: Katie Mazaitis --- cdc_vaccines/delphi_cdc_vaccines/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/__main__.py b/cdc_vaccines/delphi_cdc_vaccines/__main__.py index 32fc0eecc..0aa3f6ac1 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/__main__.py +++ b/cdc_vaccines/delphi_cdc_vaccines/__main__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """Call the function run_module when executed. -This file indicates that calling the module (`python -m MODULE_NAME`) will +This file indicates that calling the module (`python -m delphi_cdc_vaccines`) will call the function `run_module` found within the run.py file. There should be no need to change this template. 
""" From 7a23d2fff21f32459cb017db13574c92fef0b9d6 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 13:49:51 -0400 Subject: [PATCH 36/95] Update cdc_vaccines/delphi_cdc_vaccines/constants.py Co-authored-by: Katie Mazaitis --- cdc_vaccines/delphi_cdc_vaccines/constants.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/constants.py b/cdc_vaccines/delphi_cdc_vaccines/constants.py index 2896a1f8a..6e8a9620c 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/constants.py +++ b/cdc_vaccines/delphi_cdc_vaccines/constants.py @@ -45,7 +45,8 @@ "nation", "state", "hrr", - "hhs" + "hhs", + "msa" ] SMOOTHERS = [ From 0ee8c243b05844890b0d158d6dfe23fedae4dcee Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 13:50:41 -0400 Subject: [PATCH 37/95] Update cdc_vaccines/README.md Co-authored-by: Katie Mazaitis --- cdc_vaccines/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/README.md b/cdc_vaccines/README.md index b56682d0f..bcbcee972 100644 --- a/cdc_vaccines/README.md +++ b/cdc_vaccines/README.md @@ -2,7 +2,7 @@ This indicator provides the number official vaccinations in the US. We export the county-level daily vaccination rates data as-is, and publishes the result as a COVIDcast signal. -We also aggregate the data to the HHS, State, and Nation levels. +We also aggregate the data to the MSA, HRR, State, HHS Region, and Nation levels. For detailed information see the files DETAILS.md contained in this directory. Note that individuals could be vaccinated outside of the US. 
Additionally, From 1997668e12a0ab7264ff05ce17e0bf66f8b4b710 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 13:51:55 -0400 Subject: [PATCH 38/95] Update cdc_vaccines/README.md Co-authored-by: Katie Mazaitis --- cdc_vaccines/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/README.md b/cdc_vaccines/README.md index bcbcee972..3423ca851 100644 --- a/cdc_vaccines/README.md +++ b/cdc_vaccines/README.md @@ -1,7 +1,7 @@ # CDC Vaccinations This indicator provides the number official vaccinations in the US. We export the county-level -daily vaccination rates data as-is, and publishes the result as a COVIDcast signal. +daily vaccination rates data as-is, and publish the result as a COVIDcast signal. We also aggregate the data to the MSA, HRR, State, HHS Region, and Nation levels. For detailed information see the files DETAILS.md contained in this directory. From 754bba3727e8302fd8282afebc0d52bf178d8028 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 13:52:19 -0400 Subject: [PATCH 39/95] Update cdc_vaccines/params.json.template Co-authored-by: Katie Mazaitis --- cdc_vaccines/params.json.template | 9 --------- 1 file changed, 9 deletions(-) diff --git a/cdc_vaccines/params.json.template b/cdc_vaccines/params.json.template index 3d5337846..bfc654514 100644 --- a/cdc_vaccines/params.json.template +++ b/cdc_vaccines/params.json.template @@ -23,13 +23,4 @@ }, "dynamic": {} }, - "archive": { - "aws_credentials": { - "aws_access_key_id": "", - "aws_secret_access_key": "" - }, - "bucket_name": "", - "indicator_prefix": "usafacts", - "cache_dir": "./cache" - } } From 33069b565c89d948ad54fd42f7ada122f5ba466f Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 15:03:37 -0400 Subject: [PATCH 40/95] Update cdc_vaccines/README.md Co-authored-by: Katie Mazaitis --- cdc_vaccines/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/README.md b/cdc_vaccines/README.md 
index 3423ca851..ea076dc1e 100644 --- a/cdc_vaccines/README.md +++ b/cdc_vaccines/README.md @@ -1,6 +1,6 @@ # CDC Vaccinations -This indicator provides the number official vaccinations in the US. We export the county-level +This indicator provides the official vaccination counts in the US. We export the county-level daily vaccination rates data as-is, and publish the result as a COVIDcast signal. We also aggregate the data to the MSA, HRR, State, HHS Region, and Nation levels. For detailed information see the files DETAILS.md contained in this directory. From e5248212329c94f8b9745bc6531fef5dd8865473 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 15:10:39 -0400 Subject: [PATCH 41/95] Update cdc_vaccines/delphi_cdc_vaccines/run.py Co-authored-by: Katie Mazaitis --- cdc_vaccines/delphi_cdc_vaccines/run.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index f5e4751ac..0e52a9584 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -60,7 +60,8 @@ def run_module(params): geo_map='state_code' df = GeoMapper().replace_geocode( - all_data[['timestamp','fips', sensor]],from_col='fips', + all_data[['timestamp','fips', sensor]], + from_col='fips', from_code="fips", new_col="geo_id", new_code=geo_map, From 8d1c2d2c7cc50802cded1ebb9fe0a373de04a876 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 15:12:35 -0400 Subject: [PATCH 42/95] Update cdc_vaccines/delphi_cdc_vaccines/pull.py Co-authored-by: Katie Mazaitis --- cdc_vaccines/delphi_cdc_vaccines/pull.py | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py index 32a53a4d7..e158a0b8c 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/pull.py +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py @@ -132,24 +132,6 @@ def 
pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: ) return df.loc[ df["timestamp"] >= min(df["timestamp"]), - [ # Reorder - "fips", - "timestamp", - "cumulative_counts_tot_vaccine", - "counts_tot_vaccine", - "cumulative_counts_tot_vaccine_12P", - "counts_tot_vaccine_12P", - "cumulative_counts_tot_vaccine_18P", - "counts_tot_vaccine_18P", - "cumulative_counts_tot_vaccine_65P", - "counts_tot_vaccine_65P", - "cumulative_counts_part_vaccine", - "counts_part_vaccine", - "cumulative_counts_part_vaccine_12P", - "counts_part_vaccine_12P", - "cumulative_counts_part_vaccine_18P", - "counts_part_vaccine_18P", - "cumulative_counts_part_vaccine_65P", - "counts_part_vaccine_65P" - ], + # Reorder + ["fips", "timestamp"] + SIGNALS, ].reset_index(drop=True) From 0093537f14ac09669006f03c1b5fd85214a0a4ad Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 15:22:05 -0400 Subject: [PATCH 43/95] changes to the json file --- cdc_vaccines/params.json.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/params.json.template b/cdc_vaccines/params.json.template index bfc654514..3e6af1c23 100644 --- a/cdc_vaccines/params.json.template +++ b/cdc_vaccines/params.json.template @@ -22,5 +22,5 @@ "missing_sample_size_allowed": true }, "dynamic": {} - }, + } } From 253392c2ab522fc223d2dfcc2062953970cd3486 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Sat, 11 Sep 2021 13:22:43 -0400 Subject: [PATCH 44/95] changed the signal name generation --- cdc_vaccines/delphi_cdc_vaccines/pull.py | 7 +++---- cdc_vaccines/delphi_cdc_vaccines/run.py | 2 +- cdc_vaccines/tests/test_pull.py | 23 ++++++++++++++--------- cdc_vaccines/tests/test_run.py | 21 ++++++++++----------- 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py index e158a0b8c..b8e8c6387 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/pull.py +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py 
@@ -5,8 +5,7 @@ from delphi_utils.geomap import GeoMapper import numpy as np import pandas as pd - - +from .constants import SIGNALS, DIFFERENCE_MAPPING @@ -112,8 +111,8 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: df =pd.concat([df_dummy, df]) # Obtain new_counts df.sort_values(["fips", "timestamp"], inplace=True) - for to, from in DIFFERENCE_MAPPING.items(): - df[to] = df[from].diff() + for to, from_d in DIFFERENCE_MAPPING.items(): + df[to] = df[from_d].diff() rem_list = [ x for x in list(df.columns) if x not in ['timestamp', 'fips'] ] # Handle edge cases where we diffed across fips diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 0e52a9584..48c0f24f9 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -72,7 +72,7 @@ def run_module(params): df["se"] = np.nan df["sample_size"] = np.nan sensor_name = sensor + smoother[1] - + print(sensor_name) if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): # don't export first 6 days for smoothed signals since they'll be nan. 
start_date = min(df.timestamp) + timedelta(6) if smoother[1] else min(df.timestamp) diff --git a/cdc_vaccines/tests/test_pull.py b/cdc_vaccines/tests/test_pull.py index f49d0eff6..644f1226c 100644 --- a/cdc_vaccines/tests/test_pull.py +++ b/cdc_vaccines/tests/test_pull.py @@ -29,22 +29,27 @@ def test_good_file(self): "timestamp": [pd.Timestamp("2021-08-25"), pd.Timestamp("2021-08-26"), pd.Timestamp("2021-08-25"), pd.Timestamp("2021-08-26"), pd.Timestamp("2021-08-25"), pd.Timestamp("2021-08-26")], + + "incidence_counts_tot_vaccine": [np.nan,789625.0,np.nan,5537.0,np.nan,0.0], + "incidence_counts_tot_vaccine_12P": [np.nan,789591.0,np.nan,5535.0,np.nan,0.0], + "incidence_counts_tot_vaccine_18P": [np.nan,733809.0,np.nan,5368.0,np.nan,0.0], + "incidence_counts_tot_vaccine_65P": [np.nan,55620.0,np.nan,1696.0,np.nan,0.0], + "incidence_counts_part_vaccine": [np.nan,1119266.0,np.nan,6293.0,np.nan,0.0], + "incidence_counts_part_vaccine_12P": [np.nan,1119203.0,np.nan,6290.0,np.nan,0.0], + "incidence_counts_part_vaccine_18P": [np.nan,1035082.0,np.nan,6014.0,np.nan,0.0], + "incidence_counts_part_vaccine_65P": [np.nan,75596.0,np.nan,1877.0,np.nan,0.0], + + + "cumulative_counts_tot_vaccine": [np.nan,789625.0,np.nan,5537.0,np.nan,0.0], - "counts_tot_vaccine": [np.nan,789625.0,np.nan,5537.0,np.nan,0.0], "cumulative_counts_tot_vaccine_12P": [np.nan,789591.0,np.nan,5535.0,np.nan,0.0], - "counts_tot_vaccine_12P": [np.nan,789591.0,np.nan,5535.0,np.nan,0.0], "cumulative_counts_tot_vaccine_18P": [np.nan,733809.0,np.nan,5368.0,np.nan,0.0], - "counts_tot_vaccine_18P": [np.nan,733809.0,np.nan,5368.0,np.nan,0.0], "cumulative_counts_tot_vaccine_65P": [np.nan,55620.0,np.nan,1696.0,np.nan,0.0], - "counts_tot_vaccine_65P": [np.nan,55620.0,np.nan,1696.0,np.nan,0.0], "cumulative_counts_part_vaccine": [np.nan,1119266.0,np.nan,6293.0,np.nan,0.0], - "counts_part_vaccine": [np.nan,1119266.0,np.nan,6293.0,np.nan,0.0], "cumulative_counts_part_vaccine_12P": [np.nan,1119203.0,np.nan,6290.0,np.nan,0.0], - 
"counts_part_vaccine_12P": [np.nan,1119203.0,np.nan,6290.0,np.nan,0.0], "cumulative_counts_part_vaccine_18P": [np.nan,1035082.0,np.nan,6014.0,np.nan,0.0], - "counts_part_vaccine_18P": [np.nan,1035082.0,np.nan,6014.0,np.nan,0.0], - "cumulative_counts_part_vaccine_65P": [np.nan,75596.0,np.nan,1877.0,np.nan,0.0], - "counts_part_vaccine_65P": [np.nan,75596.0,np.nan,1877.0,np.nan,0.0]}, + "cumulative_counts_part_vaccine_65P": [np.nan,75596.0,np.nan,1877.0,np.nan,0.0]}, + index=[0, 1, 2, 3, 4, 5]) # sort since rows order doesn't matter pd.testing.assert_frame_equal(df.sort_index(), expected_df.sort_index()) diff --git a/cdc_vaccines/tests/test_run.py b/cdc_vaccines/tests/test_run.py index e1ecbb17f..abc966c25 100644 --- a/cdc_vaccines/tests/test_run.py +++ b/cdc_vaccines/tests/test_run.py @@ -42,32 +42,31 @@ def test_output_files_exist(self): "20210818", "20210819", ] - geos = ["state_code", "hrr", "hhs", "nation"] + geos = ["state", "hrr", "hhs", "nation", "msa"] expected_files = [] for metric in ["cumulative_counts_tot_vaccine", - "counts_tot_vaccine", + "incidence_counts_tot_vaccine", "cumulative_counts_tot_vaccine_12P", - "counts_tot_vaccine_12P", + "incidence_counts_tot_vaccine_12P", "cumulative_counts_tot_vaccine_18P", - "counts_tot_vaccine_18P", + "incidence_counts_tot_vaccine_18P", "cumulative_counts_tot_vaccine_65P", - "counts_tot_vaccine_65P", + "incidence_counts_tot_vaccine_65P", "cumulative_counts_part_vaccine", - "counts_part_vaccine", + "incidence_counts_part_vaccine", "cumulative_counts_part_vaccine_12P", - "counts_part_vaccine_12P", + "incidence_counts_part_vaccine_12P", "cumulative_counts_part_vaccine_18P", - "counts_part_vaccine_18P", + "incidence_counts_part_vaccine_18P", "cumulative_counts_part_vaccine_65P", - "counts_part_vaccine_65P"]: + "incidence_counts_part_vaccine_65P"]: for date in dates: for geo in geos: expected_files += [date + "_" + geo + "_" + metric + ".csv"] if not("cumulative" in metric) and not (date in dates[:6]): expected_files += 
[date + "_" + geo + "_" + metric + "_7dav.csv"] - assert set(csv_files) == set(expected_files) def test_output_file_format(self): @@ -75,6 +74,6 @@ def test_output_file_format(self): run_module(self.PARAMS) df = pd.read_csv( - join("receiving", "20210819_state_code_counts_tot_vaccine.csv") + join("receiving", "20210819_state_cumulative_counts_tot_vaccine.csv") ) assert (df.columns.values == ["geo_id", "val", "se", "sample_size"]).all() \ No newline at end of file From 30def22bd709f121c08a477862cb417e4f9251d1 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Sat, 11 Sep 2021 13:25:47 -0400 Subject: [PATCH 45/95] committed constants --- cdc_vaccines/delphi_cdc_vaccines/constants.py | 55 +++++-------------- 1 file changed, 13 insertions(+), 42 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/constants.py b/cdc_vaccines/delphi_cdc_vaccines/constants.py index 6e8a9620c..b8ff83d06 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/constants.py +++ b/cdc_vaccines/delphi_cdc_vaccines/constants.py @@ -1,45 +1,23 @@ """Registry for variations.""" - +from itertools import product from delphi_utils import Smoother -FULL_VAC_SUM = "cumulative_counts_tot_vaccine" -FULL_VAC = "counts_tot_vaccine" -FULL_VAC_SUM_12P = "cumulative_counts_tot_vaccine_12P" -FULL_VAC_12P = "counts_tot_vaccine_12P" -FULL_VAC_SUM_18P = "cumulative_counts_tot_vaccine_18P" -FULL_VAC_18P = "counts_tot_vaccine_18P" -FULL_VAC_SUM_65P = "cumulative_counts_tot_vaccine_65P" -FULL_VAC_65P = "counts_tot_vaccine_65P" -PART_VAC_SUM = "cumulative_counts_part_vaccine" -PART_VAC = "counts_part_vaccine" -PART_VAC_SUM_12P = "cumulative_counts_part_vaccine_12P" -PART_VAC_12P = "counts_part_vaccine_12P" -PART_VAC_SUM_18P = "cumulative_counts_part_vaccine_18P" -PART_VAC_18P = "counts_part_vaccine_18P" -PART_VAC_SUM_65P = "cumulative_counts_part_vaccine_65P" -PART_VAC_65P = "counts_part_vaccine_65P" +CUMULATIVE = 'cumulative' +INCIDENCE ='incidence' +FREQUENCY = [CUMULATIVE, INCIDENCE] +STATUS = ["tot", "part"] +AGE = 
["", "_12P", "_18P", "_65P"] +SIGNALS = [f"{frequency}_counts_{status}_vaccine{AGE}" for + frequency, status, age in product(FREQUENCY, STATUS, AGE)] +DIFFERENCE_MAPPING = { + f"{INCIDENCE}_counts_{status}_vaccine{age}": f"{CUMULATIVE}_counts_{status}_vaccine{age}" + for status, age in product(STATUS, AGE) +} +SIGNALS = list(DIFFERENCE_MAPPING.keys()) + list(DIFFERENCE_MAPPING.values()) -SIGNALS = [ - FULL_VAC_SUM , - FULL_VAC , - FULL_VAC_SUM_12P , - FULL_VAC_12P , - FULL_VAC_SUM_18P , - FULL_VAC_18P , - FULL_VAC_SUM_65P , - FULL_VAC_65P , - PART_VAC_SUM , - PART_VAC , - PART_VAC_SUM_12P , - PART_VAC_12P , - PART_VAC_SUM_18P , - PART_VAC_18P , - PART_VAC_SUM_65P , - PART_VAC_65P -] GEOS = [ "nation", @@ -53,10 +31,3 @@ (Smoother("identity", impute_method=None), ""), (Smoother("moving_average", window_length=7), "_7dav"), ] - - -DIFFERENCE_MAPPING = { - f"{INCIDENCE}_counts_{completeness}_vaccine{age}": f"{CUMULATIVE}_counts_{completeness}_vaccine{age}" - for completeness, age in product(COMPLETENESS, AGE) -} -SIGNALS = list(DIFFERENCE_MAPPING.keys()) + list(DIFFERENCE_MAPPING.values()) \ No newline at end of file From f1edd0fdedeb02e53791bdbc21cfcb776f107b37 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 17 Sep 2021 06:30:41 -0400 Subject: [PATCH 46/95] Modified run.py to have the right NaN codes --- cdc_vaccines/delphi_cdc_vaccines/run.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 48c0f24f9..3013c2c5a 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -21,7 +21,7 @@ import numpy as np from delphi_utils.export import create_export_csv from delphi_utils.geomap import GeoMapper -from delphi_utils import get_structured_logger +from delphi_utils import get_structured_logger, nancodes from .constants import GEOS, SIGNALS, SMOOTHERS from .pull import pull_cdcvacc_data @@ -69,8 +69,8 @@ def 
run_module(params): df["val"] = df[["geo_id", sensor]].groupby("geo_id")[sensor].transform( smoother[0].smooth ) - df["se"] = np.nan - df["sample_size"] = np.nan + df["se"] = nancodes.Nans.NOT_APPLICABLE + df["sample_size"] = nancodes.Nans.NOT_APPLICABLE sensor_name = sensor + smoother[1] print(sensor_name) if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): From 6495845100c63bfe8152a291abbf4e1bf9986cfe Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Mon, 13 Sep 2021 14:08:57 -0400 Subject: [PATCH 47/95] Update cdc_vaccines/README.md Co-authored-by: Katie Mazaitis --- cdc_vaccines/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cdc_vaccines/README.md b/cdc_vaccines/README.md index ea076dc1e..792f9070c 100644 --- a/cdc_vaccines/README.md +++ b/cdc_vaccines/README.md @@ -6,8 +6,7 @@ We also aggregate the data to the MSA, HRR, State, HHS Region, and Nation levels For detailed information see the files DETAILS.md contained in this directory. Note that individuals could be vaccinated outside of the US. Additionally, -there is no county level data for counties in Texas and Hawaii. There are unknown -counties in each state and a row for unknown county and unknown state. +there is no county level data for counties in Texas and Hawaii. Each state has some vaccination counts assigned to "unknown county". Some vaccination counts are assigned to "unknown state, unknown county". 
## Running the Indicator From e1ee4335de8cba2aab9ea0b3cf47d48279efe662 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Sun, 19 Sep 2021 11:28:00 -0400 Subject: [PATCH 48/95] Added appropriate NaN codes --- cdc_vaccines/delphi_cdc_vaccines/run.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 3013c2c5a..43dee11f5 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -18,7 +18,6 @@ from itertools import product import time as tm import os -import numpy as np from delphi_utils.export import create_export_csv from delphi_utils.geomap import GeoMapper from delphi_utils import get_structured_logger, nancodes From 4aeb263bc241200203ecaceed932dfa6ddecc244 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 21 Sep 2021 10:12:13 -0400 Subject: [PATCH 49/95] Update cdc_vaccines/delphi_cdc_vaccines/run.py Co-authored-by: Dmitry Shemetov --- cdc_vaccines/delphi_cdc_vaccines/run.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 43dee11f5..d22dfd76d 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -69,7 +69,10 @@ def run_module(params): smoother[0].smooth ) df["se"] = nancodes.Nans.NOT_APPLICABLE - df["sample_size"] = nancodes.Nans.NOT_APPLICABLE + df["se"] = np.nan + df["sample_size"] = np.nan + df["missing_se"] = nancodes.Nans.NOT_APPLICABLE + df["missing_sample_size"] = nancodes.Nans.NOT_APPLICABLE sensor_name = sensor + smoother[1] print(sensor_name) if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): From 5a9e67bb59c36af357b402aa3c0f10a226414d78 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 21 Sep 2021 10:24:48 -0400 Subject: [PATCH 50/95] added back appropriate nan codes --- cdc_vaccines/delphi_cdc_vaccines/run.py | 5 +---- 1 file changed, 1 
insertion(+), 4 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index d22dfd76d..43dee11f5 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -69,10 +69,7 @@ def run_module(params): smoother[0].smooth ) df["se"] = nancodes.Nans.NOT_APPLICABLE - df["se"] = np.nan - df["sample_size"] = np.nan - df["missing_se"] = nancodes.Nans.NOT_APPLICABLE - df["missing_sample_size"] = nancodes.Nans.NOT_APPLICABLE + df["sample_size"] = nancodes.Nans.NOT_APPLICABLE sensor_name = sensor + smoother[1] print(sensor_name) if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): From 899f17e4d952482dc0d96a8bb52900cb52622b23 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 21 Sep 2021 18:15:41 -0400 Subject: [PATCH 51/95] changes to run.py --- cdc_vaccines/delphi_cdc_vaccines/run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 43dee11f5..51ab3688b 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -68,8 +68,8 @@ def run_module(params): df["val"] = df[["geo_id", sensor]].groupby("geo_id")[sensor].transform( smoother[0].smooth ) - df["se"] = nancodes.Nans.NOT_APPLICABLE - df["sample_size"] = nancodes.Nans.NOT_APPLICABLE + df["missing_se"] = nancodes.Nans.NOT_APPLICABLE + df["missing_sample_size"] = nancodes.Nans.NOT_APPLICABLE sensor_name = sensor + smoother[1] print(sensor_name) if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): From ada5f10027ffcc145d1fc3c70dbc4972e7387894 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Mon, 11 Oct 2021 17:48:06 -0400 Subject: [PATCH 52/95] adding test_run changes with new col names --- cdc_vaccines/tests/test_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/tests/test_run.py b/cdc_vaccines/tests/test_run.py index 
abc966c25..c96e90112 100644 --- a/cdc_vaccines/tests/test_run.py +++ b/cdc_vaccines/tests/test_run.py @@ -76,4 +76,4 @@ def test_output_file_format(self): df = pd.read_csv( join("receiving", "20210819_state_cumulative_counts_tot_vaccine.csv") ) - assert (df.columns.values == ["geo_id", "val", "se", "sample_size"]).all() \ No newline at end of file + assert (df.columns.values == ["geo_id", "val", "missing_se", "missing_sample_size"]).all() \ No newline at end of file From 90034b3cfd4b09a940536832293454ca29d3294c Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 12 Oct 2021 16:02:48 -0400 Subject: [PATCH 53/95] Update cdc_vaccines/delphi_cdc_vaccines/run.py Co-authored-by: Katie Mazaitis --- cdc_vaccines/delphi_cdc_vaccines/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 51ab3688b..a567f0097 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -72,7 +72,7 @@ def run_module(params): df["missing_sample_size"] = nancodes.Nans.NOT_APPLICABLE sensor_name = sensor + smoother[1] print(sensor_name) - if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): + if not (("cumulative" in sensor_name) and ("7dav" in sensor_name)): # don't export first 6 days for smoothed signals since they'll be nan. 
start_date = min(df.timestamp) + timedelta(6) if smoother[1] else min(df.timestamp) exported_csv_dates = create_export_csv( From 6616178ffc56ec33022f9abbb6138c3cc52b98a4 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 12 Oct 2021 16:02:53 -0400 Subject: [PATCH 54/95] Update cdc_vaccines/delphi_cdc_vaccines/run.py Co-authored-by: Katie Mazaitis --- cdc_vaccines/delphi_cdc_vaccines/run.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index a567f0097..57dd3a403 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -71,7 +71,6 @@ def run_module(params): df["missing_se"] = nancodes.Nans.NOT_APPLICABLE df["missing_sample_size"] = nancodes.Nans.NOT_APPLICABLE sensor_name = sensor + smoother[1] - print(sensor_name) if not (("cumulative" in sensor_name) and ("7dav" in sensor_name)): # don't export first 6 days for smoothed signals since they'll be nan. start_date = min(df.timestamp) + timedelta(6) if smoother[1] else min(df.timestamp) From 0dc2b6b688b80a84440a7f63e35ae0b85cd98712 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 12 Oct 2021 16:03:00 -0400 Subject: [PATCH 55/95] Update cdc_vaccines/delphi_cdc_vaccines/run.py Co-authored-by: Katie Mazaitis --- cdc_vaccines/delphi_cdc_vaccines/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 57dd3a403..a8777e433 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -2,7 +2,7 @@ """Functions to call when running the function. This module should contain a function called `run_module`, that is executed -when the module is run with `python -m MODULE_NAME`. `run_module`'s lone argument should be a +when the module is run with `python -m delphi_cdc_vaccines`. 
`run_module`'s lone argument should be a nested dictionary of parameters loaded from the params.json file. We expect the `params` to have the following structure: - "common": From 32067a7cc7cc2e090cb845fe5f6da3565fee7b9c Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 12 Oct 2021 16:05:20 -0400 Subject: [PATCH 56/95] lint nit --- cdc_vaccines/delphi_cdc_vaccines/run.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index a8777e433..cc633f62a 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -2,9 +2,10 @@ """Functions to call when running the function. This module should contain a function called `run_module`, that is executed -when the module is run with `python -m delphi_cdc_vaccines`. `run_module`'s lone argument should be a -nested dictionary of parameters loaded from the params.json file. We expect the `params` to have -the following structure: +when the module is run with `python -m delphi_cdc_vaccines`. +`run_module`'s lone argument should be a nested dictionary of +parameters loaded from the params.json file. 
+We expect the `params` to have the following structure: - "common": - "export_dir": str, directory to which the results are exported - "log_filename": (optional) str, path to log file From 387949b5e2d67756f3cb29d434e7be5bcfa77044 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 12 Oct 2021 16:29:20 -0400 Subject: [PATCH 57/95] Modifying for the changes in the base csv file from the CDC --- cdc_vaccines/delphi_cdc_vaccines/pull.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py index b8e8c6387..9b7d8cf89 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/pull.py +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py @@ -45,7 +45,8 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: "series_complete_pop_pct", "mmwr_week", "recip_county", - "state_id" + "state_id", + "metro_status" ] From 62e343aff2773de3f62446eee51f4f7ed5f79726 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 12 Oct 2021 16:53:04 -0400 Subject: [PATCH 58/95] Changes to the CDC Files and respective changes to tests --- cdc_vaccines/delphi_cdc_vaccines/pull.py | 5 +++-- cdc_vaccines/tests/test_data/small_confirmed.csv | 0 cdc_vaccines/tests/test_pull.py | 7 ------- 3 files changed, 3 insertions(+), 9 deletions(-) delete mode 100644 cdc_vaccines/tests/test_data/small_confirmed.csv diff --git a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py index 9b7d8cf89..f309064b7 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/pull.py +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py @@ -45,8 +45,7 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: "series_complete_pop_pct", "mmwr_week", "recip_county", - "state_id", - "metro_status" + "state_id" ] @@ -62,6 +61,7 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: df['recip_state'] = df['recip_state'].str.lower() drop_columns.extend([x for x in df.columns if ("pct" in x) 
| ("svi" in x)]) + drop_columns.extend(df.columns[22:]) drop_columns = list(set(drop_columns)) df = GeoMapper().add_geocode(df, "state_id", "state_code", from_col="recip_state", new_col="state_id", dropna=False) @@ -82,6 +82,7 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: "schema may have changed. Please investigate and " "amend drop_columns." ) from e + # timestamp: str -> datetime df.columns = ["fips", "cumulative_counts_tot_vaccine", diff --git a/cdc_vaccines/tests/test_data/small_confirmed.csv b/cdc_vaccines/tests/test_data/small_confirmed.csv deleted file mode 100644 index e69de29bb..000000000 diff --git a/cdc_vaccines/tests/test_pull.py b/cdc_vaccines/tests/test_pull.py index 644f1226c..b5f8e3447 100644 --- a/cdc_vaccines/tests/test_pull.py +++ b/cdc_vaccines/tests/test_pull.py @@ -67,10 +67,3 @@ def test_missing_cols(self): pull_cdcvacc_data( BASE_URL_BAD["missing_cols"],TEST_LOGGER ) - - def test_extra_cols(self): - """Test if error is raised when there are extra columns.""" - with pytest.raises(ValueError): - pull_cdcvacc_data( - BASE_URL_BAD["extra_cols"], TEST_LOGGER - ) \ No newline at end of file From 6c8dbd569437bfbdbc3d1b68ceba6ef7b07998ed Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Mon, 20 Dec 2021 20:46:19 -0500 Subject: [PATCH 59/95] Added an export start and end date --- cdc_vaccines/delphi_cdc_vaccines/pull.py | 66 +++++++++++++++++++----- cdc_vaccines/delphi_cdc_vaccines/run.py | 4 +- cdc_vaccines/params.json.template | 3 +- 3 files changed, 59 insertions(+), 14 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py index f309064b7..ebd7400c3 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/pull.py +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py @@ -9,7 +9,7 @@ -def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: +def pull_cdcvacc_data(base_url: str, export_start_date: str, export_end_date: str, logger: Logger) -> pd.DataFrame: """Pull the 
latest data from the CDC on vaccines and conform it into a dataset. The output dataset has: @@ -32,6 +32,10 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: ---------- base_url: str Base URL for pulling the CDC Vaccination Data + export_start_date: str + The start date for the csv file (can be empty) + export_end_date: + The end date for the csv file (can be empty) logger: Logger Returns ------- @@ -49,8 +53,38 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: ] - # Read data + # Read data and cut off by designated start date + df = pd.read_csv(base_url) + df['Date']=pd.to_datetime(df['Date']) + try: + export_start_date = pd.to_datetime(export_start_date) + df = df.query('@export_start_date <= Date') + except KeyError as e: + raise ValueError( + "Tried to convert export_start_date param " + "to datetime and filter but failed. Please " + "check this input." + ) from e + + try: + export_end_date = pd.to_datetime(export_end_date) + df = df.query('export_end_date >= Date') + except KeyError as e: + raise ValueError( + "Tried to convert export_end_date param " + "to datetime and filter but failed. Please " + "check this input." + ) from e + + if df['Date'].shape[0] == 0: + raise ValueError( + "Output df has no rows. Please check " + "if export_start_date is later than " + "export_end_date. Else check if base_url" + " still functional." 
+ ) from e + logger.info("data retrieved from source", num_rows=df.shape[0], num_cols=df.shape[1], @@ -84,16 +118,24 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: ) from e # timestamp: str -> datetime - df.columns = ["fips", - "cumulative_counts_tot_vaccine", - "cumulative_counts_tot_vaccine_12P", - "cumulative_counts_tot_vaccine_18P", - "cumulative_counts_tot_vaccine_65P", - "cumulative_counts_part_vaccine", - "cumulative_counts_part_vaccine_12P", - "cumulative_counts_part_vaccine_18P", - "cumulative_counts_part_vaccine_65P", - "timestamp"] + try: + df.columns = ["fips", + "cumulative_counts_tot_vaccine", + "cumulative_counts_tot_vaccine_12P", + "cumulative_counts_tot_vaccine_18P", + "cumulative_counts_tot_vaccine_65P", + "cumulative_counts_part_vaccine", + "cumulative_counts_part_vaccine_12P", + "cumulative_counts_part_vaccine_18P", + "cumulative_counts_part_vaccine_65P", + "timestamp"] + except KeyError as e: + raise ValueError( + "Tried to name wrong number of columns. The dataset " + "schema may have changed. Please investigate and " + "amend drop_columns." + ) from e + df_dummy = df.loc[(df["fips"]!='00000') & (df["timestamp"] == min(df["timestamp"]))].copy() #handle fips 00000 separately df_oth = df.loc[((df["fips"]=='00000') & diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index cc633f62a..6f8f70f55 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -40,8 +40,10 @@ def run_module(params): __name__, filename=params["common"].get("log_filename"), log_exceptions=params["common"].get("log_exceptions", True)) base_url = params["indicator"]["base_url"] + export_start_date = params["indicator"]["export_start_date"] + export_end_date = params["indicator"]["export_end_date"] ## build the base version of the signal at the most detailed geo level you can get. 
- all_data = pull_cdcvacc_data(base_url, logger) + all_data = pull_cdcvacc_data(base_url, export_start_date, export_end_date, logger) run_stats = [] ## aggregate & smooth diff --git a/cdc_vaccines/params.json.template b/cdc_vaccines/params.json.template index 3e6af1c23..208f57f91 100644 --- a/cdc_vaccines/params.json.template +++ b/cdc_vaccines/params.json.template @@ -5,7 +5,8 @@ }, "indicator": { "base_url": "https://data.cdc.gov/api/views/8xkx-amqh/rows.csv", - "export_start_date": "2020-12-13" + "export_start_date": "2020-12-13", + "export_end_date": "2020-12-15" }, "validation": { "common": { From 853413db5226fcbeb2abe905b85435caf3823732 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 7 Sep 2021 22:15:31 -0400 Subject: [PATCH 60/95] First pass of the CDC Indicator --- .github/workflows/python-ci.yml | 2 +- cdc_vaccines/.pylintrc | 22 +++ cdc_vaccines/Makefile | 25 +++ cdc_vaccines/README.md | 70 ++++++++ cdc_vaccines/REVIEW.md | 38 ++++ cdc_vaccines/cache/.gitignore | 0 cdc_vaccines/delphi_cdc_vaccines/__init__.py | 13 ++ cdc_vaccines/delphi_cdc_vaccines/__main__.py | 12 ++ cdc_vaccines/delphi_cdc_vaccines/constants.py | 54 ++++++ cdc_vaccines/delphi_cdc_vaccines/pull.py | 162 ++++++++++++++++++ cdc_vaccines/delphi_cdc_vaccines/run.py | 84 +++++++++ cdc_vaccines/params.json.template | 35 ++++ cdc_vaccines/setup.py | 29 ++++ cdc_vaccines/static/.gitignore | 0 .../tests/test_data/bad_extra_cols.csv | 8 + .../tests/test_data/bad_missing_cols.csv | 8 + .../tests/test_data/bad_missing_days.csv | 31 ++++ cdc_vaccines/tests/test_data/small.csv | 109 ++++++++++++ .../tests/test_data/small_confirmed.csv | 0 cdc_vaccines/tests/test_data/small_pull.csv | 4 + cdc_vaccines/tests/test_pull.py | 71 ++++++++ cdc_vaccines/tests/test_run.py | 80 +++++++++ 22 files changed, 856 insertions(+), 1 deletion(-) create mode 100644 cdc_vaccines/.pylintrc create mode 100644 cdc_vaccines/Makefile create mode 100644 cdc_vaccines/README.md create mode 100644 cdc_vaccines/REVIEW.md 
create mode 100644 cdc_vaccines/cache/.gitignore create mode 100644 cdc_vaccines/delphi_cdc_vaccines/__init__.py create mode 100644 cdc_vaccines/delphi_cdc_vaccines/__main__.py create mode 100644 cdc_vaccines/delphi_cdc_vaccines/constants.py create mode 100644 cdc_vaccines/delphi_cdc_vaccines/pull.py create mode 100644 cdc_vaccines/delphi_cdc_vaccines/run.py create mode 100644 cdc_vaccines/params.json.template create mode 100644 cdc_vaccines/setup.py create mode 100644 cdc_vaccines/static/.gitignore create mode 100644 cdc_vaccines/tests/test_data/bad_extra_cols.csv create mode 100644 cdc_vaccines/tests/test_data/bad_missing_cols.csv create mode 100644 cdc_vaccines/tests/test_data/bad_missing_days.csv create mode 100644 cdc_vaccines/tests/test_data/small.csv create mode 100644 cdc_vaccines/tests/test_data/small_confirmed.csv create mode 100644 cdc_vaccines/tests/test_data/small_pull.csv create mode 100644 cdc_vaccines/tests/test_pull.py create mode 100644 cdc_vaccines/tests/test_run.py diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index adeb011a6..45b8aea32 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -16,7 +16,7 @@ jobs: if: github.event.pull_request.draft == false strategy: matrix: - packages: [_delphi_utils_python, changehc, claims_hosp, combo_cases_and_deaths, covid_act_now, doctor_visits, google_symptoms, hhs_hosp, hhs_facilities, jhu, nchs_mortality, nowcast, quidel, quidel_covidtest, safegraph_patterns, sir_complainsalot, usafacts] + packages: [_delphi_utils_python, changehc, claims_hosp, combo_cases_and_deaths, covid_act_now, doctor_visits, google_symptoms, hhs_hosp, hhs_facilities, jhu, nchs_mortality, nowcast, quidel, quidel_covidtest, safegraph_patterns, sir_complainsalot, usafacts, cdc_vaccines] defaults: run: working-directory: ${{ matrix.packages }} diff --git a/cdc_vaccines/.pylintrc b/cdc_vaccines/.pylintrc new file mode 100644 index 000000000..f30837c7e --- /dev/null +++ 
b/cdc_vaccines/.pylintrc @@ -0,0 +1,22 @@ + +[MESSAGES CONTROL] + +disable=logging-format-interpolation, + too-many-locals, + too-many-arguments, + # Allow pytest functions to be part of a class. + no-self-use, + # Allow pytest classes to have one test. + too-few-public-methods + +[BASIC] + +# Allow arbitrarily short-named variables. +variable-rgx=[a-z_][a-z0-9_]* +argument-rgx=[a-z_][a-z0-9_]* +attr-rgx=[a-z_][a-z0-9_]* + +[DESIGN] + +# Don't complain about pytest "unused" arguments. +ignored-argument-names=(_.*|run_as_module) \ No newline at end of file diff --git a/cdc_vaccines/Makefile b/cdc_vaccines/Makefile new file mode 100644 index 000000000..ca3013248 --- /dev/null +++ b/cdc_vaccines/Makefile @@ -0,0 +1,25 @@ +.PHONY = venv, lint, test, clean + +dir = $(shell find ./delphi_* -name __init__.py | grep -o 'delphi_[_[:alnum:]]*') + +venv: + python -m venv env + +install: venv + . env/bin/activate; \ + pip install wheel ; \ + pip install -e ../_delphi_utils_python ;\ + pip install -e . + +lint: + . env/bin/activate; pylint $(dir) + . env/bin/activate; pydocstyle $(dir) + +test: + . env/bin/activate ;\ + (cd tests && ../env/bin/pytest --cov=$(dir) --cov-report=term-missing) + +clean: + rm -rf env + rm -f params.json + diff --git a/cdc_vaccines/README.md b/cdc_vaccines/README.md new file mode 100644 index 000000000..b56682d0f --- /dev/null +++ b/cdc_vaccines/README.md @@ -0,0 +1,70 @@ +# CDC Vaccinations + +This indicator provides the number of official vaccinations in the US. We export the county-level +daily vaccination rates data as-is, and publish the result as a COVIDcast signal. +We also aggregate the data to the HHS, State, and Nation levels. +For detailed information see the file DETAILS.md contained in this directory. + +Note that individuals could be vaccinated outside of the US. Additionally, +there is no county level data for counties in Texas and Hawaii. There are unknown +counties in each state and a row for unknown county and unknown state.
+ + +## Running the Indicator + +The indicator is run by directly executing the Python module contained in this +directory. The safest way to do this is to create a virtual environment, +install the common DELPHI tools, and then install the module and its +dependencies. To do this, run the following command from this directory: + +``` +make install +``` + +This command will install the package in editable mode, so you can make changes that +will automatically propagate to the installed package. + +All of the user-changeable parameters are stored in `params.json`. To execute +the module and produce the output datasets (by default, in `receiving`), run +the following: + +``` +env/bin/python -m delphi_cdc_vaccines +``` + +If you want to enter the virtual environment in your shell, +you can run `source env/bin/activate`. Run `deactivate` to leave the virtual environment. + +Once you are finished, you can remove the virtual environment and +params file with the following: + +``` +make clean +``` + +## Testing the code + +To run static tests of the code style, run the following command: + +``` +make lint +``` + +Unit tests are also included in the module. To execute these, run the following +command from this directory: + +``` +make test +``` + +To run individual tests, run the following: + +``` +(cd tests && ../env/bin/pytest test_run.py --cov=delphi_ --cov-report=term-missing) +``` + +The output will show the number of unit tests that passed and failed, along +with the percentage of code covered by the tests. + +None of the linting or unit tests should fail, and the code lines that are not covered by unit tests should be small and +should not include critical sub-routines. diff --git a/cdc_vaccines/REVIEW.md b/cdc_vaccines/REVIEW.md new file mode 100644 index 000000000..03f87b17a --- /dev/null +++ b/cdc_vaccines/REVIEW.md @@ -0,0 +1,38 @@ +## Code Review (Python) + +A code review of this module should include a careful look at the code and the +output.
To assist in the process, but certainly not in place of it, please +check the following items. + +**Documentation** + +- [ ] the README.md file template is filled out and currently accurate; it is +possible to load and test the code using only the instructions given +- [ ] minimal docstrings (one line describing what the function does) are +included for all functions; full docstrings describing the inputs and expected +outputs should be given for non-trivial functions + +**Structure** + +- [ ] code should pass lint checks (`make lint`) +- [ ] any required metadata files are checked into the repository and placed +within the directory `static` +- [ ] any intermediate files that are created and stored by the module should +be placed in the directory `cache` +- [ ] final expected output files to be uploaded to the API are placed in the +`receiving` directory; output files should not be committed to the repository +- [ ] all options and API keys are passed through the file `params.json` +- [ ] template parameter file (`params.json.template`) is checked into the +code; no personal (e.g., usernames) or private (e.g., API keys) information is +included in this template file + +**Testing** + +- [ ] module can be installed in a new virtual environment (`make install`) +- [ ] reasonably high level of unit test coverage covering all of the main logic +of the code (e.g., missing coverage for raised errors that do not currently seem +possible to reach are okay; missing coverage for options that will be needed are +not) +- [ ] all unit tests run without errors (`make test`) +- [ ] indicator directory has been added to GitHub CI +(`covidcast-indicators/.github/workflows/python-ci.yml`) diff --git a/cdc_vaccines/cache/.gitignore b/cdc_vaccines/cache/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/cdc_vaccines/delphi_cdc_vaccines/__init__.py b/cdc_vaccines/delphi_cdc_vaccines/__init__.py new file mode 100644 index 000000000..6813595b4 --- /dev/null +++
b/cdc_vaccines/delphi_cdc_vaccines/__init__.py @@ -0,0 +1,13 @@ +# -*- coding: utf-8 -*- +"""Module to pull and clean indicators from the CDC source. + +This file defines the functions that are made public by the module. As the +module is intended to be executed though the main method, these are primarily +for testing. +""" + +from __future__ import absolute_import +from . import pull +from . import run + +__version__ = "0.1.0" diff --git a/cdc_vaccines/delphi_cdc_vaccines/__main__.py b/cdc_vaccines/delphi_cdc_vaccines/__main__.py new file mode 100644 index 000000000..32fc0eecc --- /dev/null +++ b/cdc_vaccines/delphi_cdc_vaccines/__main__.py @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- +"""Call the function run_module when executed. + +This file indicates that calling the module (`python -m MODULE_NAME`) will +call the function `run_module` found within the run.py file. There should be +no need to change this template. +""" + +from delphi_utils import read_params +from .run import run_module # pragma: no cover + +run_module(read_params()) # pragma: no cover diff --git a/cdc_vaccines/delphi_cdc_vaccines/constants.py b/cdc_vaccines/delphi_cdc_vaccines/constants.py new file mode 100644 index 000000000..113ecb69e --- /dev/null +++ b/cdc_vaccines/delphi_cdc_vaccines/constants.py @@ -0,0 +1,54 @@ +"""Registry for variations.""" + + +from delphi_utils import Smoother + +FULL_VAC_SUM = "cumulative_counts_tot_vaccine" +FULL_VAC = "counts_tot_vaccine" +FULL_VAC_SUM_12P = "cumulative_counts_tot_vaccine_12P" +FULL_VAC_12P = "counts_tot_vaccine_12P" +FULL_VAC_SUM_18P = "cumulative_counts_tot_vaccine_18P" +FULL_VAC_18P = "counts_tot_vaccine_18P" +FULL_VAC_SUM_65P = "cumulative_counts_tot_vaccine_65P" +FULL_VAC_65P = "counts_tot_vaccine_65P" +PART_VAC_SUM = "cumulative_counts_part_vaccine" +PART_VAC = "counts_part_vaccine" +PART_VAC_SUM_12P = "cumulative_counts_part_vaccine_12P" +PART_VAC_12P = "counts_part_vaccine_12P" +PART_VAC_SUM_18P = "cumulative_counts_part_vaccine_18P" 
+PART_VAC_18P = "counts_part_vaccine_18P" +PART_VAC_SUM_65P = "cumulative_counts_part_vaccine_65P" +PART_VAC_65P = "counts_part_vaccine_65P" + + + +SIGNALS = [ + FULL_VAC_SUM , + FULL_VAC , + FULL_VAC_SUM_12P , + FULL_VAC_12P , + FULL_VAC_SUM_18P , + FULL_VAC_18P , + FULL_VAC_SUM_65P , + FULL_VAC_65P , + PART_VAC_SUM , + PART_VAC , + PART_VAC_SUM_12P , + PART_VAC_12P , + PART_VAC_SUM_18P , + PART_VAC_18P , + PART_VAC_SUM_65P , + PART_VAC_65P +] + +GEOS = [ + "nation", + "state_code", + "hrr", + "hhs" +] + +SMOOTHERS = [ + (Smoother("identity", impute_method=None), ""), + (Smoother("moving_average", window_length=7), "_7dav"), +] diff --git a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py new file mode 100644 index 000000000..e19043163 --- /dev/null +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py @@ -0,0 +1,162 @@ +# -*- coding: utf-8 -*- +"""Functions for pulling data from the CDC data website for vaccines.""" +import hashlib +from logging import Logger +from delphi_utils.geomap import GeoMapper +import numpy as np +import pandas as pd + + + + + +def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: + """Pull the latest data from the CDC on vaccines and conform it into a dataset. + + The output dataset has: + - Each row corresponds to (County, Date), denoted (FIPS, timestamp) + - Each row additionally has columns that correspond to the counts or + cumulative counts of vaccination status (fully vaccinated, + partially vaccinated) of various age groups (all, 12+, 18+, 65+) + from December 13th 2020 until the latest date + + Note that the raw dataset gives the `cumulative` metrics, from which + we compute `counts` by taking first differences. Hence, `counts` + may be negative. This is wholly dependent on the quality of the raw + dataset. + + We filter the data such that we only keep rows with valid FIPS, or "FIPS" + codes defined under the exceptions of the README. 
The current exceptions + include: + # - 0: statewise unallocated + Parameters + ---------- + base_url: str + Base URL for pulling the CDC Vaccination Data + logger: Logger + Returns + ------- + pd.DataFrame + Dataframe as described above. + """ + # Columns to drop the the data frame. + drop_columns = [ + "date", + "recip_state", + "series_complete_pop_pct", + "mmwr_week", + "recip_county", + "state_id" + ] + + + # Read data + df = pd.read_csv(base_url) + logger.info("data retrieved from source", + num_rows=df.shape[0], + num_cols=df.shape[1], + min_date=min(df['Date']), + max_date=max(df['Date']), + checksum=hashlib.sha256(pd.util.hash_pandas_object(df).values).hexdigest()) + df.columns = [i.lower() for i in df.columns] + + df['recip_state'] = df['recip_state'].str.lower() + drop_columns.extend([x for x in df.columns if ("pct" in x) | ("svi" in x)]) + drop_columns = list(set(drop_columns)) + df = GeoMapper().add_geocode(df, "state_id", "state_code", + from_col="recip_state", new_col="state_id", dropna=False) + df['state_id'] = df['state_id'].fillna('0').astype(int) + # Change FIPS from 0 to XX000 for statewise unallocated cases/deaths + unassigned_index = (df["fips"] == "UNK") + df.loc[unassigned_index, "fips"] = df["state_id"].loc[unassigned_index].values * 1000 + + # Conform FIPS + df["fips"] = df["fips"].apply(lambda x: f"{int(x):05d}") + df["timestamp"] = pd.to_datetime(df["date"]) + # Drop unnecessary columns (state is pre-encoded in fips) + try: + df.drop(drop_columns, axis=1, inplace=True) + except KeyError as e: + raise ValueError( + "Tried to drop non-existent columns. The dataset " + "schema may have changed. Please investigate and " + "amend drop_columns." 
+ ) from e + # timestamp: str -> datetime + df.columns = ["fips", + "cumulative_counts_tot_vaccine", + "cumulative_counts_tot_vaccine_12P", + "cumulative_counts_tot_vaccine_18P", + "cumulative_counts_tot_vaccine_65P", + "cumulative_counts_part_vaccine", + "cumulative_counts_part_vaccine_12P", + "cumulative_counts_part_vaccine_18P", + "cumulative_counts_part_vaccine_65P", + "timestamp"] + df_dummy = df.loc[(df["fips"]!='00000') & (df["timestamp"] == min(df["timestamp"]))].copy() + #handle fips 00000 separately + df_oth = df.loc[((df["fips"]=='00000') & + (df["timestamp"]==min(df[df['fips'] == '00000']['timestamp'])))].copy() + df_dummy = pd.concat([df_dummy, df_oth]) + df_dummy.loc[:, "timestamp"] = df_dummy.loc[:, "timestamp"] - pd.Timedelta(days=1) + df_dummy.loc[:, ["cumulative_counts_tot_vaccine", + "cumulative_counts_tot_vaccine_12P", + "cumulative_counts_tot_vaccine_18P", + "cumulative_counts_tot_vaccine_65P", + "cumulative_counts_part_vaccine", + "cumulative_counts_part_vaccine_12P", + "cumulative_counts_part_vaccine_18P", + "cumulative_counts_part_vaccine_65P", + ]] = 0 + + df =pd.concat([df_dummy, df]) + # Obtain new_counts + df.sort_values(["fips", "timestamp"], inplace=True) + df["counts_tot_vaccine"] = df["cumulative_counts_tot_vaccine"].diff() # 1st discrete difference + df["counts_tot_vaccine_12P"] = df["cumulative_counts_tot_vaccine_12P"].diff() + df["counts_tot_vaccine_18P"] = df["cumulative_counts_tot_vaccine_18P"].diff() + df["counts_tot_vaccine_65P"] = df["cumulative_counts_tot_vaccine_65P"].diff() + df["counts_part_vaccine"] = df["cumulative_counts_part_vaccine"].diff() + df["counts_part_vaccine_12P"] = df["cumulative_counts_part_vaccine_12P"].diff() + df["counts_part_vaccine_18P"] = df["cumulative_counts_part_vaccine_18P"].diff() + df["counts_part_vaccine_65P"] = df["cumulative_counts_part_vaccine_65P"].diff() + + rem_list = [ x for x in list(df.columns) if x not in ['timestamp', 'fips'] ] + # Handle edge cases where we diffed across fips + mask 
= df["fips"] != df["fips"].shift(1) + df.loc[mask, rem_list] = np.nan + print(rem_list) + df.reset_index(inplace=True, drop=True) + # Final sanity checks + unique_days = df["timestamp"].unique() + min_timestamp = min(unique_days) + max_timestamp = max(unique_days) + n_days = (max_timestamp - min_timestamp) / np.timedelta64(1, "D") + 1 + if n_days != len(unique_days): + raise ValueError( + f"Not every day between {min_timestamp} and " + "{max_timestamp} is represented." + ) + return df.loc[ + df["timestamp"] >= min(df["timestamp"]), + [ # Reorder + "fips", + "timestamp", + "cumulative_counts_tot_vaccine", + "counts_tot_vaccine", + "cumulative_counts_tot_vaccine_12P", + "counts_tot_vaccine_12P", + "cumulative_counts_tot_vaccine_18P", + "counts_tot_vaccine_18P", + "cumulative_counts_tot_vaccine_65P", + "counts_tot_vaccine_65P", + "cumulative_counts_part_vaccine", + "counts_part_vaccine", + "cumulative_counts_part_vaccine_12P", + "counts_part_vaccine_12P", + "cumulative_counts_part_vaccine_18P", + "counts_part_vaccine_18P", + "cumulative_counts_part_vaccine_65P", + "counts_part_vaccine_65P" + ], + ].reset_index(drop=True) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py new file mode 100644 index 000000000..455ca44d2 --- /dev/null +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- +"""Functions to call when running the function. + +This module should contain a function called `run_module`, that is executed +when the module is run with `python -m MODULE_NAME`. `run_module`'s lone argument should be a +nested dictionary of parameters loaded from the params.json file. 
We expect the `params` to have +the following structure: + - "common": + - "export_dir": str, directory to which the results are exported + - "log_filename": (optional) str, path to log file + - "indicator": (optional) + - "wip_signal": (optional) Any[str, bool], list of signals that are works in progress, or + True if all signals in the registry are works in progress, or False if only + unpublished signals are. See `delphi_utils.add_prefix()` + - Any other indicator-specific settings +""" +from datetime import timedelta, datetime +from itertools import product +import time as tm +import numpy as np +from delphi_utils.export import create_export_csv +from delphi_utils.geomap import GeoMapper +from delphi_utils import get_structured_logger +from .constants import GEOS, SIGNALS, SMOOTHERS +from .pull import pull_cdcvacc_data + + +def run_module(params): + """ + Run the indicator. + + Arguments + -------- + params: Dict[str, Any] + Nested dictionary of parameters. + """ + start_time = tm.time() + logger = get_structured_logger( + __name__, filename=params["common"].get("log_filename"), + log_exceptions=params["common"].get("log_exceptions", True)) + base_url = params["indicator"]["base_url"] + ## build the base version of the signal at the most detailed geo level you can get. 
+ all_data = pull_cdcvacc_data(base_url, logger) + run_stats = [] + ## aggregate & smooth + for (sensor, smoother, geo) in product(SIGNALS, SMOOTHERS, GEOS): + logger.info("Running on ", + sensor=sensor, + smoother=smoother, + geo=geo) + df = GeoMapper().replace_geocode( + all_data[['timestamp','fips', sensor]],from_col='fips', + from_code="fips", + new_col="geo_id", + new_code=geo, + date_col="timestamp") + df["val"] = df[["geo_id", sensor]].groupby("geo_id")[sensor].transform( + smoother[0].smooth + ) + df["se"] = np.nan + df["sample_size"] = np.nan + sensor_name = sensor + smoother[1] + if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): + # don't export first 6 days for smoothed signals since they'll be nan. + start_date = min(df.timestamp) + timedelta(6) if smoother[1] else min(df.timestamp) + exported_csv_dates = create_export_csv( + df, + params["common"]["export_dir"], + geo, + sensor_name, + start_date=start_date) + if len(exported_csv_dates) > 0: + run_stats.append((max(exported_csv_dates), len(exported_csv_dates))) + ## log this indicator run + elapsed_time_in_seconds = round(tm.time() - start_time, 2) + min_max_date = run_stats and min(s[0] for s in run_stats) + csv_export_count = sum(s[-1] for s in run_stats) + max_lag_in_days = min_max_date and (datetime.now() - min_max_date).days + formatted_min_max_date = min_max_date and min_max_date.strftime("%Y-%m-%d") + logger.info("Completed indicator run", + elapsed_time_in_seconds = elapsed_time_in_seconds, + csv_export_count = csv_export_count, + max_lag_in_days = max_lag_in_days, + oldest_final_export_date = formatted_min_max_date) diff --git a/cdc_vaccines/params.json.template b/cdc_vaccines/params.json.template new file mode 100644 index 000000000..3d5337846 --- /dev/null +++ b/cdc_vaccines/params.json.template @@ -0,0 +1,35 @@ +{ + "common": { + "export_dir": "./receiving", + "log_filename": "cdc_vaccines.log" + }, + "indicator": { + "base_url": 
"https://data.cdc.gov/api/views/8xkx-amqh/rows.csv", + "export_start_date": "2020-12-13" + }, + "validation": { + "common": { + "data_source": "cdc", + "span_length": 14, + "min_expected_lag": {"all": "1"}, + "max_expected_lag": {"all": "7"}, + "dry_run": true, + "suppressed_errors": [] + }, + "static": { + "minimum_sample_size": 0, + "missing_se_allowed": true, + "missing_sample_size_allowed": true + }, + "dynamic": {} + }, + "archive": { + "aws_credentials": { + "aws_access_key_id": "", + "aws_secret_access_key": "" + }, + "bucket_name": "", + "indicator_prefix": "usafacts", + "cache_dir": "./cache" + } +} diff --git a/cdc_vaccines/setup.py b/cdc_vaccines/setup.py new file mode 100644 index 000000000..8802dfd45 --- /dev/null +++ b/cdc_vaccines/setup.py @@ -0,0 +1,29 @@ +from setuptools import setup +from setuptools import find_packages + +required = [ + "numpy", + "pandas", + "pydocstyle", + "pytest", + "pytest-cov", + "pylint==2.8.3", + "delphi-utils", + "covidcast" +] + +setup( + name="delphi_cdc_vaccines", + version="0.0.1", + description="The number of people who are vaccinated per county.", + author="Ananya Joshi", + author_email="aajoshi@andrew.cmu.edu", + url="https://github.com/cmu-delphi/covidcast-indicators", + install_requires=required, + classifiers=[ + "Development Status :: 0 - Attempt", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3.8", + ], + packages=find_packages(), +) diff --git a/cdc_vaccines/static/.gitignore b/cdc_vaccines/static/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/cdc_vaccines/tests/test_data/bad_extra_cols.csv b/cdc_vaccines/tests/test_data/bad_extra_cols.csv new file mode 100644 index 000000000..6642296a1 --- /dev/null +++ b/cdc_vaccines/tests/test_data/bad_extra_cols.csv @@ -0,0 +1,8 @@ 
+Date,FIPS,MMWR_week,Recip_County,Recip_State,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_12Plus,Series_Complete_12PlusPop_Pct,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Recip,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_12Plus,Administered_Dose1_Recip_12PlusPop_Pct,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65Plus,Administered_Dose1_Recip_65PlusPop_Pct,SVI_CTGY,Series_Complete_Pop_Pct_SVI,Series_Complete_12PlusPop_Pct_SVI,Series_Complete_18PlusPop_Pct_SVI,Series_Complete_65PlusPop_Pct_SVI,Extra_Administered_Dose1_Recip_12PlusPop_Pct,Extra_Administered_Dose1_Recip_18Plus,Extra_Administered_Dose1_Recip_18PlusPop_Pct,Extra_Administered_Dose1_Recip_65Plus,Extra_Administered_Dose1_Recip_65PlusPop_Pct,Extra_SVI_CTGY +8/26/21,UNK,34,Unknown County,UNK,0,789625,789591,0,733809,0,55620,0,0,1119266,0,1119203,0,1035082,0,75596,0,,,,,,0,1035082,0,75596,0, +8/26/21,32013,34,Humboldt County,NV,32.9,5537,5535,40.2,5368,43.6,1696,69.9,94.9,6293,37.4,6290,45.6,6014,48.9,1877,77.3,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,45.6,6014,48.9,1877,77.3,Mod-High +8/26/21,47131,34,Obion County,TN,28.4,8529,8529,33.1,8412,35.7,4114,66,97.8,10758,35.8,10755,41.7,10520,44.6,4625,74.2,High,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI,Mod-High VC/High SVI,41.7,10520,44.6,4625,74.2,High +8/26/21,48305,34,Lynn County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,,,0,0,0,0,0,High +8/26/21,UNK,34,Unknown County,VA,0,2358403,2352494,0,2206696,0,603704,0,51.3,2705300,0,2696267,0,2516857,0,660454,0,,,,,,0,2516857,0,660454,0, +8/26/21,51678,34,Lexington city,VA,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Low-Mod,,,,,0,0,0,0,0,Low-Mod +8/26/21,50025,34,Windham 
County,VT,56.8,23963,23963,64,22620,65.1,7718,76.5,73.7,27598,65.4,27588,73.7,25830,74.3,8588,85.1,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,73.7,25830,74.3,8588,85.1,Low-Mod \ No newline at end of file diff --git a/cdc_vaccines/tests/test_data/bad_missing_cols.csv b/cdc_vaccines/tests/test_data/bad_missing_cols.csv new file mode 100644 index 000000000..4efd5c1bd --- /dev/null +++ b/cdc_vaccines/tests/test_data/bad_missing_cols.csv @@ -0,0 +1,8 @@ +Date,FIPS,MMWR_week,Recip_County,Recip_State,Series_Complete_Pop_Pct,Series_Complete_65PlusPop_Pct,Administered_Dose1_Recip_12Plus,Administered_Dose1_Recip_12PlusPop_Pct,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65Plus +8/26/21,UNK,34,Unknown County,UNK,0,0,1119203,0,1035082,0,75596 +8/26/21,32013,34,Humboldt County,NV,32.9,69.9,6290,45.6,6014,48.9,1877 +8/26/21,47131,34,Obion County,TN,28.4,66,10755,41.7,10520,44.6,4625 +8/26/21,48305,34,Lynn County,TX,0,0,0,0,0,0,0 +8/26/21,UNK,34,Unknown County,VA,0,0,2696267,0,2516857,0,660454 +8/26/21,51678,34,Lexington city,VA,0,0,0,0,0,0,0 +8/26/21,50025,34,Windham County,VT,56.8,76.5,27588,73.7,25830,74.3,8588 \ No newline at end of file diff --git a/cdc_vaccines/tests/test_data/bad_missing_days.csv b/cdc_vaccines/tests/test_data/bad_missing_days.csv new file mode 100644 index 000000000..75a15510c --- /dev/null +++ b/cdc_vaccines/tests/test_data/bad_missing_days.csv @@ -0,0 +1,31 @@ 
+Date,FIPS,MMWR_week,Recip_County,Recip_State,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_12Plus,Series_Complete_12PlusPop_Pct,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Recip,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_12Plus,Administered_Dose1_Recip_12PlusPop_Pct,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65Plus,Administered_Dose1_Recip_65PlusPop_Pct,SVI_CTGY,Series_Complete_Pop_Pct_SVI,Series_Complete_12PlusPop_Pct_SVI,Series_Complete_18PlusPop_Pct_SVI,Series_Complete_65PlusPop_Pct_SVI +8/25/21,26007,34,Alpena County,MI,44.8,12730,12730,51,12344,53.5,5137,75.8,93.8,13426,47.3,13425,53.8,13293,57.6,5593,82.5,Low-Mod,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/25/21,30071,34,Phillips County,MT,33.8,1337,1337,40.1,1295,43,618,67.5,95.6,1513,38.3,1511,45.4,1450,48.1,665,72.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/25/21,41029,34,Jackson County,OR,46,101653,101502,53.2,97081,55.2,37628,75.5,97.7,115107,52.1,114881,60.2,109038,62,40450,81.1,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/25/21,UNK,34,Unknown County,UNK,0,787349,787315,0,731890,0,55518,0,0,1116357,0,1116294,0,1032597,0,75307,0,,,,, +8/25/21,49041,34,Sevier County,UT,30.7,6630,6630,37.5,6350,41.3,2602,72.9,97.2,8092,37.4,8089,45.8,7606,49.5,2928,82.1,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/25/21,26013,34,Baraga County,MI,48.8,4002,4001,54.4,3927,57.7,1440,77.1,93.8,4225,51.5,4224,57.5,4156,61,1481,79.3,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/24/21,30071,34,Phillips 
County,MT,33.8,1337,1337,40.1,1295,43,618,67.5,95.6,1513,38.3,1511,45.4,1450,48.1,665,72.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/24/21,26007,34,Alpena County,MI,44.8,12719,12719,50.9,12338,53.4,5137,75.8,93.8,13422,47.3,13421,53.8,13289,57.6,5593,82.5,Low-Mod,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/24/21,26013,34,Baraga County,MI,48.8,4002,4001,54.4,3927,57.7,1440,77.1,93.8,4225,51.5,4224,57.5,4156,61,1481,79.3,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/24/21,41029,34,Jackson County,OR,46,101651,101500,53.2,97079,55.2,37627,75.5,97.7,115101,52.1,114875,60.2,109032,62,40448,81.1,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/24/21,49041,34,Sevier County,UT,30.6,6607,6607,37.4,6328,41.2,2597,72.8,97.2,8050,37.2,8047,45.6,7567,49.2,2921,81.9,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/24/21,UNK,34,Unknown County,UNK,0,785133,785100,0,730087,0,55397,0,0,1113646,0,1113584,0,1030330,0,75003,0,,,,, +8/23/21,41029,34,Jackson County,OR,45.9,101501,101350,53.1,96953,55.1,37621,75.5,97.7,114861,52,114635,60,108835,61.9,40432,81.1,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/23/21,49041,34,Sevier County,UT,30.3,6557,6557,37.1,6295,41,2586,72.5,97.2,7920,36.6,7918,44.8,7478,48.7,2913,81.7,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/23/21,26007,34,Alpena County,MI,44.8,12714,12714,50.9,12334,53.4,5137,75.8,93.8,13422,47.3,13421,53.8,13289,57.6,5593,82.5,Low-Mod,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/23/21,30071,34,Phillips County,MT,33.8,1336,1336,40.1,1294,42.9,617,67.4,95.6,1513,38.3,1511,45.4,1450,48.1,665,72.6,Low-Mod,Low-Mod VC/Low-Mod 
SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/23/21,26013,34,Baraga County,MI,48.7,4001,4000,54.4,3926,57.6,1440,77.1,93.8,4225,51.5,4224,57.5,4156,61,1481,79.3,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/23/21,UNK,34,Unknown County,UNK,0,783092,783059,0,728591,0,55262,0,0,1111277,0,1111215,0,1028562,0,74699,0,,,,, +8/21/21,49041,33,Sevier County,UT,30.3,6557,6557,37.1,6295,41,2586,72.5,97.2,7919,36.6,7917,44.8,7477,48.7,2913,81.7,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/21/21,26013,33,Baraga County,MI,48.7,3997,3996,54.4,3922,57.6,1438,77,93.8,4224,51.5,4223,57.5,4155,61,1480,79.3,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/21/21,30071,33,Phillips County,MT,33.8,1335,1335,40.1,1293,42.9,617,67.4,95.6,1513,38.3,1511,45.4,1450,48.1,665,72.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/21/21,41029,33,Jackson County,OR,45.7,101069,100918,52.9,96575,54.9,37554,75.3,97.7,114032,51.6,113806,59.6,108142,61.5,40324,80.9,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/21/21,26007,33,Alpena County,MI,44.7,12697,12697,50.9,12320,53.4,5133,75.7,93.8,13418,47.2,13417,53.7,13285,57.5,5592,82.5,Low-Mod,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/21/21,UNK,33,Unknown County,UNK,0,777853,777820,0,724853,0,55045,0,0,1103847,0,1103785,0,1023256,0,74303,0,,,,, +8/19/21,26013,33,Baraga County,MI,48.6,3991,3990,54.3,3916,57.5,1437,77,93.8,4220,51.4,4219,57.4,4151,60.9,1480,79.3,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/19/21,41029,33,Jackson County,OR,45.6,100654,100503,52.6,96215,54.7,37479,75.2,97.7,113175,51.2,112949,59.2,107380,61.1,40201,80.6,Mod-High,Mod-High 
VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/19/21,UNK,33,Unknown County,UNK,0,772959,772926,0,720903,0,54789,0,0,1096323,0,1096261,0,1017125,0,73976,0,,,,, +8/19/21,30071,33,Phillips County,MT,33.5,1326,1326,39.8,1284,42.6,615,67.1,95.6,1501,38,1499,45,1438,47.7,664,72.5,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/19/21,49041,33,Sevier County,UT,30.2,6526,6526,37,6267,40.8,2576,72.2,97.1,7827,36.2,7825,44.3,7395,48.1,2904,81.4,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/19/21,26007,33,Alpena County,MI,44.6,12662,12662,50.7,12296,53.3,5130,75.7,93.8,13406,47.2,13405,53.7,13276,57.5,5591,82.5,Low-Mod,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI \ No newline at end of file diff --git a/cdc_vaccines/tests/test_data/small.csv b/cdc_vaccines/tests/test_data/small.csv new file mode 100644 index 000000000..d4243a035 --- /dev/null +++ b/cdc_vaccines/tests/test_data/small.csv @@ -0,0 +1,109 @@ +Date,FIPS,MMWR_week,Recip_County,Recip_State,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_12Plus,Series_Complete_12PlusPop_Pct,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Recip,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_12Plus,Administered_Dose1_Recip_12PlusPop_Pct,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65Plus,Administered_Dose1_Recip_65PlusPop_Pct,SVI_CTGY,Series_Complete_Pop_Pct_SVI,Series_Complete_12PlusPop_Pct_SVI,Series_Complete_18PlusPop_Pct_SVI,Series_Complete_65PlusPop_Pct_SVI +8/19/21,72121,33,Sabana Grande Municipio,PR,66.1,14360,14360,74.2,13402,75.2,4236,99.9,96,16305,75.1,16305,84.2,15052,84.5,4587,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI 
+8/19/21,72129,33,San Lorenzo Municipio,PR,56.9,20489,20481,63.8,18994,64.3,4771,78,96,24048,66.8,24031,74.9,22111,74.9,5805,94.9,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/19/21,55031,33,Douglas County,WI,35.2,15169,15169,40.3,14557,41.9,5178,62.5,97,18005,41.7,18005,47.9,17155,49.4,5778,69.7,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/19/21,47067,33,Hancock County,TN,24.7,1634,1634,28.6,1613,30.8,676,47,97.8,1818,27.5,1818,31.8,1789,34.2,726,50.5,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/19/21,UNK,33,Unknown County,UNK,0,772959,772926,0,720903,0,54789,0,0,1096323,0,1096261,0,1017125,0,73976,0,,,,, +8/19/21,48153,33,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/19/21,49031,33,Piute County,UT,29.7,439,439,33.7,435,38.7,244,58.7,97.1,527,35.6,527,40.5,518,46.1,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/19/21,1043,33,Cullman County,AL,27.7,23211,23210,32.5,22877,35.2,9906,63,91.9,29475,35.2,29470,41.3,28641,44.1,11151,70.9,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/19/21,UNK,33,Unknown County,TN,0,59660,59642,0,58874,0,15125,0,97.8,74682,0,74626,0,72349,0,17334,0,,,,, +8/19/21,18109,33,Morgan County,IN,44.5,31398,31397,51.9,30057,55,10392,85.2,98.6,33772,47.9,33771,55.8,32057,58.6,10663,87.4,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/19/21,56009,33,Converse County,WY,25.1,3466,3466,29.9,3397,32.5,1436,60.4,96.4,3971,28.7,3970,34.2,3864,37,1604,67.4,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/19/21,18099,33,Marshall County,IN,36.7,16967,16967,43.6,16139,46.5,6135,72.6,98.6,18507,40,18505,47.6,17320,49.9,6355,75.2,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI 
+8/18/21,48153,33,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/18/21,49031,33,Piute County,UT,29.7,439,439,33.7,435,38.7,244,58.7,97.1,527,35.6,527,40.5,518,46.1,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/18/21,55031,33,Douglas County,WI,35.1,15136,15136,40.2,14529,41.9,5171,62.4,97,17954,41.6,17954,47.7,17116,49.3,5773,69.7,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/18/21,72129,33,San Lorenzo Municipio,PR,56.9,20465,20457,63.7,18973,64.2,4768,77.9,96,23962,66.6,23945,74.6,22040,74.6,5801,94.8,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/18/21,56009,33,Converse County,WY,25.1,3465,3465,29.8,3396,32.5,1436,60.4,96.4,3963,28.7,3962,34.1,3856,36.9,1604,67.4,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/18/21,1043,33,Cullman County,AL,27.6,23088,23087,32.4,22765,35,9883,62.8,91.9,29328,35,29323,41.1,28502,43.9,11133,70.8,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/18/21,18109,33,Morgan County,IN,44.5,31348,31347,51.8,30021,54.9,10388,85.1,98.6,33713,47.8,33712,55.7,32002,58.5,10656,87.3,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/18/21,72121,33,Sabana Grande Municipio,PR,65.9,14319,14319,73.9,13369,75,4233,99.9,96,16232,74.8,16232,83.8,14988,84.1,4574,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/18/21,18099,33,Marshall County,IN,36.6,16939,16939,43.5,16112,46.4,6128,72.5,98.6,18474,39.9,18472,47.5,17293,49.8,6355,75.2,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/18/21,UNK,33,Unknown County,UNK,0,770751,770718,0,719193,0,54698,0,0,1092878,0,1092816,0,1014393,0,73827,0,,,,, +8/18/21,UNK,33,Unknown 
County,TN,0,59506,59488,0,58728,0,15089,0,97.8,74350,0,74295,0,72040,0,17267,0,,,,, +8/18/21,47067,33,Hancock County,TN,24.6,1631,1631,28.5,1610,30.7,675,46.9,97.8,1810,27.3,1810,31.6,1783,34,724,50.3,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/17/21,UNK,33,Unknown County,UNK,0,768634,768601,0,717447,0,54645,0,0,1089178,0,1089116,0,1011387,0,73711,0,,,,, +8/17/21,18099,33,Marshall County,IN,36.5,16891,16891,43.4,16078,46.3,6123,72.4,98.6,18392,39.8,18390,47.3,17259,49.7,6354,75.2,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/17/21,55031,33,Douglas County,WI,35,15121,15121,40.2,14515,41.8,5168,62.4,97,17936,41.6,17936,47.7,17098,49.3,5771,69.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/17/21,49031,33,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/17/21,72129,33,San Lorenzo Municipio,PR,56.9,20465,20457,63.7,18973,64.2,4768,77.9,96,23962,66.6,23945,74.6,22040,74.6,5801,94.8,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/17/21,UNK,33,Unknown County,TN,0,59345,59327,0,58576,0,15053,0,97.8,74078,0,74023,0,71776,0,17214,0,,,,, +8/17/21,56009,33,Converse County,WY,25.1,3463,3463,29.8,3394,32.5,1436,60.4,96.4,3961,28.7,3960,34.1,3854,36.9,1604,67.4,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/17/21,47067,33,Hancock County,TN,24.6,1629,1629,28.5,1608,30.7,675,46.9,97.8,1806,27.3,1806,31.6,1779,34,724,50.3,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/17/21,18109,33,Morgan County,IN,44.4,31316,31315,51.7,29996,54.9,10387,85.1,98.6,33662,47.8,33661,55.6,31960,58.4,10651,87.3,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/17/21,48153,33,Floyd 
County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/17/21,1043,33,Cullman County,AL,27.5,23032,23031,32.3,22712,35,9875,62.8,91.9,29254,34.9,29249,41,28431,43.8,11126,70.7,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/17/21,72121,33,Sabana Grande Municipio,PR,65.9,14319,14319,73.9,13369,75,4233,99.9,96,16232,74.8,16232,83.8,14988,84.1,4574,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/16/21,UNK,33,Unknown County,TN,0,59321,59303,0,58555,0,15048,0,97.8,73946,0,73891,0,71657,0,17203,0,,,,, +8/16/21,18099,33,Marshall County,IN,36.5,16879,16879,43.4,16068,46.3,6122,72.4,98.6,18369,39.7,18367,47.2,17239,49.7,6351,75.1,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/16/21,1043,33,Cullman County,AL,27.4,22971,22970,32.2,22657,34.9,9869,62.7,91.9,29137,34.8,29132,40.9,28333,43.6,11111,70.6,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/16/21,72129,33,San Lorenzo Municipio,PR,56.8,20426,20418,63.6,18941,64.1,4761,77.8,96,23869,66.3,23852,74.3,21971,74.4,5797,94.8,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/16/21,55031,33,Douglas County,WI,35,15120,15120,40.2,14515,41.8,5168,62.4,97,17928,41.5,17928,47.7,17090,49.2,5771,69.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/16/21,72121,33,Sabana Grande Municipio,PR,65.9,14305,14305,73.9,13359,75,4231,99.9,96,16159,74.4,16159,83.4,14924,83.8,4561,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/16/21,56009,33,Converse County,WY,24.8,3433,3433,29.6,3369,32.2,1431,60.2,96.4,3902,28.2,3901,33.6,3809,36.5,1590,66.8,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/16/21,18109,33,Morgan 
County,IN,44.4,31271,31270,51.6,29963,54.8,10384,85.1,98.6,33622,47.7,33621,55.5,31929,58.4,10648,87.3,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/16/21,UNK,33,Unknown County,UNK,0,766588,766555,0,716121,0,54544,0,0,1085417,0,1085355,0,1008952,0,73510,0,,,,, +8/16/21,47067,33,Hancock County,TN,24.6,1629,1629,28.5,1608,30.7,676,47,97.8,1805,27.3,1805,31.6,1778,34,724,50.3,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/16/21,48153,33,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/16/21,49031,33,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/15/21,72129,33,San Lorenzo Municipio,PR,56.7,20413,20405,63.6,18928,64.1,4755,77.7,96,23830,66.2,23813,74.2,21945,74.3,5792,94.7,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/15/21,48153,33,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/15/21,47067,33,Hancock County,TN,24.6,1628,1628,28.5,1607,30.7,676,47,97.8,1804,27.3,1804,31.5,1777,33.9,723,50.2,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/15/21,1043,33,Cullman County,AL,27.3,22861,22860,32.1,22570,34.7,9861,62.7,91.9,28943,34.6,28938,40.6,28169,43.4,11095,70.5,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/15/21,18109,33,Morgan County,IN,44.3,31220,31219,51.6,29919,54.7,10381,85.1,98.6,33573,47.6,33572,55.5,31893,58.3,10645,87.2,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/15/21,55031,33,Douglas County,WI,34.9,15080,15080,40.1,14483,41.7,5162,62.3,97,17875,41.4,17875,47.5,17057,49.1,5765,69.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/15/21,72121,33,Sabana Grande 
Municipio,PR,65.9,14299,14299,73.8,13355,75,4230,99.9,96,16119,74.2,16119,83.2,14893,83.6,4555,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/15/21,56009,33,Converse County,WY,24.8,3433,3433,29.6,3369,32.2,1431,60.2,96.4,3901,28.2,3900,33.6,3808,36.4,1590,66.8,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/15/21,49031,33,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/15/21,UNK,33,Unknown County,UNK,0,764575,764542,0,714649,0,54426,0,0,1081257,0,1081195,0,1005831,0,73305,0,,,,, +8/15/21,UNK,33,Unknown County,TN,0,59136,59118,0,58379,0,15004,0,97.8,73626,0,73571,0,71349,0,17149,0,,,,, +8/15/21,18099,33,Marshall County,IN,36.5,16868,16868,43.3,16059,46.3,6122,72.4,98.6,18335,39.6,18333,47.1,17209,49.6,6350,75.1,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/14/21,47067,32,Hancock County,TN,24.6,1626,1626,28.4,1605,30.6,676,47,97.8,1797,27.1,1797,31.4,1771,33.8,721,50.1,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/14/21,18099,32,Marshall County,IN,36.4,16854,16854,43.3,16047,46.2,6120,72.4,98.6,18308,39.6,18306,47,17186,49.5,6347,75.1,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/14/21,72121,32,Sabana Grande Municipio,PR,65.8,14282,14282,73.8,13340,74.9,4230,99.9,96,16094,74.1,16094,83.1,14873,83.5,4554,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/14/21,55031,32,Douglas County,WI,34.9,15080,15080,40.1,14483,41.7,5162,62.3,97,17875,41.4,17875,47.5,17057,49.1,5765,69.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/14/21,18109,32,Morgan 
County,IN,44.2,31191,31190,51.5,29898,54.7,10380,85.1,98.6,33520,47.6,33519,55.4,31853,58.3,10643,87.2,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/14/21,72129,32,San Lorenzo Municipio,PR,56.6,20380,20372,63.5,18902,64,4751,77.7,96,23744,66,23727,73.9,21886,74.1,5789,94.6,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/14/21,UNK,32,Unknown County,UNK,0,762688,762656,0,713187,0,54336,0,0,1077619,0,1077559,0,1002992,0,73097,0,,,,, +8/14/21,48153,32,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/14/21,56009,32,Converse County,WY,24.8,3433,3433,29.6,3369,32.2,1431,60.2,96.4,3901,28.2,3900,33.6,3808,36.4,1590,66.8,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/14/21,1043,32,Cullman County,AL,27.2,22777,22776,31.9,22497,34.6,9848,62.6,91.8,28740,34.3,28735,40.3,27994,43.1,11077,70.4,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/14/21,UNK,32,Unknown County,TN,0,59055,59037,0,58308,0,14997,0,97.8,73414,0,73359,0,71149,0,17126,0,,,,, +8/14/21,49031,32,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/13/21,18099,32,Marshall County,IN,36.4,16832,16832,43.3,16028,46.2,6117,72.3,98.6,18263,39.5,18261,46.9,17147,49.4,6343,75,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/13/21,1043,32,Cullman County,AL,27.2,22758,22757,31.9,22480,34.6,9843,62.6,91.8,28693,34.3,28688,40.2,27952,43,11073,70.4,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/13/21,47067,32,Hancock County,TN,24.5,1624,1624,28.4,1605,30.6,676,47,97.8,1797,27.1,1797,31.4,1771,33.8,721,50.1,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/13/21,56009,32,Converse 
County,WY,24.8,3433,3433,29.6,3369,32.2,1431,60.2,96.4,3900,28.2,3899,33.6,3807,36.4,1590,66.8,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/13/21,UNK,32,Unknown County,TN,0,58888,58870,0,58148,0,14972,0,97.8,73059,0,73004,0,70807,0,17061,0,,,,, +8/13/21,18109,32,Morgan County,IN,44.2,31134,31133,51.4,29852,54.6,10377,85.1,98.6,33443,47.4,33442,55.2,31787,58.1,10635,87.2,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/13/21,49031,32,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/13/21,72129,32,San Lorenzo Municipio,PR,56.5,20335,20327,63.3,18865,63.9,4746,77.6,96,23666,65.8,23649,73.7,21826,73.9,5788,94.6,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/13/21,48153,32,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/13/21,55031,32,Douglas County,WI,34.9,15058,15058,40,14466,41.7,5158,62.2,97,17847,41.4,17847,47.5,17031,49.1,5762,69.5,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/13/21,72121,32,Sabana Grande Municipio,PR,65.6,14253,14253,73.6,13316,74.7,4225,99.9,96,16071,74,16071,83,14852,83.4,4550,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/13/21,UNK,32,Unknown County,UNK,0,760315,760283,0,711222,0,54262,0,0,1072813,0,1072753,0,998950,0,72929,0,,,,, +8/12/21,UNK,32,Unknown County,UNK,0,758305,758273,0,709538,0,54190,0,0,1068845,0,1068785,0,995682,0,72750,0,,,,, +8/12/21,18099,32,Marshall County,IN,36.4,16817,16817,43.2,16013,46.1,6116,72.3,98.6,18242,39.4,18240,46.9,17129,49.4,6341,75,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/12/21,49031,32,Piute 
County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/12/21,72129,32,San Lorenzo Municipio,PR,56.4,20296,20288,63.2,18833,63.8,4740,77.5,96,23615,65.6,23598,73.5,21786,73.8,5784,94.5,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/12/21,72121,32,Sabana Grande Municipio,PR,65.6,14234,14234,73.5,13303,74.7,4220,99.9,96,16057,74,16057,82.9,14840,83.3,4548,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/12/21,48153,32,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/12/21,55031,32,Douglas County,WI,34.9,15040,15040,40,14452,41.6,5153,62.2,97,17820,41.3,17820,47.4,17007,49,5756,69.4,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/12/21,18109,32,Morgan County,IN,44.1,31079,31078,51.3,29808,54.5,10372,85,98.6,33362,47.3,33361,55.1,31719,58,10630,87.1,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/12/21,56009,32,Converse County,WY,24.8,3433,3433,29.6,3369,32.2,1431,60.2,96.4,3899,28.2,3898,33.6,3806,36.4,1589,66.8,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/12/21,UNK,32,Unknown County,TN,0,58763,58745,0,58026,0,14940,0,97.8,72726,0,72672,0,70489,0,17016,0,,,,, +8/12/21,1043,32,Cullman County,AL,27.1,22693,22692,31.8,22421,34.5,9833,62.5,91.8,28535,34.1,28530,40,27817,42.8,11062,70.3,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/12/21,47067,32,Hancock County,TN,24.5,1623,1623,28.4,1604,30.6,676,47,97.8,1795,27.1,1795,31.4,1769,33.8,720,50,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/11/21,18099,32,Marshall County,IN,36.3,16794,16794,43.2,15996,46.1,6113,72.3,98.6,18199,39.3,18197,46.8,17097,49.3,6338,75,Mod-High,Low-Mod VC/Mod-High 
SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/11/21,72121,32,Sabana Grande Municipio,PR,65.2,14166,14166,73.2,13266,74.5,4214,99.9,96,16023,73.8,16023,82.7,14818,83.2,4545,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/11/21,72129,32,San Lorenzo Municipio,PR,56.3,20279,20271,63.2,18821,63.7,4738,77.4,96,23592,65.6,23575,73.4,21767,73.7,5783,94.5,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/11/21,UNK,32,Unknown County,TN,0,58601,58583,0,57870,0,14895,0,97.8,72472,0,72418,0,70251,0,16956,0,,,,, +8/11/21,47067,32,Hancock County,TN,24.5,1622,1622,28.4,1603,30.6,675,46.9,97.8,1794,27.1,1794,31.4,1768,33.8,720,50,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/11/21,48153,32,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/11/21,UNK,32,Unknown County,UNK,0,756368,756338,0,707922,0,54140,0,0,1065083,0,1065026,0,992587,0,72616,0,,,,, +8/11/21,55031,32,Douglas County,WI,34.8,15014,15014,39.9,14434,41.6,5147,62.1,97,17792,41.2,17792,47.3,16986,48.9,5754,69.4,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/11/21,56009,32,Converse County,WY,24.8,3429,3429,29.5,3365,32.2,1429,60.1,96.4,3895,28.2,3894,33.5,3802,36.4,1587,66.7,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/11/21,18109,32,Morgan County,IN,44,31047,31046,51.3,29782,54.5,10371,85,98.6,33318,47.3,33317,55,31682,57.9,10627,87.1,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/11/21,49031,32,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/11/21,1043,32,Cullman County,AL,27,22603,22602,31.7,22338,34.4,9821,62.4,91.8,28324,33.8,28319,39.7,27622,42.5,11036,70.1,Mod-High,Low VC/Mod-High SVI,Low-Mod 
VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI \ No newline at end of file diff --git a/cdc_vaccines/tests/test_data/small_confirmed.csv b/cdc_vaccines/tests/test_data/small_confirmed.csv new file mode 100644 index 000000000..e69de29bb diff --git a/cdc_vaccines/tests/test_data/small_pull.csv b/cdc_vaccines/tests/test_data/small_pull.csv new file mode 100644 index 000000000..812835e37 --- /dev/null +++ b/cdc_vaccines/tests/test_data/small_pull.csv @@ -0,0 +1,4 @@ +Date,FIPS,MMWR_week,Recip_County,Recip_State,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_12Plus,Series_Complete_12PlusPop_Pct,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Recip,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_12Plus,Administered_Dose1_Recip_12PlusPop_Pct,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65Plus,Administered_Dose1_Recip_65PlusPop_Pct,SVI_CTGY,Series_Complete_Pop_Pct_SVI,Series_Complete_12PlusPop_Pct_SVI,Series_Complete_18PlusPop_Pct_SVI,Series_Complete_65PlusPop_Pct_SVI +8/26/21,UNK,34,Unknown County,UNK,0,789625,789591,0,733809,0,55620,0,0,1119266,0,1119203,0,1035082,0,75596,0,,,,, +8/26/21,32013,34,Humboldt County,NV,32.9,5537,5535,40.2,5368,43.6,1696,69.9,94.9,6293,37.4,6290,45.6,6014,48.9,1877,77.3,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/26/21,48305,34,Lynn County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, \ No newline at end of file diff --git a/cdc_vaccines/tests/test_pull.py b/cdc_vaccines/tests/test_pull.py new file mode 100644 index 000000000..f49d0eff6 --- /dev/null +++ b/cdc_vaccines/tests/test_pull.py @@ -0,0 +1,71 @@ +"""Tests for running the CDC Vaccine indicator.""" +import pytest +import logging +from unittest.mock import patch + +import pandas as pd +import numpy as np +from delphi_cdc_vaccines.pull import 
pull_cdcvacc_data + +from test_run import local_fetch + +BASE_URL_GOOD = "test_data/small_pull.csv" + +BASE_URL_BAD = { + "missing_days": "test_data/bad_missing_days.csv", + "missing_cols": "test_data/bad_missing_cols.csv", + "extra_cols": "test_data/bad_extra_cols.csv" +} + +TEST_LOGGER = logging.getLogger() + +class TestPullCDCVaccines: + """Tests for the `pull_cdcvacc_data()` function.""" + def test_good_file(self): + """Test the expected output from a smaller file.""" + df = pull_cdcvacc_data(BASE_URL_GOOD, TEST_LOGGER) + expected_df = pd.DataFrame({ + "fips": ["00000","00000","32013","32013","48305","48305"], + "timestamp": [pd.Timestamp("2021-08-25"), pd.Timestamp("2021-08-26"), + pd.Timestamp("2021-08-25"), pd.Timestamp("2021-08-26"), + pd.Timestamp("2021-08-25"), pd.Timestamp("2021-08-26")], + "cumulative_counts_tot_vaccine": [np.nan,789625.0,np.nan,5537.0,np.nan,0.0], + "counts_tot_vaccine": [np.nan,789625.0,np.nan,5537.0,np.nan,0.0], + "cumulative_counts_tot_vaccine_12P": [np.nan,789591.0,np.nan,5535.0,np.nan,0.0], + "counts_tot_vaccine_12P": [np.nan,789591.0,np.nan,5535.0,np.nan,0.0], + "cumulative_counts_tot_vaccine_18P": [np.nan,733809.0,np.nan,5368.0,np.nan,0.0], + "counts_tot_vaccine_18P": [np.nan,733809.0,np.nan,5368.0,np.nan,0.0], + "cumulative_counts_tot_vaccine_65P": [np.nan,55620.0,np.nan,1696.0,np.nan,0.0], + "counts_tot_vaccine_65P": [np.nan,55620.0,np.nan,1696.0,np.nan,0.0], + "cumulative_counts_part_vaccine": [np.nan,1119266.0,np.nan,6293.0,np.nan,0.0], + "counts_part_vaccine": [np.nan,1119266.0,np.nan,6293.0,np.nan,0.0], + "cumulative_counts_part_vaccine_12P": [np.nan,1119203.0,np.nan,6290.0,np.nan,0.0], + "counts_part_vaccine_12P": [np.nan,1119203.0,np.nan,6290.0,np.nan,0.0], + "cumulative_counts_part_vaccine_18P": [np.nan,1035082.0,np.nan,6014.0,np.nan,0.0], + "counts_part_vaccine_18P": [np.nan,1035082.0,np.nan,6014.0,np.nan,0.0], + "cumulative_counts_part_vaccine_65P": [np.nan,75596.0,np.nan,1877.0,np.nan,0.0], + 
"counts_part_vaccine_65P": [np.nan,75596.0,np.nan,1877.0,np.nan,0.0]}, + index=[0, 1, 2, 3, 4, 5]) + # sort since rows order doesn't matter + pd.testing.assert_frame_equal(df.sort_index(), expected_df.sort_index()) + + def test_missing_days(self): + """Test if error is raised when there are missing days.""" + with pytest.raises(ValueError): + pull_cdcvacc_data( + BASE_URL_BAD["missing_days"], TEST_LOGGER + ) + + def test_missing_cols(self): + """Test if error is raised when there are missing columns.""" + with pytest.raises(ValueError): + pull_cdcvacc_data( + BASE_URL_BAD["missing_cols"],TEST_LOGGER + ) + + def test_extra_cols(self): + """Test if error is raised when there are extra columns.""" + with pytest.raises(ValueError): + pull_cdcvacc_data( + BASE_URL_BAD["extra_cols"], TEST_LOGGER + ) \ No newline at end of file diff --git a/cdc_vaccines/tests/test_run.py b/cdc_vaccines/tests/test_run.py new file mode 100644 index 000000000..f65f00764 --- /dev/null +++ b/cdc_vaccines/tests/test_run.py @@ -0,0 +1,80 @@ +"""Tests for running the CDC Vaccine indicator.""" +from itertools import product +from os import listdir +from os.path import join +from unittest.mock import patch + +import pandas as pd + +from delphi_cdc_vaccines.run import run_module + +def local_fetch(url, cache): + return pd.read_csv(url) + +class TestRun: + """Tests for the `run_module()` function.""" + PARAMS = { + "common": { + "export_dir": "./receiving", + "input_dir": "./input_cache" + }, + "indicator": { + "base_url": "./test_data/small.csv", + "export_start_date": "2021-05-01" + } + } + + def test_output_files_exist(self): + """Test that the expected output files exist.""" + run_module(self.PARAMS) + + csv_files = [f for f in listdir("receiving") if f.endswith(".csv")] + + dates = [ + "20210810", + "20210811", + "20210812", + "20210813", + "20210814", + "20210815", + "20210816", + "20210817", + "20210818", + "20210819", + ] + geos = ["state_code", "hrr", "hhs", "nation"] + + expected_files = [] 
+ for metric in ["cumulative_counts_tot_vaccine", + "counts_tot_vaccine", + "cumulative_counts_tot_vaccine_12P", + "counts_tot_vaccine_12P", + "cumulative_counts_tot_vaccine_18P", + "counts_tot_vaccine_18P", + "cumulative_counts_tot_vaccine_65P", + "counts_tot_vaccine_65P", + "cumulative_counts_part_vaccine", + "counts_part_vaccine", + "cumulative_counts_part_vaccine_12P", + "counts_part_vaccine_12P", + "cumulative_counts_part_vaccine_18P", + "counts_part_vaccine_18P", + "cumulative_counts_part_vaccine_65P", + "counts_part_vaccine_65P"]: + for date in dates: + for geo in geos: + expected_files += [date + "_" + geo + "_" + metric + ".csv"] + if not("cumulative" in metric) and not (date in dates[:6]): + expected_files += [date + "_" + geo + "_" + metric + "_7dav.csv"] + + + assert set(csv_files) == set(expected_files) + + def test_output_file_format(self): + """Test that the output files have the proper format.""" + run_module(self.PARAMS) + + df = pd.read_csv( + join("receiving", "20210819_state_code_counts_tot_vaccine.csv") + ) + assert (df.columns.values == ["geo_id", "val", "se", "sample_size"]).all() \ No newline at end of file From 3b622d7ec92c388733fd76d7df4dfd07530f079b Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Thu, 9 Sep 2021 11:31:42 -0400 Subject: [PATCH 61/95] added explicit dictionary creation --- cdc_vaccines/delphi_cdc_vaccines/run.py | 6 ++++++ cdc_vaccines/tests/test_run.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 455ca44d2..93ee75886 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -43,6 +43,11 @@ def run_module(params): all_data = pull_cdcvacc_data(base_url, logger) run_stats = [] ## aggregate & smooth + + + if not os.path.exists(params["common"]["export_dir"]): + os.makedirs(params["common"]["export_dir"]) + for (sensor, smoother, geo) in product(SIGNALS, SMOOTHERS, GEOS): 
logger.info("Running on ", sensor=sensor, @@ -60,6 +65,7 @@ def run_module(params): df["se"] = np.nan df["sample_size"] = np.nan sensor_name = sensor + smoother[1] + if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): # don't export first 6 days for smoothed signals since they'll be nan. start_date = min(df.timestamp) + timedelta(6) if smoother[1] else min(df.timestamp) diff --git a/cdc_vaccines/tests/test_run.py b/cdc_vaccines/tests/test_run.py index f65f00764..e1ecbb17f 100644 --- a/cdc_vaccines/tests/test_run.py +++ b/cdc_vaccines/tests/test_run.py @@ -20,7 +20,7 @@ class TestRun: }, "indicator": { "base_url": "./test_data/small.csv", - "export_start_date": "2021-05-01" + "export_start_date": "2021-08-10" } } From 68fe6f525f708aac8ecbd50351e76adc941cdf4b Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Thu, 9 Sep 2021 11:36:00 -0400 Subject: [PATCH 62/95] added os import --- cdc_vaccines/delphi_cdc_vaccines/run.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 93ee75886..9a6f9cdae 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -18,6 +18,7 @@ from itertools import product import time as tm import numpy as np +import os from delphi_utils.export import create_export_csv from delphi_utils.geomap import GeoMapper from delphi_utils import get_structured_logger From 24afea1f075f8b7a9a22ccdcbff7a5a0b2b5ae9a Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Thu, 9 Sep 2021 11:43:08 -0400 Subject: [PATCH 63/95] Minor changes for the linter - tests pass locally --- cdc_vaccines/Makefile | 6 +++++- cdc_vaccines/delphi_cdc_vaccines/run.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/cdc_vaccines/Makefile b/cdc_vaccines/Makefile index ca3013248..5d1101c53 100644 --- a/cdc_vaccines/Makefile +++ b/cdc_vaccines/Makefile @@ -3,7 +3,7 @@ dir = $(shell find ./delphi_* -name __init__.py | grep -o 
'delphi_[_[:alnum:]]*') venv: - python -m venv env + python3.8 -m venv env install: venv . env/bin/activate; \ @@ -23,3 +23,7 @@ clean: rm -rf env rm -f params.json +run: + env/bin/python -m $(dir) + env/bin/python -m delphi_utils.validator --dry_run + env/bin/python -m delphi_utils.archive \ No newline at end of file diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 9a6f9cdae..c82ee3b16 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -17,8 +17,8 @@ from datetime import timedelta, datetime from itertools import product import time as tm -import numpy as np import os +import numpy as np from delphi_utils.export import create_export_csv from delphi_utils.geomap import GeoMapper from delphi_utils import get_structured_logger From 62410e12203c5f753149adb5fcbe5c3983ded69c Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 15:17:55 -0400 Subject: [PATCH 64/95] minor changes --- cdc_vaccines/delphi_cdc_vaccines/constants.py | 9 ++++++++- cdc_vaccines/delphi_cdc_vaccines/pull.py | 11 ++--------- cdc_vaccines/delphi_cdc_vaccines/run.py | 7 ++++++- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/constants.py b/cdc_vaccines/delphi_cdc_vaccines/constants.py index 113ecb69e..2896a1f8a 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/constants.py +++ b/cdc_vaccines/delphi_cdc_vaccines/constants.py @@ -43,7 +43,7 @@ GEOS = [ "nation", - "state_code", + "state", "hrr", "hhs" ] @@ -52,3 +52,10 @@ (Smoother("identity", impute_method=None), ""), (Smoother("moving_average", window_length=7), "_7dav"), ] + + +DIFFERENCE_MAPPING = { + f"{INCIDENCE}_counts_{completeness}_vaccine{age}": f"{CUMULATIVE}_counts_{completeness}_vaccine{age}" + for completeness, age in product(COMPLETENESS, AGE) +} +SIGNALS = list(DIFFERENCE_MAPPING.keys()) + list(DIFFERENCE_MAPPING.values()) \ No newline at end of file diff --git 
a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py index e19043163..32a53a4d7 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/pull.py +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py @@ -112,20 +112,13 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: df =pd.concat([df_dummy, df]) # Obtain new_counts df.sort_values(["fips", "timestamp"], inplace=True) - df["counts_tot_vaccine"] = df["cumulative_counts_tot_vaccine"].diff() # 1st discrete difference - df["counts_tot_vaccine_12P"] = df["cumulative_counts_tot_vaccine_12P"].diff() - df["counts_tot_vaccine_18P"] = df["cumulative_counts_tot_vaccine_18P"].diff() - df["counts_tot_vaccine_65P"] = df["cumulative_counts_tot_vaccine_65P"].diff() - df["counts_part_vaccine"] = df["cumulative_counts_part_vaccine"].diff() - df["counts_part_vaccine_12P"] = df["cumulative_counts_part_vaccine_12P"].diff() - df["counts_part_vaccine_18P"] = df["cumulative_counts_part_vaccine_18P"].diff() - df["counts_part_vaccine_65P"] = df["cumulative_counts_part_vaccine_65P"].diff() + for to, from in DIFFERENCE_MAPPING.items(): + df[to] = df[from].diff() rem_list = [ x for x in list(df.columns) if x not in ['timestamp', 'fips'] ] # Handle edge cases where we diffed across fips mask = df["fips"] != df["fips"].shift(1) df.loc[mask, rem_list] = np.nan - print(rem_list) df.reset_index(inplace=True, drop=True) # Final sanity checks unique_days = df["timestamp"].unique() diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index c82ee3b16..f5e4751ac 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -50,15 +50,20 @@ def run_module(params): os.makedirs(params["common"]["export_dir"]) for (sensor, smoother, geo) in product(SIGNALS, SMOOTHERS, GEOS): + logger.info("Running on ", sensor=sensor, smoother=smoother, geo=geo) + geo_map = geo + if geo=='state': + geo_map='state_code' + df = GeoMapper().replace_geocode( 
all_data[['timestamp','fips', sensor]],from_col='fips', from_code="fips", new_col="geo_id", - new_code=geo, + new_code=geo_map, date_col="timestamp") df["val"] = df[["geo_id", sensor]].groupby("geo_id")[sensor].transform( smoother[0].smooth From 0e6f6a78742a6490bfa6ad7beb78cd95d9d24d68 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 13:49:39 -0400 Subject: [PATCH 65/95] Update cdc_vaccines/delphi_cdc_vaccines/__main__.py Co-authored-by: Katie Mazaitis --- cdc_vaccines/delphi_cdc_vaccines/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/__main__.py b/cdc_vaccines/delphi_cdc_vaccines/__main__.py index 32fc0eecc..0aa3f6ac1 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/__main__.py +++ b/cdc_vaccines/delphi_cdc_vaccines/__main__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """Call the function run_module when executed. -This file indicates that calling the module (`python -m MODULE_NAME`) will +This file indicates that calling the module (`python -m delphi_cdc_vaccines`) will call the function `run_module` found within the run.py file. There should be no need to change this template. 
""" From c2144e5068920c781ef54e18af59d67a0926ce19 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 13:49:51 -0400 Subject: [PATCH 66/95] Update cdc_vaccines/delphi_cdc_vaccines/constants.py Co-authored-by: Katie Mazaitis --- cdc_vaccines/delphi_cdc_vaccines/constants.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/constants.py b/cdc_vaccines/delphi_cdc_vaccines/constants.py index 2896a1f8a..6e8a9620c 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/constants.py +++ b/cdc_vaccines/delphi_cdc_vaccines/constants.py @@ -45,7 +45,8 @@ "nation", "state", "hrr", - "hhs" + "hhs", + "msa" ] SMOOTHERS = [ From 00c59672d90e8c49b36a855e69609116e1f7cc3f Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 13:50:41 -0400 Subject: [PATCH 67/95] Update cdc_vaccines/README.md Co-authored-by: Katie Mazaitis --- cdc_vaccines/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/README.md b/cdc_vaccines/README.md index b56682d0f..bcbcee972 100644 --- a/cdc_vaccines/README.md +++ b/cdc_vaccines/README.md @@ -2,7 +2,7 @@ This indicator provides the number official vaccinations in the US. We export the county-level daily vaccination rates data as-is, and publishes the result as a COVIDcast signal. -We also aggregate the data to the HHS, State, and Nation levels. +We also aggregate the data to the MSA, HRR, State, HHS Region, and Nation levels. For detailed information see the files DETAILS.md contained in this directory. Note that individuals could be vaccinated outside of the US. 
Additionally, From cfed683ca6778b5d56f6602f442064edf7e34017 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 13:51:55 -0400 Subject: [PATCH 68/95] Update cdc_vaccines/README.md Co-authored-by: Katie Mazaitis --- cdc_vaccines/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/README.md b/cdc_vaccines/README.md index bcbcee972..3423ca851 100644 --- a/cdc_vaccines/README.md +++ b/cdc_vaccines/README.md @@ -1,7 +1,7 @@ # CDC Vaccinations This indicator provides the number official vaccinations in the US. We export the county-level -daily vaccination rates data as-is, and publishes the result as a COVIDcast signal. +daily vaccination rates data as-is, and publish the result as a COVIDcast signal. We also aggregate the data to the MSA, HRR, State, HHS Region, and Nation levels. For detailed information see the files DETAILS.md contained in this directory. From c341984d1aaf64bc9adef2f5df964cd31fc62cc6 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 13:52:19 -0400 Subject: [PATCH 69/95] Update cdc_vaccines/params.json.template Co-authored-by: Katie Mazaitis --- cdc_vaccines/params.json.template | 9 --------- 1 file changed, 9 deletions(-) diff --git a/cdc_vaccines/params.json.template b/cdc_vaccines/params.json.template index 3d5337846..bfc654514 100644 --- a/cdc_vaccines/params.json.template +++ b/cdc_vaccines/params.json.template @@ -23,13 +23,4 @@ }, "dynamic": {} }, - "archive": { - "aws_credentials": { - "aws_access_key_id": "", - "aws_secret_access_key": "" - }, - "bucket_name": "", - "indicator_prefix": "usafacts", - "cache_dir": "./cache" - } } From 921dc40376483d7e885847cc5da0eef19ea0e35c Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 15:03:37 -0400 Subject: [PATCH 70/95] Update cdc_vaccines/README.md Co-authored-by: Katie Mazaitis --- cdc_vaccines/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/README.md b/cdc_vaccines/README.md 
index 3423ca851..ea076dc1e 100644 --- a/cdc_vaccines/README.md +++ b/cdc_vaccines/README.md @@ -1,6 +1,6 @@ # CDC Vaccinations -This indicator provides the number official vaccinations in the US. We export the county-level +This indicator provides the official vaccination counts in the US. We export the county-level daily vaccination rates data as-is, and publish the result as a COVIDcast signal. We also aggregate the data to the MSA, HRR, State, HHS Region, and Nation levels. For detailed information see the files DETAILS.md contained in this directory. From 1566359b3f8df289494fc54468e20d1a4cfa68ba Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 15:10:39 -0400 Subject: [PATCH 71/95] Update cdc_vaccines/delphi_cdc_vaccines/run.py Co-authored-by: Katie Mazaitis --- cdc_vaccines/delphi_cdc_vaccines/run.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index f5e4751ac..0e52a9584 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -60,7 +60,8 @@ def run_module(params): geo_map='state_code' df = GeoMapper().replace_geocode( - all_data[['timestamp','fips', sensor]],from_col='fips', + all_data[['timestamp','fips', sensor]], + from_col='fips', from_code="fips", new_col="geo_id", new_code=geo_map, From 39f9df53b541fc6425c2cda1b6f0fd71859acbd9 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 15:12:35 -0400 Subject: [PATCH 72/95] Update cdc_vaccines/delphi_cdc_vaccines/pull.py Co-authored-by: Katie Mazaitis --- cdc_vaccines/delphi_cdc_vaccines/pull.py | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py index 32a53a4d7..e158a0b8c 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/pull.py +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py @@ -132,24 +132,6 @@ def 
pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: ) return df.loc[ df["timestamp"] >= min(df["timestamp"]), - [ # Reorder - "fips", - "timestamp", - "cumulative_counts_tot_vaccine", - "counts_tot_vaccine", - "cumulative_counts_tot_vaccine_12P", - "counts_tot_vaccine_12P", - "cumulative_counts_tot_vaccine_18P", - "counts_tot_vaccine_18P", - "cumulative_counts_tot_vaccine_65P", - "counts_tot_vaccine_65P", - "cumulative_counts_part_vaccine", - "counts_part_vaccine", - "cumulative_counts_part_vaccine_12P", - "counts_part_vaccine_12P", - "cumulative_counts_part_vaccine_18P", - "counts_part_vaccine_18P", - "cumulative_counts_part_vaccine_65P", - "counts_part_vaccine_65P" - ], + # Reorder + ["fips", "timestamp"] + SIGNALS, ].reset_index(drop=True) From d42771c4be0fde5a0b1e5536762929aabe04684f Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 10 Sep 2021 15:22:05 -0400 Subject: [PATCH 73/95] changes to the json file --- cdc_vaccines/params.json.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/params.json.template b/cdc_vaccines/params.json.template index bfc654514..3e6af1c23 100644 --- a/cdc_vaccines/params.json.template +++ b/cdc_vaccines/params.json.template @@ -22,5 +22,5 @@ "missing_sample_size_allowed": true }, "dynamic": {} - }, + } } From 4b776dab0b84d9e6e2714e9c75b80adbdaea2aa2 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Sat, 11 Sep 2021 13:22:43 -0400 Subject: [PATCH 74/95] changed the signal name generation --- cdc_vaccines/delphi_cdc_vaccines/pull.py | 7 +++---- cdc_vaccines/delphi_cdc_vaccines/run.py | 2 +- cdc_vaccines/tests/test_pull.py | 23 ++++++++++++++--------- cdc_vaccines/tests/test_run.py | 21 ++++++++++----------- 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py index e158a0b8c..b8e8c6387 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/pull.py +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py 
@@ -5,8 +5,7 @@ from delphi_utils.geomap import GeoMapper import numpy as np import pandas as pd - - +from .constants import SIGNALS, DIFFERENCE_MAPPING @@ -112,8 +111,8 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: df =pd.concat([df_dummy, df]) # Obtain new_counts df.sort_values(["fips", "timestamp"], inplace=True) - for to, from in DIFFERENCE_MAPPING.items(): - df[to] = df[from].diff() + for to, from_d in DIFFERENCE_MAPPING.items(): + df[to] = df[from_d].diff() rem_list = [ x for x in list(df.columns) if x not in ['timestamp', 'fips'] ] # Handle edge cases where we diffed across fips diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 0e52a9584..48c0f24f9 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -72,7 +72,7 @@ def run_module(params): df["se"] = np.nan df["sample_size"] = np.nan sensor_name = sensor + smoother[1] - + print(sensor_name) if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): # don't export first 6 days for smoothed signals since they'll be nan. 
start_date = min(df.timestamp) + timedelta(6) if smoother[1] else min(df.timestamp) diff --git a/cdc_vaccines/tests/test_pull.py b/cdc_vaccines/tests/test_pull.py index f49d0eff6..644f1226c 100644 --- a/cdc_vaccines/tests/test_pull.py +++ b/cdc_vaccines/tests/test_pull.py @@ -29,22 +29,27 @@ def test_good_file(self): "timestamp": [pd.Timestamp("2021-08-25"), pd.Timestamp("2021-08-26"), pd.Timestamp("2021-08-25"), pd.Timestamp("2021-08-26"), pd.Timestamp("2021-08-25"), pd.Timestamp("2021-08-26")], + + "incidence_counts_tot_vaccine": [np.nan,789625.0,np.nan,5537.0,np.nan,0.0], + "incidence_counts_tot_vaccine_12P": [np.nan,789591.0,np.nan,5535.0,np.nan,0.0], + "incidence_counts_tot_vaccine_18P": [np.nan,733809.0,np.nan,5368.0,np.nan,0.0], + "incidence_counts_tot_vaccine_65P": [np.nan,55620.0,np.nan,1696.0,np.nan,0.0], + "incidence_counts_part_vaccine": [np.nan,1119266.0,np.nan,6293.0,np.nan,0.0], + "incidence_counts_part_vaccine_12P": [np.nan,1119203.0,np.nan,6290.0,np.nan,0.0], + "incidence_counts_part_vaccine_18P": [np.nan,1035082.0,np.nan,6014.0,np.nan,0.0], + "incidence_counts_part_vaccine_65P": [np.nan,75596.0,np.nan,1877.0,np.nan,0.0], + + + "cumulative_counts_tot_vaccine": [np.nan,789625.0,np.nan,5537.0,np.nan,0.0], - "counts_tot_vaccine": [np.nan,789625.0,np.nan,5537.0,np.nan,0.0], "cumulative_counts_tot_vaccine_12P": [np.nan,789591.0,np.nan,5535.0,np.nan,0.0], - "counts_tot_vaccine_12P": [np.nan,789591.0,np.nan,5535.0,np.nan,0.0], "cumulative_counts_tot_vaccine_18P": [np.nan,733809.0,np.nan,5368.0,np.nan,0.0], - "counts_tot_vaccine_18P": [np.nan,733809.0,np.nan,5368.0,np.nan,0.0], "cumulative_counts_tot_vaccine_65P": [np.nan,55620.0,np.nan,1696.0,np.nan,0.0], - "counts_tot_vaccine_65P": [np.nan,55620.0,np.nan,1696.0,np.nan,0.0], "cumulative_counts_part_vaccine": [np.nan,1119266.0,np.nan,6293.0,np.nan,0.0], - "counts_part_vaccine": [np.nan,1119266.0,np.nan,6293.0,np.nan,0.0], "cumulative_counts_part_vaccine_12P": [np.nan,1119203.0,np.nan,6290.0,np.nan,0.0], - 
"counts_part_vaccine_12P": [np.nan,1119203.0,np.nan,6290.0,np.nan,0.0], "cumulative_counts_part_vaccine_18P": [np.nan,1035082.0,np.nan,6014.0,np.nan,0.0], - "counts_part_vaccine_18P": [np.nan,1035082.0,np.nan,6014.0,np.nan,0.0], - "cumulative_counts_part_vaccine_65P": [np.nan,75596.0,np.nan,1877.0,np.nan,0.0], - "counts_part_vaccine_65P": [np.nan,75596.0,np.nan,1877.0,np.nan,0.0]}, + "cumulative_counts_part_vaccine_65P": [np.nan,75596.0,np.nan,1877.0,np.nan,0.0]}, + index=[0, 1, 2, 3, 4, 5]) # sort since rows order doesn't matter pd.testing.assert_frame_equal(df.sort_index(), expected_df.sort_index()) diff --git a/cdc_vaccines/tests/test_run.py b/cdc_vaccines/tests/test_run.py index e1ecbb17f..abc966c25 100644 --- a/cdc_vaccines/tests/test_run.py +++ b/cdc_vaccines/tests/test_run.py @@ -42,32 +42,31 @@ def test_output_files_exist(self): "20210818", "20210819", ] - geos = ["state_code", "hrr", "hhs", "nation"] + geos = ["state", "hrr", "hhs", "nation", "msa"] expected_files = [] for metric in ["cumulative_counts_tot_vaccine", - "counts_tot_vaccine", + "incidence_counts_tot_vaccine", "cumulative_counts_tot_vaccine_12P", - "counts_tot_vaccine_12P", + "incidence_counts_tot_vaccine_12P", "cumulative_counts_tot_vaccine_18P", - "counts_tot_vaccine_18P", + "incidence_counts_tot_vaccine_18P", "cumulative_counts_tot_vaccine_65P", - "counts_tot_vaccine_65P", + "incidence_counts_tot_vaccine_65P", "cumulative_counts_part_vaccine", - "counts_part_vaccine", + "incidence_counts_part_vaccine", "cumulative_counts_part_vaccine_12P", - "counts_part_vaccine_12P", + "incidence_counts_part_vaccine_12P", "cumulative_counts_part_vaccine_18P", - "counts_part_vaccine_18P", + "incidence_counts_part_vaccine_18P", "cumulative_counts_part_vaccine_65P", - "counts_part_vaccine_65P"]: + "incidence_counts_part_vaccine_65P"]: for date in dates: for geo in geos: expected_files += [date + "_" + geo + "_" + metric + ".csv"] if not("cumulative" in metric) and not (date in dates[:6]): expected_files += 
[date + "_" + geo + "_" + metric + "_7dav.csv"] - assert set(csv_files) == set(expected_files) def test_output_file_format(self): @@ -75,6 +74,6 @@ def test_output_file_format(self): run_module(self.PARAMS) df = pd.read_csv( - join("receiving", "20210819_state_code_counts_tot_vaccine.csv") + join("receiving", "20210819_state_cumulative_counts_tot_vaccine.csv") ) assert (df.columns.values == ["geo_id", "val", "se", "sample_size"]).all() \ No newline at end of file From 17a54dd8ac64b54bfc70f5c91cbe95000d361322 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Sat, 11 Sep 2021 13:25:47 -0400 Subject: [PATCH 75/95] committed constants --- cdc_vaccines/delphi_cdc_vaccines/constants.py | 55 +++++-------------- 1 file changed, 13 insertions(+), 42 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/constants.py b/cdc_vaccines/delphi_cdc_vaccines/constants.py index 6e8a9620c..b8ff83d06 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/constants.py +++ b/cdc_vaccines/delphi_cdc_vaccines/constants.py @@ -1,45 +1,23 @@ """Registry for variations.""" - +from itertools import product from delphi_utils import Smoother -FULL_VAC_SUM = "cumulative_counts_tot_vaccine" -FULL_VAC = "counts_tot_vaccine" -FULL_VAC_SUM_12P = "cumulative_counts_tot_vaccine_12P" -FULL_VAC_12P = "counts_tot_vaccine_12P" -FULL_VAC_SUM_18P = "cumulative_counts_tot_vaccine_18P" -FULL_VAC_18P = "counts_tot_vaccine_18P" -FULL_VAC_SUM_65P = "cumulative_counts_tot_vaccine_65P" -FULL_VAC_65P = "counts_tot_vaccine_65P" -PART_VAC_SUM = "cumulative_counts_part_vaccine" -PART_VAC = "counts_part_vaccine" -PART_VAC_SUM_12P = "cumulative_counts_part_vaccine_12P" -PART_VAC_12P = "counts_part_vaccine_12P" -PART_VAC_SUM_18P = "cumulative_counts_part_vaccine_18P" -PART_VAC_18P = "counts_part_vaccine_18P" -PART_VAC_SUM_65P = "cumulative_counts_part_vaccine_65P" -PART_VAC_65P = "counts_part_vaccine_65P" +CUMULATIVE = 'cumulative' +INCIDENCE ='incidence' +FREQUENCY = [CUMULATIVE, INCIDENCE] +STATUS = ["tot", "part"] +AGE = 
["", "_12P", "_18P", "_65P"] +SIGNALS = [f"{frequency}_counts_{status}_vaccine{AGE}" for + frequency, status, age in product(FREQUENCY, STATUS, AGE)] +DIFFERENCE_MAPPING = { + f"{INCIDENCE}_counts_{status}_vaccine{age}": f"{CUMULATIVE}_counts_{status}_vaccine{age}" + for status, age in product(STATUS, AGE) +} +SIGNALS = list(DIFFERENCE_MAPPING.keys()) + list(DIFFERENCE_MAPPING.values()) -SIGNALS = [ - FULL_VAC_SUM , - FULL_VAC , - FULL_VAC_SUM_12P , - FULL_VAC_12P , - FULL_VAC_SUM_18P , - FULL_VAC_18P , - FULL_VAC_SUM_65P , - FULL_VAC_65P , - PART_VAC_SUM , - PART_VAC , - PART_VAC_SUM_12P , - PART_VAC_12P , - PART_VAC_SUM_18P , - PART_VAC_18P , - PART_VAC_SUM_65P , - PART_VAC_65P -] GEOS = [ "nation", @@ -53,10 +31,3 @@ (Smoother("identity", impute_method=None), ""), (Smoother("moving_average", window_length=7), "_7dav"), ] - - -DIFFERENCE_MAPPING = { - f"{INCIDENCE}_counts_{completeness}_vaccine{age}": f"{CUMULATIVE}_counts_{completeness}_vaccine{age}" - for completeness, age in product(COMPLETENESS, AGE) -} -SIGNALS = list(DIFFERENCE_MAPPING.keys()) + list(DIFFERENCE_MAPPING.values()) \ No newline at end of file From f162cf6f892f28ae5edb16ccf38d16d5c901fef3 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Fri, 17 Sep 2021 06:30:41 -0400 Subject: [PATCH 76/95] Modified run.py to have the right NaN codes --- cdc_vaccines/delphi_cdc_vaccines/run.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 48c0f24f9..3013c2c5a 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -21,7 +21,7 @@ import numpy as np from delphi_utils.export import create_export_csv from delphi_utils.geomap import GeoMapper -from delphi_utils import get_structured_logger +from delphi_utils import get_structured_logger, nancodes from .constants import GEOS, SIGNALS, SMOOTHERS from .pull import pull_cdcvacc_data @@ -69,8 +69,8 @@ def 
run_module(params): df["val"] = df[["geo_id", sensor]].groupby("geo_id")[sensor].transform( smoother[0].smooth ) - df["se"] = np.nan - df["sample_size"] = np.nan + df["se"] = nancodes.Nans.NOT_APPLICABLE + df["sample_size"] = nancodes.Nans.NOT_APPLICABLE sensor_name = sensor + smoother[1] print(sensor_name) if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): From 242456c7410d8355b6791d5cc4c81418b44bc713 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Mon, 13 Sep 2021 14:08:57 -0400 Subject: [PATCH 77/95] Update cdc_vaccines/README.md Co-authored-by: Katie Mazaitis --- cdc_vaccines/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cdc_vaccines/README.md b/cdc_vaccines/README.md index ea076dc1e..792f9070c 100644 --- a/cdc_vaccines/README.md +++ b/cdc_vaccines/README.md @@ -6,8 +6,7 @@ We also aggregate the data to the MSA, HRR, State, HHS Region, and Nation levels For detailed information see the files DETAILS.md contained in this directory. Note that individuals could be vaccinated outside of the US. Additionally, -there is no county level data for counties in Texas and Hawaii. There are unknown -counties in each state and a row for unknown county and unknown state. +there is no county level data for counties in Texas and Hawaii. Each state has some vaccination counts assigned to "unknown county". Some vaccination counts are assigned to "unknown state, unknown county". 
## Running the Indicator From a5b28ff836415bdd75b2996915daa80804116e4d Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Sun, 19 Sep 2021 11:28:00 -0400 Subject: [PATCH 78/95] Added appropriate NaN codes --- cdc_vaccines/delphi_cdc_vaccines/run.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 3013c2c5a..43dee11f5 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -18,7 +18,6 @@ from itertools import product import time as tm import os -import numpy as np from delphi_utils.export import create_export_csv from delphi_utils.geomap import GeoMapper from delphi_utils import get_structured_logger, nancodes From 991c34cb54887ef24bb98bcfecaa53fa64ca008d Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 21 Sep 2021 10:12:13 -0400 Subject: [PATCH 79/95] Update cdc_vaccines/delphi_cdc_vaccines/run.py Co-authored-by: Dmitry Shemetov --- cdc_vaccines/delphi_cdc_vaccines/run.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 43dee11f5..d22dfd76d 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -69,7 +69,10 @@ def run_module(params): smoother[0].smooth ) df["se"] = nancodes.Nans.NOT_APPLICABLE - df["sample_size"] = nancodes.Nans.NOT_APPLICABLE + df["se"] = np.nan + df["sample_size"] = np.nan + df["missing_se"] = nancodes.Nans.NOT_APPLICABLE + df["missing_sample_size"] = nancodes.Nans.NOT_APPLICABLE sensor_name = sensor + smoother[1] print(sensor_name) if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): From e3d42175bcdba6bc1253fe1ced552d07890560c5 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 21 Sep 2021 10:24:48 -0400 Subject: [PATCH 80/95] added back appropriate nan codes --- cdc_vaccines/delphi_cdc_vaccines/run.py | 5 +---- 1 file changed, 1 
insertion(+), 4 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index d22dfd76d..43dee11f5 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -69,10 +69,7 @@ def run_module(params): smoother[0].smooth ) df["se"] = nancodes.Nans.NOT_APPLICABLE - df["se"] = np.nan - df["sample_size"] = np.nan - df["missing_se"] = nancodes.Nans.NOT_APPLICABLE - df["missing_sample_size"] = nancodes.Nans.NOT_APPLICABLE + df["sample_size"] = nancodes.Nans.NOT_APPLICABLE sensor_name = sensor + smoother[1] print(sensor_name) if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): From ab53edef0d29f4328287492e65471905ad527ba0 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 21 Sep 2021 18:15:41 -0400 Subject: [PATCH 81/95] changes to run.py --- cdc_vaccines/delphi_cdc_vaccines/run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 43dee11f5..51ab3688b 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -68,8 +68,8 @@ def run_module(params): df["val"] = df[["geo_id", sensor]].groupby("geo_id")[sensor].transform( smoother[0].smooth ) - df["se"] = nancodes.Nans.NOT_APPLICABLE - df["sample_size"] = nancodes.Nans.NOT_APPLICABLE + df["missing_se"] = nancodes.Nans.NOT_APPLICABLE + df["missing_sample_size"] = nancodes.Nans.NOT_APPLICABLE sensor_name = sensor + smoother[1] print(sensor_name) if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): From b226bd547d588b2766db4d6bb3fb4d637cba5aed Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Mon, 11 Oct 2021 17:48:06 -0400 Subject: [PATCH 82/95] adding test_run changes with new col names --- cdc_vaccines/tests/test_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/tests/test_run.py b/cdc_vaccines/tests/test_run.py index 
abc966c25..c96e90112 100644 --- a/cdc_vaccines/tests/test_run.py +++ b/cdc_vaccines/tests/test_run.py @@ -76,4 +76,4 @@ def test_output_file_format(self): df = pd.read_csv( join("receiving", "20210819_state_cumulative_counts_tot_vaccine.csv") ) - assert (df.columns.values == ["geo_id", "val", "se", "sample_size"]).all() \ No newline at end of file + assert (df.columns.values == ["geo_id", "val", "missing_se", "missing_sample_size"]).all() \ No newline at end of file From de8da7aec0e40b35455c3f0e31f4c2139b48ea1c Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 12 Oct 2021 16:02:48 -0400 Subject: [PATCH 83/95] Update cdc_vaccines/delphi_cdc_vaccines/run.py Co-authored-by: Katie Mazaitis --- cdc_vaccines/delphi_cdc_vaccines/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 51ab3688b..a567f0097 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -72,7 +72,7 @@ def run_module(params): df["missing_sample_size"] = nancodes.Nans.NOT_APPLICABLE sensor_name = sensor + smoother[1] print(sensor_name) - if not(("cumulative" in sensor_name) and ("7dav" in sensor_name)): + if not (("cumulative" in sensor_name) and ("7dav" in sensor_name)): # don't export first 6 days for smoothed signals since they'll be nan. 
start_date = min(df.timestamp) + timedelta(6) if smoother[1] else min(df.timestamp) exported_csv_dates = create_export_csv( From cc8d5ac6cfb91f7014111799898420677b649237 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 12 Oct 2021 16:02:53 -0400 Subject: [PATCH 84/95] Update cdc_vaccines/delphi_cdc_vaccines/run.py Co-authored-by: Katie Mazaitis --- cdc_vaccines/delphi_cdc_vaccines/run.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index a567f0097..57dd3a403 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -71,7 +71,6 @@ def run_module(params): df["missing_se"] = nancodes.Nans.NOT_APPLICABLE df["missing_sample_size"] = nancodes.Nans.NOT_APPLICABLE sensor_name = sensor + smoother[1] - print(sensor_name) if not (("cumulative" in sensor_name) and ("7dav" in sensor_name)): # don't export first 6 days for smoothed signals since they'll be nan. start_date = min(df.timestamp) + timedelta(6) if smoother[1] else min(df.timestamp) From 5d1376c9066fc0296d00013fe129c2350b62efb7 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 12 Oct 2021 16:03:00 -0400 Subject: [PATCH 85/95] Update cdc_vaccines/delphi_cdc_vaccines/run.py Co-authored-by: Katie Mazaitis --- cdc_vaccines/delphi_cdc_vaccines/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 57dd3a403..a8777e433 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -2,7 +2,7 @@ """Functions to call when running the function. This module should contain a function called `run_module`, that is executed -when the module is run with `python -m MODULE_NAME`. `run_module`'s lone argument should be a +when the module is run with `python -m delphi_cdc_vaccines`. 
`run_module`'s lone argument should be a nested dictionary of parameters loaded from the params.json file. We expect the `params` to have the following structure: - "common": From e7949cc8e7b0e388ced1d675cc688a0fe956f6d5 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 12 Oct 2021 16:05:20 -0400 Subject: [PATCH 86/95] lint nit --- cdc_vaccines/delphi_cdc_vaccines/run.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index a8777e433..cc633f62a 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -2,9 +2,10 @@ """Functions to call when running the function. This module should contain a function called `run_module`, that is executed -when the module is run with `python -m delphi_cdc_vaccines`. `run_module`'s lone argument should be a -nested dictionary of parameters loaded from the params.json file. We expect the `params` to have -the following structure: +when the module is run with `python -m delphi_cdc_vaccines`. +`run_module`'s lone argument should be a nested dictionary of +parameters loaded from the params.json file. 
+We expect the `params` to have the following structure: - "common": - "export_dir": str, directory to which the results are exported - "log_filename": (optional) str, path to log file From 82225c170e9024367867bf87500616c9b443af60 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 12 Oct 2021 16:29:20 -0400 Subject: [PATCH 87/95] Modifying for the changes in the base csv file from the CDC --- cdc_vaccines/delphi_cdc_vaccines/pull.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py index b8e8c6387..9b7d8cf89 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/pull.py +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py @@ -45,7 +45,8 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: "series_complete_pop_pct", "mmwr_week", "recip_county", - "state_id" + "state_id", + "metro_status" ] From 6adcd92b2b8a00fd8c6af8a2416615d81d54f267 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 12 Oct 2021 16:53:04 -0400 Subject: [PATCH 88/95] Changes to the CDC Files and respective changes to tests --- cdc_vaccines/delphi_cdc_vaccines/pull.py | 5 +++-- cdc_vaccines/tests/test_data/small_confirmed.csv | 0 cdc_vaccines/tests/test_pull.py | 7 ------- 3 files changed, 3 insertions(+), 9 deletions(-) delete mode 100644 cdc_vaccines/tests/test_data/small_confirmed.csv diff --git a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py index 9b7d8cf89..f309064b7 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/pull.py +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py @@ -45,8 +45,7 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: "series_complete_pop_pct", "mmwr_week", "recip_county", - "state_id", - "metro_status" + "state_id" ] @@ -62,6 +61,7 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: df['recip_state'] = df['recip_state'].str.lower() drop_columns.extend([x for x in df.columns if ("pct" in x) 
| ("svi" in x)]) + drop_columns.extend(df.columns[22:]) drop_columns = list(set(drop_columns)) df = GeoMapper().add_geocode(df, "state_id", "state_code", from_col="recip_state", new_col="state_id", dropna=False) @@ -82,6 +82,7 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: "schema may have changed. Please investigate and " "amend drop_columns." ) from e + # timestamp: str -> datetime df.columns = ["fips", "cumulative_counts_tot_vaccine", diff --git a/cdc_vaccines/tests/test_data/small_confirmed.csv b/cdc_vaccines/tests/test_data/small_confirmed.csv deleted file mode 100644 index e69de29bb..000000000 diff --git a/cdc_vaccines/tests/test_pull.py b/cdc_vaccines/tests/test_pull.py index 644f1226c..b5f8e3447 100644 --- a/cdc_vaccines/tests/test_pull.py +++ b/cdc_vaccines/tests/test_pull.py @@ -67,10 +67,3 @@ def test_missing_cols(self): pull_cdcvacc_data( BASE_URL_BAD["missing_cols"],TEST_LOGGER ) - - def test_extra_cols(self): - """Test if error is raised when there are extra columns.""" - with pytest.raises(ValueError): - pull_cdcvacc_data( - BASE_URL_BAD["extra_cols"], TEST_LOGGER - ) \ No newline at end of file From 228fc593deea4ec9ab6e21922bbefc6eb79753cf Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Mon, 20 Dec 2021 21:38:30 -0500 Subject: [PATCH 89/95] cleaning up from rebase, still lint error --- cdc_vaccines/delphi_cdc_vaccines/pull.py | 43 +++--------------------- 1 file changed, 4 insertions(+), 39 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py index 328bde529..a6e9757b8 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/pull.py +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py @@ -9,15 +9,8 @@ -<<<<<<< HEAD -<<<<<<< HEAD -def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: -======= -def pull_cdcvacc_data(base_url: str, export_start_date: str, export_end_date: str, logger: Logger) -> pd.DataFrame: ->>>>>>> backup-branch -======= -def 
pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: ->>>>>>> 3d66880ed6bf8823b8086d3d9e742d09cb6e3a6b +def pull_cdcvacc_data(base_url: str, export_start_date: str, + export_end_date: str, logger: Logger) -> pd.DataFrame: """Pull the latest data from the CDC on vaccines and conform it into a dataset. The output dataset has: @@ -40,16 +33,10 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: ---------- base_url: str Base URL for pulling the CDC Vaccination Data -<<<<<<< HEAD -<<<<<<< HEAD -======= - export_start_date: str + export_start_date: str The start date for the csv file (can be empty) - export_end_date: + export_end_date: The end date for the csv file (can be empty) ->>>>>>> backup-branch -======= ->>>>>>> 3d66880ed6bf8823b8086d3d9e742d09cb6e3a6b logger: Logger Returns ------- @@ -67,7 +54,6 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: ] # Read data and cut off by designated start date - df = pd.read_csv(base_url) df['Date']=pd.to_datetime(df['Date']) try: @@ -79,7 +65,6 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: "to datetime and filter but failed. Please " "check this input." ) from e - try: export_end_date = pd.to_datetime(export_end_date) df = df.query('export_end_date >= Date') @@ -89,7 +74,6 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: "to datetime and filter but failed. Please " "check this input." ) from e - if df['Date'].shape[0] == 0: raise ValueError( "Output df has no rows. 
Please check " @@ -131,22 +115,6 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: ) from e # timestamp: str -> datetime -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 3d66880ed6bf8823b8086d3d9e742d09cb6e3a6b - df.columns = ["fips", - "cumulative_counts_tot_vaccine", - "cumulative_counts_tot_vaccine_12P", - "cumulative_counts_tot_vaccine_18P", - "cumulative_counts_tot_vaccine_65P", - "cumulative_counts_part_vaccine", - "cumulative_counts_part_vaccine_12P", - "cumulative_counts_part_vaccine_18P", - "cumulative_counts_part_vaccine_65P", - "timestamp"] -<<<<<<< HEAD -======= try: df.columns = ["fips", "cumulative_counts_tot_vaccine", @@ -165,9 +133,6 @@ def pull_cdcvacc_data(base_url: str, logger: Logger) -> pd.DataFrame: "amend drop_columns." ) from e ->>>>>>> backup-branch -======= ->>>>>>> 3d66880ed6bf8823b8086d3d9e742d09cb6e3a6b df_dummy = df.loc[(df["fips"]!='00000') & (df["timestamp"] == min(df["timestamp"]))].copy() #handle fips 00000 separately df_oth = df.loc[((df["fips"]=='00000') & From 23380db77860a1f5afbee69b95df33ade8a82a63 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 29 Sep 2021 15:10:47 -0700 Subject: [PATCH 90/95] Cdc vaccines: add basic nancodes --- cdc_vaccines/delphi_cdc_vaccines/run.py | 23 ++++++++++++++++++++--- cdc_vaccines/tests/test_run.py | 4 ++-- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index 6f8f70f55..f61abdf39 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -19,13 +19,29 @@ from itertools import product import time as tm import os + +from pandas import DataFrame + from delphi_utils.export import create_export_csv from delphi_utils.geomap import GeoMapper -from delphi_utils import get_structured_logger, nancodes +from delphi_utils import get_structured_logger +from delphi_utils.nancodes import Nans from .constants import GEOS, SIGNALS, SMOOTHERS 
from .pull import pull_cdcvacc_data +def add_nancodes(df: DataFrame) -> DataFrame: + # Default nancodes for a non-survey indicator + df["missing_val"] = Nans.NOT_MISSING + df["missing_se"] = Nans.NOT_APPLICABLE + df["missing_sample_size"] = Nans.NOT_APPLICABLE + + # Mark an values found null to the catch-all category + remaining_nans_mask = df["val"].isnull() & df["missing_val"].eq(Nans.NOT_MISSING) + df.loc[remaining_nans_mask, "missing_val"] = Nans.OTHER + + return df + def run_module(params): """ Run the indicator. @@ -71,8 +87,9 @@ def run_module(params): df["val"] = df[["geo_id", sensor]].groupby("geo_id")[sensor].transform( smoother[0].smooth ) - df["missing_se"] = nancodes.Nans.NOT_APPLICABLE - df["missing_sample_size"] = nancodes.Nans.NOT_APPLICABLE + df["se"] = None + df["sample_size"] = None + df = add_nancodes(df) sensor_name = sensor + smoother[1] if not (("cumulative" in sensor_name) and ("7dav" in sensor_name)): # don't export first 6 days for smoothed signals since they'll be nan. 
diff --git a/cdc_vaccines/tests/test_run.py b/cdc_vaccines/tests/test_run.py index c96e90112..e7151b4fc 100644 --- a/cdc_vaccines/tests/test_run.py +++ b/cdc_vaccines/tests/test_run.py @@ -60,7 +60,7 @@ def test_output_files_exist(self): "cumulative_counts_part_vaccine_18P", "incidence_counts_part_vaccine_18P", "cumulative_counts_part_vaccine_65P", - "incidence_counts_part_vaccine_65P"]: + "incidence_counts_part_vaccine_65P"]: for date in dates: for geo in geos: expected_files += [date + "_" + geo + "_" + metric + ".csv"] @@ -76,4 +76,4 @@ def test_output_file_format(self): df = pd.read_csv( join("receiving", "20210819_state_cumulative_counts_tot_vaccine.csv") ) - assert (df.columns.values == ["geo_id", "val", "missing_se", "missing_sample_size"]).all() \ No newline at end of file + assert (df.columns.values == ["geo_id", "val", "se", "sample_size", "missing_val", "missing_se", "missing_sample_size"]).all() From 3febaf7233179a28e29c51b93beef98bd55bebde Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Tue, 21 Dec 2021 21:39:00 -0500 Subject: [PATCH 91/95] Changed test to account for export start and end date --- cdc_vaccines/delphi_cdc_vaccines/pull.py | 40 +++++++++++------------- cdc_vaccines/delphi_cdc_vaccines/run.py | 8 ++++- cdc_vaccines/tests/test_pull.py | 7 +++-- cdc_vaccines/tests/test_run.py | 3 +- 4 files changed, 31 insertions(+), 27 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py index a6e9757b8..f068b7f8a 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/pull.py +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py @@ -57,21 +57,23 @@ def pull_cdcvacc_data(base_url: str, export_start_date: str, df = pd.read_csv(base_url) df['Date']=pd.to_datetime(df['Date']) try: - export_start_date = pd.to_datetime(export_start_date) - df = df.query('@export_start_date <= Date') + export_start_date = pd.to_datetime(0) if (pd.to_datetime(export_start_date) + is pd.NaT) else pd.to_datetime(export_start_date) + 
export_end_date = pd.Timestamp.max if (pd.to_datetime(export_end_date) + is pd.NaT) else pd.to_datetime(export_end_date) except KeyError as e: raise ValueError( - "Tried to convert export_start_date param " - "to datetime and filter but failed. Please " + "Tried to convert export_start/end_date param " + "to datetime but failed. Please " "check this input." ) from e try: - export_end_date = pd.to_datetime(export_end_date) - df = df.query('export_end_date >= Date') + df = df.query('@export_start_date <= Date') + df = df.query('@export_end_date >= Date') except KeyError as e: raise ValueError( - "Tried to convert export_end_date param " - "to datetime and filter but failed. Please " + "Used export_start/end_date param " + "to filter dataframe but failed. Please " "check this input." ) from e if df['Date'].shape[0] == 0: @@ -90,10 +92,10 @@ def pull_cdcvacc_data(base_url: str, export_start_date: str, checksum=hashlib.sha256(pd.util.hash_pandas_object(df).values).hexdigest()) df.columns = [i.lower() for i in df.columns] - df['recip_state'] = df['recip_state'].str.lower() - drop_columns.extend([x for x in df.columns if ("pct" in x) | ("svi" in x)]) - drop_columns.extend(df.columns[22:]) - drop_columns = list(set(drop_columns)) + df.loc[:,'recip_state'] = df['recip_state'].str.lower().copy() + + drop_columns = list(set(drop_columns + [x for x in df.columns if + ("pct" in x) | ("svi" in x)] + list(df.columns[22:]))) df = GeoMapper().add_geocode(df, "state_id", "state_code", from_col="recip_state", new_col="state_id", dropna=False) df['state_id'] = df['state_id'].fillna('0').astype(int) @@ -133,11 +135,7 @@ def pull_cdcvacc_data(base_url: str, export_start_date: str, "amend drop_columns." 
) from e - df_dummy = df.loc[(df["fips"]!='00000') & (df["timestamp"] == min(df["timestamp"]))].copy() - #handle fips 00000 separately - df_oth = df.loc[((df["fips"]=='00000') & - (df["timestamp"]==min(df[df['fips'] == '00000']['timestamp'])))].copy() - df_dummy = pd.concat([df_dummy, df_oth]) + df_dummy = df.loc[(df["timestamp"] == min(df["timestamp"]))].copy() df_dummy.loc[:, "timestamp"] = df_dummy.loc[:, "timestamp"] - pd.Timedelta(days=1) df_dummy.loc[:, ["cumulative_counts_tot_vaccine", "cumulative_counts_tot_vaccine_12P", @@ -162,13 +160,11 @@ def pull_cdcvacc_data(base_url: str, export_start_date: str, df.reset_index(inplace=True, drop=True) # Final sanity checks unique_days = df["timestamp"].unique() - min_timestamp = min(unique_days) - max_timestamp = max(unique_days) - n_days = (max_timestamp - min_timestamp) / np.timedelta64(1, "D") + 1 + n_days = (max(unique_days) - min(unique_days)) / np.timedelta64(1, "D") + 1 if n_days != len(unique_days): raise ValueError( - f"Not every day between {min_timestamp} and " - "{max_timestamp} is represented." + f"Not every day between {min(unique_days)} and " + "{max(unique_days)} is represented." ) return df.loc[ df["timestamp"] >= min(df["timestamp"]), diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py index f61abdf39..4475de6d8 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/run.py +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -31,7 +31,13 @@ def add_nancodes(df: DataFrame) -> DataFrame: - # Default nancodes for a non-survey indicator + """ + Provide default nancodes for a non-survey indicator. 
+ + Arguments + -------- + params: DataFrame + """ df["missing_val"] = Nans.NOT_MISSING df["missing_se"] = Nans.NOT_APPLICABLE df["missing_sample_size"] = Nans.NOT_APPLICABLE diff --git a/cdc_vaccines/tests/test_pull.py b/cdc_vaccines/tests/test_pull.py index b5f8e3447..f1321655c 100644 --- a/cdc_vaccines/tests/test_pull.py +++ b/cdc_vaccines/tests/test_pull.py @@ -23,7 +23,7 @@ class TestPullCDCVaccines: """Tests for the `pull_cdcvacc_data()` function.""" def test_good_file(self): """Test the expected output from a smaller file.""" - df = pull_cdcvacc_data(BASE_URL_GOOD, TEST_LOGGER) + df = pull_cdcvacc_data(BASE_URL_GOOD, "", "", TEST_LOGGER) expected_df = pd.DataFrame({ "fips": ["00000","00000","32013","32013","48305","48305"], "timestamp": [pd.Timestamp("2021-08-25"), pd.Timestamp("2021-08-26"), @@ -52,18 +52,19 @@ def test_good_file(self): index=[0, 1, 2, 3, 4, 5]) # sort since rows order doesn't matter + print(df.sort_index().to_string()) pd.testing.assert_frame_equal(df.sort_index(), expected_df.sort_index()) def test_missing_days(self): """Test if error is raised when there are missing days.""" with pytest.raises(ValueError): pull_cdcvacc_data( - BASE_URL_BAD["missing_days"], TEST_LOGGER + BASE_URL_BAD["missing_days"], "","", TEST_LOGGER ) def test_missing_cols(self): """Test if error is raised when there are missing columns.""" with pytest.raises(ValueError): pull_cdcvacc_data( - BASE_URL_BAD["missing_cols"],TEST_LOGGER + BASE_URL_BAD["missing_cols"],"","",TEST_LOGGER ) diff --git a/cdc_vaccines/tests/test_run.py b/cdc_vaccines/tests/test_run.py index e7151b4fc..2f0093b76 100644 --- a/cdc_vaccines/tests/test_run.py +++ b/cdc_vaccines/tests/test_run.py @@ -20,7 +20,8 @@ class TestRun: }, "indicator": { "base_url": "./test_data/small.csv", - "export_start_date": "2021-08-10" + "export_start_date": "2021-08-10", + "export_end_date": "2021-08-19" } } From b74eb5ff82904a5095aa2dced93dda545c814319 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Wed, 22 Dec 
2021 11:24:53 -0500 Subject: [PATCH 92/95] Added tests for export_start_date & export_end_date --- cdc_vaccines/delphi_cdc_vaccines/pull.py | 4 +- cdc_vaccines/tests/test_pull.py | 15 +++++++- cdc_vaccines/tests/test_run.py | 49 +++++++++++++++++++++--- 3 files changed, 59 insertions(+), 9 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py index f068b7f8a..fcb6d88d1 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/pull.py +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py @@ -69,7 +69,7 @@ def pull_cdcvacc_data(base_url: str, export_start_date: str, ) from e try: df = df.query('@export_start_date <= Date') - df = df.query('@export_end_date >= Date') + df = df.query('Date <= @export_end_date') except KeyError as e: raise ValueError( "Used export_start/end_date param " @@ -82,7 +82,7 @@ def pull_cdcvacc_data(base_url: str, export_start_date: str, "if export_start_date is later than " "export_end_date. Else check if base_url" " still functional." - ) from e + ) logger.info("data retrieved from source", num_rows=df.shape[0], diff --git a/cdc_vaccines/tests/test_pull.py b/cdc_vaccines/tests/test_pull.py index f1321655c..503023777 100644 --- a/cdc_vaccines/tests/test_pull.py +++ b/cdc_vaccines/tests/test_pull.py @@ -2,7 +2,6 @@ import pytest import logging from unittest.mock import patch - import pandas as pd import numpy as np from delphi_cdc_vaccines.pull import pull_cdcvacc_data @@ -68,3 +67,17 @@ def test_missing_cols(self): pull_cdcvacc_data( BASE_URL_BAD["missing_cols"],"","",TEST_LOGGER ) + + def test_start_date(self): + """ Test that there is an error if start date > end date. 
""" + with pytest.raises(ValueError): + pull_cdcvacc_data( + BASE_URL_BAD["missing_cols"],"2021-12-12","2021-08-25",TEST_LOGGER + ) + + def test_bad_start_only(self): + """ Test if there is an export_start_date which is not a date""" + with pytest.raises(ValueError): + pull_cdcvacc_data( + BASE_URL_BAD["missing_cols"],"abcd","2021-08-25",TEST_LOGGER + ) \ No newline at end of file diff --git a/cdc_vaccines/tests/test_run.py b/cdc_vaccines/tests/test_run.py index 2f0093b76..f66dd0403 100644 --- a/cdc_vaccines/tests/test_run.py +++ b/cdc_vaccines/tests/test_run.py @@ -1,9 +1,8 @@ """Tests for running the CDC Vaccine indicator.""" from itertools import product -from os import listdir +from os import listdir, remove from os.path import join from unittest.mock import patch - import pandas as pd from delphi_cdc_vaccines.run import run_module @@ -21,10 +20,12 @@ class TestRun: "indicator": { "base_url": "./test_data/small.csv", "export_start_date": "2021-08-10", - "export_end_date": "2021-08-19" + "export_end_date": "2021-08-17" } } + + def test_output_files_exist(self): """Test that the expected output files exist.""" run_module(self.PARAMS) @@ -40,8 +41,6 @@ def test_output_files_exist(self): "20210815", "20210816", "20210817", - "20210818", - "20210819", ] geos = ["state", "hrr", "hhs", "nation", "msa"] @@ -68,13 +67,51 @@ def test_output_files_exist(self): if not("cumulative" in metric) and not (date in dates[:6]): expected_files += [date + "_" + geo + "_" + metric + "_7dav.csv"] + print(set(csv_files)-set(expected_files)) assert set(csv_files) == set(expected_files) + # Remove the csv_files from the directory + [remove(join("receiving", f)) for f in csv_files] def test_output_file_format(self): """Test that the output files have the proper format.""" + self.PARAMS['indicator']['export_start_date'] = "2021-08-19" + self.PARAMS['indicator']['export_end_date'] = "2021-08-19" run_module(self.PARAMS) - + csv_files = [f for f in listdir("receiving") if f.endswith(".csv")] 
df = pd.read_csv( join("receiving", "20210819_state_cumulative_counts_tot_vaccine.csv") ) assert (df.columns.values == ["geo_id", "val", "se", "sample_size", "missing_val", "missing_se", "missing_sample_size"]).all() + # Remove the csv_files from the directory + [remove(join("receiving", f)) for f in csv_files] + + def test_end_date(self): + """ Test if there is only a end date, that the correct range of dates for the files are returned. """ + self.PARAMS['indicator']['export_start_date'] = "" + self.PARAMS['indicator']['export_end_date'] = "2021-08-11" + run_module(self.PARAMS) + csv_files = [f for f in listdir("receiving") if f.endswith(".csv")] + list_dates = set([f.split("_")[0] for f in csv_files]) + assert(list_dates == {"20210810", "20210811"}) + # Remove the .csv files from the directory + [remove(join("receiving", f)) for f in csv_files] + + def test_delta(self): + """ Test if the correct range of dates for the files are returned. """ + self.PARAMS['indicator']['export_start_date'] = "2021-08-10" + self.PARAMS['indicator']['export_end_date'] = "2021-08-11" + run_module(self.PARAMS) + csv_files = [f for f in listdir("receiving") if f.endswith(".csv")] + list_dates = set([f.split("_")[0] for f in csv_files]) + assert(list_dates == {'20210810', '20210811'}) + # Remove the .csv files from the directory + [remove(join("receiving", f)) for f in csv_files] + + + + + + + + + From 3da0a85b8c1d1a824fc2c512b8d18dd01abaaab5 Mon Sep 17 00:00:00 2001 From: Ananya-Joshi Date: Thu, 13 Jan 2022 19:04:26 -0500 Subject: [PATCH 93/95] Update cdc_vaccines/delphi_cdc_vaccines/pull.py Co-authored-by: Dmitry Shemetov --- cdc_vaccines/delphi_cdc_vaccines/pull.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py index fcb6d88d1..c7958691d 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/pull.py +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py @@ -148,16 +148,10 @@ def 
pull_cdcvacc_data(base_url: str, export_start_date: str, ]] = 0 df =pd.concat([df_dummy, df]) - # Obtain new_counts - df.sort_values(["fips", "timestamp"], inplace=True) + df = df.set_index(["fips", "timestamp"]) for to, from_d in DIFFERENCE_MAPPING.items(): - df[to] = df[from_d].diff() - - rem_list = [ x for x in list(df.columns) if x not in ['timestamp', 'fips'] ] - # Handle edge cases where we diffed across fips - mask = df["fips"] != df["fips"].shift(1) - df.loc[mask, rem_list] = np.nan - df.reset_index(inplace=True, drop=True) + df[to] = df.groupby(level=0)[from_d].diff() + df.reset_index(inplace=True) # Final sanity checks unique_days = df["timestamp"].unique() n_days = (max(unique_days) - min(unique_days)) / np.timedelta64(1, "D") + 1 From ba8901845fb08a9ebfa026919a429619820aded4 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Thu, 13 Jan 2022 17:29:01 -0800 Subject: [PATCH 94/95] Fix test_pull issue --- cdc_vaccines/delphi_cdc_vaccines/pull.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py index c7958691d..f3b56d2e3 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/pull.py +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py @@ -145,13 +145,14 @@ def pull_cdcvacc_data(base_url: str, export_start_date: str, "cumulative_counts_part_vaccine_12P", "cumulative_counts_part_vaccine_18P", "cumulative_counts_part_vaccine_65P", - ]] = 0 + ]] = np.nan - df =pd.concat([df_dummy, df]) + df = pd.concat([df_dummy, df]) df = df.set_index(["fips", "timestamp"]) for to, from_d in DIFFERENCE_MAPPING.items(): df[to] = df.groupby(level=0)[from_d].diff() - df.reset_index(inplace=True) + df.reset_index(inplace=True) + # Final sanity checks unique_days = df["timestamp"].unique() n_days = (max(unique_days) - min(unique_days)) / np.timedelta64(1, "D") + 1 From 0bf112e6e7dd8b31a2b8b95c829d391462021474 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Thu, 13 Jan 2022 18:22:10 
-0800 Subject: [PATCH 95/95] Fix test_pull --- cdc_vaccines/delphi_cdc_vaccines/pull.py | 17 ++++++++++++++--- cdc_vaccines/tests/test_pull.py | 6 +++--- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py index f3b56d2e3..59475c678 100644 --- a/cdc_vaccines/delphi_cdc_vaccines/pull.py +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py @@ -135,8 +135,9 @@ def pull_cdcvacc_data(base_url: str, export_start_date: str, "amend drop_columns." ) from e - df_dummy = df.loc[(df["timestamp"] == min(df["timestamp"]))].copy() - df_dummy.loc[:, "timestamp"] = df_dummy.loc[:, "timestamp"] - pd.Timedelta(days=1) + min_time = min(df["timestamp"]) + df_dummy = df.loc[(df["timestamp"] == min_time)].copy() + df_dummy.loc[:, "timestamp"] = min_time - pd.Timedelta(days=1) df_dummy.loc[:, ["cumulative_counts_tot_vaccine", "cumulative_counts_tot_vaccine_12P", "cumulative_counts_tot_vaccine_18P", @@ -145,12 +146,22 @@ def pull_cdcvacc_data(base_url: str, export_start_date: str, "cumulative_counts_part_vaccine_12P", "cumulative_counts_part_vaccine_18P", "cumulative_counts_part_vaccine_65P", - ]] = np.nan + ]] = 0 df = pd.concat([df_dummy, df]) df = df.set_index(["fips", "timestamp"]) for to, from_d in DIFFERENCE_MAPPING.items(): df[to] = df.groupby(level=0)[from_d].diff() + idx = pd.IndexSlice + df.loc[idx[:, min_time - pd.Timedelta(days=1)], ["cumulative_counts_tot_vaccine", + "cumulative_counts_tot_vaccine_12P", + "cumulative_counts_tot_vaccine_18P", + "cumulative_counts_tot_vaccine_65P", + "cumulative_counts_part_vaccine", + "cumulative_counts_part_vaccine_12P", + "cumulative_counts_part_vaccine_18P", + "cumulative_counts_part_vaccine_65P", + ]] = np.nan df.reset_index(inplace=True) # Final sanity checks diff --git a/cdc_vaccines/tests/test_pull.py b/cdc_vaccines/tests/test_pull.py index 503023777..de5297e35 100644 --- a/cdc_vaccines/tests/test_pull.py +++ b/cdc_vaccines/tests/test_pull.py @@ 
-48,11 +48,11 @@ def test_good_file(self): "cumulative_counts_part_vaccine_12P": [np.nan,1119203.0,np.nan,6290.0,np.nan,0.0], "cumulative_counts_part_vaccine_18P": [np.nan,1035082.0,np.nan,6014.0,np.nan,0.0], "cumulative_counts_part_vaccine_65P": [np.nan,75596.0,np.nan,1877.0,np.nan,0.0]}, - + index=[0, 1, 2, 3, 4, 5]) + # sort since rows order doesn't matter - print(df.sort_index().to_string()) - pd.testing.assert_frame_equal(df.sort_index(), expected_df.sort_index()) + pd.testing.assert_frame_equal(df.set_index(["fips", "timestamp"]).sort_index(), expected_df.set_index(["fips", "timestamp"]).sort_index()) def test_missing_days(self): """Test if error is raised when there are missing days."""