diff --git a/cdc_covidnet/delphi_cdc_covidnet/__init__.py b/cdc_covidnet/delphi_cdc_covidnet/__init__.py index 64b0655cd..4ad9dd3f6 100644 --- a/cdc_covidnet/delphi_cdc_covidnet/__init__.py +++ b/cdc_covidnet/delphi_cdc_covidnet/__init__.py @@ -10,7 +10,6 @@ from . import run from . import api_config -from . import geo_maps from . import update_sensor from . import covidnet diff --git a/cdc_covidnet/delphi_cdc_covidnet/geo_maps.py b/cdc_covidnet/delphi_cdc_covidnet/geo_maps.py deleted file mode 100644 index f7f2add33..000000000 --- a/cdc_covidnet/delphi_cdc_covidnet/geo_maps.py +++ /dev/null @@ -1,45 +0,0 @@ -""" -Contains geographic mapping tools. - -Author: Eu Jing Chua -Created: 2020-06-12 -""" - -from os.path import join - -import pandas as pd - -from .api_config import APIConfig - -class GeoMaps: - """ - Class to handle any geography-related mappings - """ - - def __init__(self, geo_filepath: str): - self.geo_filepath = geo_filepath - - def state_name_to_abbr(self, data: pd.DataFrame) -> pd.DataFrame: - """ - Converts the geo_id column from a full state name to the two-letter abbreviation - - Args: - data: The pd.DataFrame with the geo_id column to be converted - - Returns: - The modified pd.DataFrame after conversion - """ - # Read in geographical mappings - state_map = pd.read_csv( - join(self.geo_filepath, "02_20_uszips.csv"), - usecols=["state_id", "state_name"]) - state_map.drop_duplicates(inplace=True) - - # State map is just the Series of state name -> state id - state_map.set_index("state_name", drop=True, inplace=True) - state_map = state_map["state_id"] - - # Map state name to state two-letter abbreviation - data[APIConfig.STATE_COL] = data[APIConfig.STATE_COL].map(state_map) - - return data diff --git a/cdc_covidnet/delphi_cdc_covidnet/update_sensor.py b/cdc_covidnet/delphi_cdc_covidnet/update_sensor.py index b90ef0a78..a434f1e40 100644 --- a/cdc_covidnet/delphi_cdc_covidnet/update_sensor.py +++ b/cdc_covidnet/delphi_cdc_covidnet/update_sensor.py @@ -12,11 +12,10 @@ import numpy as np import pandas as pd -from delphi_utils import read_params +from delphi_utils import read_params, GeoMapper import covidcast from .api_config import APIConfig from .covidnet import CovidNet -from .geo_maps import GeoMaps from .constants import SIGNALS def write_to_csv(data: pd.DataFrame, out_name: str, output_path: str): @@ -49,9 +48,11 @@ def write_to_csv(data: pd.DataFrame, out_name: str, output_path: str): def update_sensor( - state_files: List[str], mmwr_info: pd.DataFrame, - output_path: str, static_path: str, - start_date: datetime, end_date: datetime) -> pd.DataFrame: + state_files: List[str], + mmwr_info: pd.DataFrame, + output_path: str, + start_date: datetime, + end_date: datetime) -> pd.DataFrame: """ Generate sensor values, and write to csv format. @@ -59,7 +60,6 @@ def update_sensor( state_files: List of JSON files representing COVID-NET hospitalization data for each state mmwr_info: Mappings from MMWR week to actual dates, as a pd.DataFrame output_path: Path to write the csvs to - static_path: Path for the static geographic fiels start_date: First sensor date (datetime.datetime) end_date: Last sensor date (datetime.datetime) @@ -85,9 +85,15 @@ def update_sensor( ] # Set state id to two-letter abbreviation - geo_map = GeoMaps(static_path) - hosp_df = geo_map.state_name_to_abbr(hosp_df) - + gmpr = GeoMapper() + hosp_df = gmpr.add_geocode(hosp_df, + from_col=APIConfig.STATE_COL, + from_code="state_name", + new_code="state_id", + dropna=False) + # To use the original column name, reassign original column and drop new one + hosp_df[APIConfig.STATE_COL] = hosp_df["state_id"].str.upper() + hosp_df.drop("state_id", axis=1, inplace=True) assert not hosp_df.duplicated(["date", "geo_id"]).any(), "Non-unique (date, geo_id) pairs" hosp_df.set_index(["date", "geo_id"], inplace=True) diff --git a/cdc_covidnet/tests/test_geomaps.py b/cdc_covidnet/tests/test_geomaps.py deleted file mode 100644 index fd51208f9..000000000 --- a/cdc_covidnet/tests/test_geomaps.py +++ /dev/null @@ -1,38 +0,0 @@ -import pandas as pd - -from delphi_utils import read_params -from delphi_cdc_covidnet.api_config import APIConfig -from delphi_cdc_covidnet.geo_maps import GeoMaps - -params = read_params() - -class TestGeoMaps: - geomap = GeoMaps(params["static_file_dir"]) - - def test_state_name_to_abbr(self): - # Mappings of the 14 participating states on 06/15/2020 - state_abbr = [ - ("California", "CA"), - ("Colorado", "CO"), - ("Connecticut", "CT"), - ("Georgia", "GA"), - ("Maryland", "MD"), - ("Minnesota", "MN"), - ("New Mexico", "NM"), - ("New York", "NY"), - ("Oregon", "OR"), - ("Tennessee", "TN"), - ("Iowa", "IA"), - ("Michigan", "MI"), - ("Ohio", "OH"), - ("Utah", "UT") - ] - - state_df = pd.DataFrame(state_abbr, columns=[APIConfig.STATE_COL, "abbr"]) - - # Perform mapping - state_df = self.geomap.state_name_to_abbr(state_df) - - # Check that the mapping was right - assert (state_df[APIConfig.STATE_COL].str.len() == 2).all() - assert (state_df[APIConfig.STATE_COL] == state_df["abbr"]).all() diff --git a/cdc_covidnet/tests/test_update_sensor.py b/cdc_covidnet/tests/test_update_sensor.py index ad435a5d1..fe9499e23 100644 --- a/cdc_covidnet/tests/test_update_sensor.py +++ b/cdc_covidnet/tests/test_update_sensor.py @@ -75,10 +75,7 @@ def test_syn_update_sensor(self): end_date = datetime(year=2020, month=3, day=17) # Generate the csvs - hosp_df = update_sensor( - state_files, mmwr_info, - temp_dir, STATIC_DIR, - start_date, end_date) + hosp_df = update_sensor(state_files, mmwr_info, temp_dir, start_date, end_date) # Check dataframe returned assert hosp_df.index.nlevels == 2