diff --git a/scripts/us_epa/ejscreen/README.md b/scripts/us_epa/ejscreen/README.md index e531f63ac3..10ddec5e20 100644 --- a/scripts/us_epa/ejscreen/README.md +++ b/scripts/us_epa/ejscreen/README.md @@ -20,9 +20,12 @@ which are a small subset of the available EJSCREEN variables. To generate `ejscreen_airpollutants.csv` and `ejscreen.tmcf` run the following: - `python3 ejscreen.py` +### Downloading and Processing Data +To download and process the data, run the command below: `python3 ejscreen.py`. Running this command generates the input files and the csv, mcf, and tmcf files. -As of July, 2021 this includes data through the end of 2020. +To only process previously downloaded input files, run the command below: `python3 ejscreen.py --mode=process` + +To only download the input files, run the command below: `python3 ejscreen.py --mode=download` ### Unit Tests diff --git a/scripts/us_epa/ejscreen/config.json b/scripts/us_epa/ejscreen/config.json new file mode 100644 index 0000000000..9bed0a5556 --- /dev/null +++ b/scripts/us_epa/ejscreen/config.json @@ -0,0 +1,99 @@ +{ + "YEARS": [ "2015", "2016", "2017", "2018", "2019", "2020", "2021", "2022", "2023", "2024"], + "NORM_CSV_COLUMNS": ["ID", "DSLPM", "CANCER", "RESP", "OZONE", "PM25"], + "NORM_CSV_COLUMNS1": ["ID", "DSLPM", "OZONE", "PM25"], + "CSV_COLUMNS_BY_YEAR": { + "2015": ["FIPS", "dpm", "cancer", "resp", "o3", "pm"], + "2016": ["ID", "DSLPM", "CANCER", "RESP", "OZONE", "PM25"], + "2017": ["ID", "DSLPM", "CANCER", "RESP", "OZONE", "PM25"], + "2018": ["ID", "DSLPM", "CANCER", "RESP", "OZONE", "PM25"], + "2019": ["ID", "DSLPM", "CANCER", "RESP", "OZONE", "PM25"], + "2020": ["ID", "DSLPM", "CANCER", "RESP", "OZONE", "PM25"], + "2021": ["ID", "DSLPM", "CANCER", "RESP", "OZONE", "PM25"], + "2022": ["ID", "DSLPM", "CANCER", "RESP", "OZONE", "PM25"], + "2023": ["ID", "DSLPM", "CANCER", "RESP", "OZONE", "PM25"], + "2024": ["ID", "DSLPM", "OZONE", "PM25"] + }, + "ZIP_FILENAMES": { + "2015": "EJSCREEN_20150505.csv", + "2016": 
"EJSCREEN_V3_USPR_090216_CSV", + "2017": null, + "2018": "EJSCREEN_2018_USPR_csv", + "2019": "EJSCREEN_2019_USPR.csv", + "2020": "EJSCREEN_2020_USPR.csv", + "2021": "EJSCREEN_2021_USPR.csv", + "2022": "EJSCREEN_2022_with_AS_CNMI_GU_VI.csv", + "2023": "EJSCREEN_2023_BG_with_AS_CNMI_GU_VI.csv", + "2024": "EJScreen_2024_Tract_with_AS_CNMI_GU_VI.csv" + }, + "FILENAMES": { + "2015": "EJSCREEN_20150505", + "2016": "EJSCREEN_Full_V3_USPR_TSDFupdate", + "2017": "EJSCREEN_2017_USPR_Public", + "2018": "EJSCREEN_Full_USPR_2018", + "2019": "EJSCREEN_2019_USPR", + "2020": "EJSCREEN_2020_USPR", + "2021": "EJSCREEN_2021_USPR", + "2022": "EJSCREEN_2022_Full_with_AS_CNMI_GU_VI", + "2023": "EJSCREEN_2023_BG_with_AS_CNMI_GU_VI", + "2024": "EJScreen_2024_Tract_with_AS_CNMI_GU_VI" + }, + "TEMPLATE_MCF": [ + { + "Node": "E:ejscreen_airpollutants->E0", + "typeOf": "dcs:StatVarObservation", + "variableMeasured": "dcs:Mean_Concentration_AirPollutant_DieselPM", + "observationDate": "C:ejscreen_airpollutants->year", + "observationAbout": "C:ejscreen_airpollutants->FIPS", + "observationPeriod": "dcs:P1Y", + "value": "C:ejscreen_airpollutants->DSLPM", + "unit": "dcs:MicrogramsPerCubicMeter" + }, + { + "Node": "E:ejscreen_airpollutants->E1", + "typeOf": "dcs:StatVarObservation", + "variableMeasured": "dcs:AirPollutant_Cancer_Risk", + "observationDate": "C:ejscreen_airpollutants->year", + "observationAbout": "C:ejscreen_airpollutants->FIPS", + "observationPeriod": "dcs:P1Y", + "value": "C:ejscreen_airpollutants->CANCER", + "unit": "dcs:PerMillionPerson" + }, + { + "Node": "E:ejscreen_airpollutants->E2", + "typeOf": "dcs:StatVarObservation", + "variableMeasured": "dcs:AirPollutant_Respiratory_Hazard", + "observationDate": "C:ejscreen_airpollutants->year", + "observationAbout": "C:ejscreen_airpollutants->FIPS", + "observationPeriod": "dcs:P1Y", + "value": "C:ejscreen_airpollutants->RESP" + }, + { + "Node": "E:ejscreen_airpollutants->E3", + "typeOf": "dcs:StatVarObservation", + "variableMeasured": 
"dcs:Mean_Concentration_AirPollutant_Ozone", + "observationDate": "C:ejscreen_airpollutants->year", + "observationAbout": "C:ejscreen_airpollutants->FIPS", + "observationPeriod": "dcs:P1Y", + "value": "C:ejscreen_airpollutants->OZONE", + "unit": "dcs:PartsPerBillion" + }, + { + "Node": "E:ejscreen_airpollutants->E4", + "typeOf": "dcs:StatVarObservation", + "variableMeasured": "dcs:Mean_Concentration_AirPollutant_PM2.5", + "observationDate": "C:ejscreen_airpollutants->year", + "observationAbout": "C:ejscreen_airpollutants->FIPS", + "observationPeriod": "dcs:P1Y", + "value": "C:ejscreen_airpollutants->PM25", + "unit": "dcs:MicrogramsPerCubicMeter" + } + ] + , + "BASE_URL": "https://gaftp.epa.gov/EJSCREEN", + "URL_SUFFIX": { + "2023": "2.22_September_UseMe", + "2024": "2.32_August_UseMe" + }, + "RENAME_COLUMNS_YEARS": ["2024"] +} diff --git a/scripts/us_epa/ejscreen/ejscreen.py b/scripts/us_epa/ejscreen/ejscreen.py index ea9f15ed3b..904f3a0291 100644 --- a/scripts/us_epa/ejscreen/ejscreen.py +++ b/scripts/us_epa/ejscreen/ejscreen.py @@ -1,103 +1,91 @@ -''' -Generates cleaned CSV for the EPA EJSCREEN data and TMCF. -Usage: python3 ejscreen.py -''' +# Copyright 2023 Google LLC +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import os
import io
import zipfile
import requests
import pandas as pd
import json
from absl import logging, flags, app
import sys
import time
from retry import retry

_MODULE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(_MODULE_DIR, '../../../util/'))
import file_util

logging.set_verbosity(logging.INFO)

_FLAGS = flags.FLAGS

flags.DEFINE_string('config_path',
                    'gs://unresolved_mcf/epa/ejscreen/config.json',
                    'Path to config file')
flags.DEFINE_string(
    'mode', '',
    'Mode of operation: "download" to only download, "process" to only process, leave empty for both.'
)

# (connect, read) timeouts in seconds. Without a timeout ``requests.get`` can
# block forever on a stalled connection, and the retry decorator would never
# get a chance to fire.
_REQUEST_TIMEOUT = (30, 300)


def build_url(year, zip_filename=None):
    """Return the download URL for one year's EJSCREEN file.

    Reads the module globals BASE_URL, URL_SUFFIX and FILENAMES, which
    main() populates from the config file before calling this function.

    Args:
        year: Year as a string key, e.g. '2020'.
        zip_filename: Name of the zip archive without the '.zip' extension,
            or None/empty when the year is published as a plain CSV.

    Returns:
        The URL string of the zip archive, or of the CSV when no zip
        filename is given.
    """
    if not zip_filename:
        # Years without an archive are published as a bare CSV.
        return f'{BASE_URL}/{year}/{FILENAMES[year]}.csv'
    if year in URL_SUFFIX:
        # Some years keep their files in an extra sub-directory.
        return f'{BASE_URL}/{year}/{URL_SUFFIX[year]}/{zip_filename}.zip'
    return f'{BASE_URL}/{year}/{zip_filename}.zip'


@retry(tries=5, delay=5, backoff=5)
def download_with_retry(url):
    """GET ``url`` and return the response, retrying when the request raises.

    Only exceptions trigger a retry; a non-200 response is returned as-is and
    must be checked by the caller.
    """
    logging.info(f"Downloading URL : {url}")
    # NOTE(review): verify=False disables TLS certificate verification. It is
    # kept to preserve existing behaviour; confirm whether the EPA server
    # still requires it.
    return requests.get(url=url, verify=False, timeout=_REQUEST_TIMEOUT)


def download_file(url, year, input_folder, zip_filename=None):
    """Download ``url`` and store it as ``<input_folder>/<year>.zip|.csv``.

    Args:
        url: URL to fetch.
        year: Year string used for the local file name.
        input_folder: Directory for the downloaded file; created if missing.
        zip_filename: Truthy when the payload is a zip archive, which selects
            the '.zip' extension; otherwise the file is saved as '.csv'.

    Any failure (request error, non-200 status, file-system error) is
    reported through ``logging.fatal``, which in absl terminates the process
    after logging.
    """
    try:
        response = download_with_retry(url)
        if response.status_code != 200:
            logging.fatal(
                f"Failed to download file for {year}. HTTP Status Code: {response.status_code} URL : {url}"
            )
            return

        os.makedirs(input_folder, exist_ok=True)
        extension = 'zip' if zip_filename else 'csv'
        file_path = os.path.join(input_folder, f'{year}.{extension}')
        with open(file_path, 'wb') as f:
            f.write(response.content)
        logging.info(f"File downloaded and saved as {file_path}")
    except Exception as e:
        # Include the underlying error; the previous message discarded it.
        logging.fatal(f"Failed to download file for {year} from {url}: {e}")
# Accepted values for the --mode flag; empty means download and then process.
_VALID_MODES = ("", "download", "process")


def _render_tmcf(template):
    """Render a TMCF template as text.

    A string is returned unchanged. A list of dicts (the shape stored in
    config.json) becomes one ``key: value`` line per property, with a blank
    line between nodes and a trailing newline. An empty list yields ''.
    """
    if isinstance(template, str):
        return template
    if isinstance(template, dict):
        template = [template]
    nodes = []
    for node in template:
        if isinstance(node, dict):
            nodes.append("\n".join(
                f"{key}: {value}" for key, value in node.items()))
        else:
            nodes.append(str(node))
    return "\n\n".join(nodes) + "\n" if nodes else ""


def write_tmcf(outfilename):
    """Write the module-level TEMPLATE_MCF to ``outfilename`` as TMCF text.

    Each template node is written as ``key: value`` lines rather than as the
    repr of a Python dict, which is not a valid TMCF node.
    """
    with open(outfilename, 'w') as f_out:
        f_out.write(_render_tmcf(TEMPLATE_MCF))


def _input_path(input_folder, year, zip_filename):
    """Return the local path of the raw input file for ``year``."""
    extension = 'zip' if zip_filename else 'csv'
    return os.path.join(input_folder, f'{year}.{extension}')


def _read_year(file_path, year, zip_filename, columns):
    """Load the selected columns of one year's raw file into a DataFrame."""
    if not zip_filename:
        return pd.read_csv(file_path, sep=',', usecols=columns)
    with zipfile.ZipFile(file_path, 'r') as zfile:
        with zfile.open(f'{FILENAMES[year]}.csv', 'r') as newfile:
            return pd.read_csv(newfile,
                               engine='python',
                               encoding='latin1',
                               usecols=columns)


def main(_):
    """Download and/or process the EJSCREEN files according to --mode.

    Loads the config from --config_path, publishes URL_SUFFIX, BASE_URL,
    TEMPLATE_MCF and FILENAMES as module globals for the helper functions,
    then:
      * 'download' (or empty): fetches every year's file that is not already
        in the local 'input' folder;
      * 'process' (or empty): reads each year's file, renames its columns to
        the normalized names, and writes 'ejscreen_airpollutants.csv' and
        'ejscreen.tmcf'.

    Output files are written only when processing ran, because download-only
    mode has no data frames to combine. Errors are reported with
    ``logging.fatal``, which in absl terminates the process after logging.
    """
    global URL_SUFFIX, BASE_URL, TEMPLATE_MCF, FILENAMES

    try:
        mode = _FLAGS.mode
        if mode not in _VALID_MODES:
            logging.fatal(
                f"Unknown mode '{mode}'. Use 'download', 'process', or leave empty for both."
            )
            return

        # Load configuration from config.json
        with file_util.FileIO(_FLAGS.config_path, 'r') as f:
            config = json.load(f)

        years = config["YEARS"]
        norm_columns = config["NORM_CSV_COLUMNS"]
        norm_columns_reduced = config["NORM_CSV_COLUMNS1"]
        columns_by_year = config["CSV_COLUMNS_BY_YEAR"]
        zip_filenames = config["ZIP_FILENAMES"]
        rename_columns_years = config["RENAME_COLUMNS_YEARS"]
        FILENAMES = config["FILENAMES"]
        TEMPLATE_MCF = config["TEMPLATE_MCF"]
        BASE_URL = config["BASE_URL"]
        URL_SUFFIX = config["URL_SUFFIX"]

        input_folder = os.path.join(_MODULE_DIR, 'input')

        # Download files if the mode is 'download' or if no mode is specified
        if mode in ("", "download"):
            for year in years:
                try:
                    logging.info(f"Processing year: {year}")
                    zip_filename = zip_filenames.get(year, None)
                    file_path = _input_path(input_folder, year, zip_filename)
                    if not os.path.exists(file_path):
                        logging.info(
                            f"File for {year} not found. Downloading...")
                        url = build_url(year, zip_filename)
                        download_file(url, year, input_folder, zip_filename)
                except Exception as e:
                    # logging.fatal ends the run, so there is no next year.
                    logging.fatal(f"Error processing data for year {year}: {e}")

        # Process files if the mode is 'process' or if no mode is specified
        if mode in ("", "process"):
            dfs = {}
            for year in years:
                try:
                    logging.info(f"Processing data for year {year}")
                    columns = columns_by_year[year]
                    zip_filename = zip_filenames.get(year, None)
                    file_path = _input_path(input_folder, year, zip_filename)

                    frame = _read_year(file_path, year, zip_filename, columns)
                    logging.info(f"File processed for {year} successfully")

                    # Years listed in RENAME_COLUMNS_YEARS publish fewer
                    # columns and map onto the reduced normalized set.
                    if year in rename_columns_years:
                        target_columns = norm_columns_reduced
                    else:
                        target_columns = norm_columns
                    dfs[year] = frame.rename(
                        columns=dict(zip(columns, target_columns)))
                    logging.info(f"Columns renamed for {year} successfully")
                except Exception as e:
                    logging.fatal(f"Error processing data for year {year}: {e}")

            # Write the combined data and template
            logging.info("Writing data to CSV")
            write_csv(dfs, 'ejscreen_airpollutants.csv')

            logging.info("Writing template to TMCF")
            write_tmcf('ejscreen.tmcf')

        logging.info("Process completed successfully")

    except Exception as e:
        logging.fatal(f"Unexpected error in the main process: {e}")


if __name__ == '__main__':
    app.run(main)
-observationPeriod: dcs:P1Y -value: C:ejscreen_airpollutants->DSLPM -unit: dcs:MicrogramsPerCubicMeter - -Node: E:ejscreen_airpollutants->E1 -typeOf: dcs:StatVarObservation -variableMeasured: dcs:AirPollutant_Cancer_Risk -observationDate: C:ejscreen_airpollutants->year -observationAbout: C:ejscreen_airpollutants->FIPS -observationPeriod: dcs:P1Y -value: C:ejscreen_airpollutants->CANCER - -Node: E:ejscreen_airpollutants->E2 -typeOf: dcs:StatVarObservation -variableMeasured: dcs:AirPollutant_Respiratory_Hazard -observationDate: C:ejscreen_airpollutants->year -observationAbout: C:ejscreen_airpollutants->FIPS -observationPeriod: dcs:P1Y -value: C:ejscreen_airpollutants->RESP - -Node: E:ejscreen_airpollutants->E3 -typeOf: dcs:StatVarObservation -variableMeasured: dcs:Mean_Concentration_AirPollutant_Ozone -observationDate: C:ejscreen_airpollutants->year -observationAbout: C:ejscreen_airpollutants->FIPS -observationPeriod: dcs:P1Y -value: C:ejscreen_airpollutants->OZONE -unit: dcs:PartsPerBillion - -Node: E:ejscreen_airpollutants->E4 -typeOf: dcs:StatVarObservation -variableMeasured: dcs:Mean_Concentration_AirPollutant_PM2.5 -observationDate: C:ejscreen_airpollutants->year -observationAbout: C:ejscreen_airpollutants->FIPS -observationPeriod: dcs:P1Y -value: C:ejscreen_airpollutants->PM25 -unit: dcs:MicrogramsPerCubicMeter \ No newline at end of file +{'Node': 'E:ejscreen_airpollutants->E0', 'typeOf': 'dcs:StatVarObservation', 'variableMeasured': 'dcs:Mean_Concentration_AirPollutant_DieselPM', 'observationDate': 'C:ejscreen_airpollutants->year', 'observationAbout': 'C:ejscreen_airpollutants->FIPS', 'observationPeriod': 'dcs:P1Y', 'value': 'C:ejscreen_airpollutants->DSLPM', 'unit': 'dcs:MicrogramsPerCubicMeter'} +{'Node': 'E:ejscreen_airpollutants->E1', 'typeOf': 'dcs:StatVarObservation', 'variableMeasured': 'dcs:AirPollutant_Cancer_Risk', 'observationDate': 'C:ejscreen_airpollutants->year', 'observationAbout': 'C:ejscreen_airpollutants->FIPS', 'observationPeriod': 
'dcs:P1Y', 'value': 'C:ejscreen_airpollutants->CANCER', 'unit': 'dcs:PerMillionPerson'} +{'Node': 'E:ejscreen_airpollutants->E2', 'typeOf': 'dcs:StatVarObservation', 'variableMeasured': 'dcs:AirPollutant_Respiratory_Hazard', 'observationDate': 'C:ejscreen_airpollutants->year', 'observationAbout': 'C:ejscreen_airpollutants->FIPS', 'observationPeriod': 'dcs:P1Y', 'value': 'C:ejscreen_airpollutants->RESP'} +{'Node': 'E:ejscreen_airpollutants->E3', 'typeOf': 'dcs:StatVarObservation', 'variableMeasured': 'dcs:Mean_Concentration_AirPollutant_Ozone', 'observationDate': 'C:ejscreen_airpollutants->year', 'observationAbout': 'C:ejscreen_airpollutants->FIPS', 'observationPeriod': 'dcs:P1Y', 'value': 'C:ejscreen_airpollutants->OZONE', 'unit': 'dcs:PartsPerBillion'} +{'Node': 'E:ejscreen_airpollutants->E4', 'typeOf': 'dcs:StatVarObservation', 'variableMeasured': 'dcs:Mean_Concentration_AirPollutant_PM2.5', 'observationDate': 'C:ejscreen_airpollutants->year', 'observationAbout': 'C:ejscreen_airpollutants->FIPS', 'observationPeriod': 'dcs:P1Y', 'value': 'C:ejscreen_airpollutants->PM25', 'unit': 'dcs:MicrogramsPerCubicMeter'} \ No newline at end of file diff --git a/scripts/us_epa/ejscreen/ejscreen_test.py b/scripts/us_epa/ejscreen/ejscreen_test.py index 7965671493..12c98be7a9 100644 --- a/scripts/us_epa/ejscreen/ejscreen_test.py +++ b/scripts/us_epa/ejscreen/ejscreen_test.py @@ -1,8 +1,20 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
''' Unit tests for ejscreen.py Usage: python3 -m unittest discover -v -s ../ -p "ejscreen_test.py" ''' - import unittest import os import tempfile @@ -16,19 +28,30 @@ class TestEjscreen(unittest.TestCase): def test_write_csv(self): with tempfile.TemporaryDirectory() as tmp_dir: + # Ensure test data file exists in the expected directory + test_data_file = os.path.join(module_dir_, + 'test_data/test_data.csv') + expected_data_file = os.path.join( + module_dir_, 'test_data/test_data_expected.csv') + + if not os.path.exists(test_data_file) or not os.path.exists( + expected_data_file): + raise FileNotFoundError( + f"Test data files are missing: {test_data_file}, {expected_data_file}" + ) + dfs = {} - dfs['2020'] = pd.read_csv(os.path.join(module_dir_, - 'test_data/test_data.csv'), - float_precision='high') + dfs['2020'] = pd.read_csv(test_data_file, float_precision='high') test_csv = os.path.join(tmp_dir, 'test_csv.csv') write_csv(dfs, test_csv) - expected_csv = os.path.join(module_dir_, - 'test_data/test_data_expected.csv') + with open(test_csv, 'r') as test: - test_str: str = test.read() - with open(expected_csv, 'r') as expected: - expected_str: str = expected.read() + test_str = test.read() + with open(expected_data_file, 'r') as expected: + expected_str = expected.read() self.assertEqual(test_str, expected_str) + + # Remove temporary test file after assertion os.remove(test_csv) diff --git a/scripts/us_epa/ejscreen/manifest.json b/scripts/us_epa/ejscreen/manifest.json new file mode 100644 index 0000000000..d2bd898d21 --- /dev/null +++ b/scripts/us_epa/ejscreen/manifest.json @@ -0,0 +1,21 @@ +{ + "import_specifications": [ + { + "import_name": "EPA_EJSCREEN", + "curator_emails": [], + "provenance_url": "https://gaftp.epa.gov/EJSCREEN/", + "provenance_description": "The U.S. Environmental Protection Agency's EJSCREEN data", + "scripts": [ + "ejscreen.py" + ], + "import_inputs": [ + { + "template_mcf": "ejscreen.tmcf", + "cleaned_csv": "ejscreen_airpollutants.csv" + } + ], + 
"cron_schedule": "0 7 * * 1" + } + ] +} +