Commit

CDC_imports_AutoRefresh
SudhishaK committed Jan 21, 2025
1 parent 1dfd3f9 commit 35a3b8a
Showing 2 changed files with 23 additions and 37 deletions.
@@ -6,11 +6,3 @@ variableMeasured: C:PM25CTPollution->StatisticalVariable
observationPeriod: "P24H"
unit: dcs:MicrogramsPerCubicMeter
value: C:PM25CTPollution->Value

-Node: E:PM25CTPollution->E2
-observationAbout: C:PM25CTPollution->dcid
-typeOf: dcs:StatVarObservation
-observationDate: C:PM25CTPollution->date
-variableMeasured: C:PM25CTPollution->StatisticalVariable
-observationPeriod: "P24H"
-value: C:PM25CTPollution->Value
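
The template nodes in this file read the cleaned CSV through four column references: dcid, date, StatisticalVariable, and Value. As a hedged illustration (not part of the import itself), a pre-import check for those columns might look like the sketch below; the file name is a placeholder.

# Illustrative sketch only: verify the cleaned CSV exposes the columns that the
# TMCF node references. The file name below is a placeholder, not the real output.
import pandas as pd

REQUIRED_COLUMNS = {"dcid", "date", "StatisticalVariable", "Value"}

df = pd.read_csv("PM25CTPollution_cleaned.csv")  # hypothetical cleaned output file
missing = REQUIRED_COLUMNS - set(df.columns)
if missing:
    raise ValueError(f"Cleaned CSV is missing TMCF-referenced columns: {missing}")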
52 changes: 23 additions & 29 deletions scripts/us_cdc/environmental_health_toxicology/parse_air_quality.py
@@ -95,8 +95,7 @@ def clean_air_quality_data(config, importname, inputpath, outputpath):
output_file_name = file_info["output_file_name"]
input_file_name = file_info["input_file_name"]
input_file_path = os.path.join(inputpath, input_file_name)
-output_file_path = os.path.join(outputpath,
-output_file_name)
+output_file_path = os.path.join(outputpath, output_file_name)
logging.info(f"input_file_name: {input_file_name}")
logging.info(f"output_file_name: {output_file_name}")
if str(input_file_name).endswith('.csv'):
Expand All @@ -105,29 +104,28 @@ def clean_air_quality_data(config, importname, inputpath, outputpath):
try:
data = pd.read_csv(input_file_path)
data["date"] = pd.to_datetime(data["date"],
-yearfirst=True)
+yearfirst=True)
data["date"] = pd.to_datetime(data["date"],
format="%Y-%m-%d")
format="%Y-%m-%d")

if "PM2.5" in input_file_name:
census_tract = "ds_pm"
elif "Ozone" in input_file_name:
census_tract = "ds_o3"
if "Census" in input_file_name:
if "PM2.5" in input_file_name:
-data = pd.melt(
-data,
-id_vars=[
-'year', 'date', 'statefips',
-'countyfips', 'ctfips', 'latitude',
-'longitude'
-],
-value_vars=[
-str(census_tract + '_pred'),
-str(census_tract + '_stdd')
-],
-var_name='StatisticalVariable',
-value_name='Value')
+data = pd.melt(data,
+id_vars=[
+'year', 'date', 'statefips',
+'countyfips', 'ctfips',
+'latitude', 'longitude'
+],
+value_vars=[
+str(census_tract + '_pred'),
+str(census_tract + '_stdd')
+],
+var_name='StatisticalVariable',
+value_name='Value')
elif "Ozone" in input_file_name:
data = pd.melt(
data,
@@ -136,37 +134,33 @@ def clean_air_quality_data(config, importname, inputpath, outputpath):
'countyfips', 'ctfips', 'latitude',
'longitude', census_tract + '_stdd'
],
-value_vars=[
-str(census_tract + '_pred')
-],
+value_vars=[str(census_tract + '_pred')],
var_name='StatisticalVariable',
value_name='Value')
data.rename(
columns={census_tract + '_stdd': 'Error'},
inplace=True)
max_length = data['ctfips'].astype(
str).str.len().max()
-data['ctfips'] = data['ctfips'].astype(
-str).apply(lambda x: add_prefix_zero(
-x, max_length))
-data["dcid"] = "geoId/" + data["ctfips"].astype(
-str)
+data['ctfips'] = data['ctfips'].astype(str).apply(
+lambda x: add_prefix_zero(x, max_length))
+data["dcid"] = "geoId/" + data["ctfips"].astype(str)
data['StatisticalVariable'] = data[
'StatisticalVariable'].map(STATVARS)
elif "County" in input_file_name and "PM" in input_file_name:
data["statefips"] = data["statefips"].astype(
str).str.zfill(2)
data["countyfips"] = data["countyfips"].astype(
str).str.zfill(3)
data["dcid"] = "geoId/" + data[
"statefips"] + data["countyfips"]
data["dcid"] = "geoId/" + data["statefips"] + data[
"countyfips"]
elif "County" in input_file_name and "Ozone" in input_file_name:
data["statefips"] = data["statefips"].astype(
str).str.zfill(2)
data["countyfips"] = data["countyfips"].astype(
str).str.zfill(3)
data["dcid"] = "geoId/" + data[
"statefips"] + data["countyfips"]
data["dcid"] = "geoId/" + data["statefips"] + data[
"countyfips"]
data.to_csv(output_file_path,
float_format='%.6f',
index=False)
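
To make the diff above easier to follow, here is a minimal, self-contained sketch of the census-tract reshaping the script performs: pd.melt turns the <census_tract>_pred/_stdd columns into StatisticalVariable/Value rows, the tract FIPS is zero-padded, and a geoId dcid is derived. The sample rows and the STATVARS names are made-up stand-ins, and str.zfill is used here in place of the script's add_prefix_zero helper.

# Illustrative sketch only: made-up sample rows, stand-in STATVARS names,
# and str.zfill standing in for the script's add_prefix_zero helper.
import pandas as pd

STATVARS = {
    "ds_pm_pred": "Mean_Concentration_AirPollutant_PM2.5",            # stand-in dcid
    "ds_pm_stdd": "StandardError_Concentration_AirPollutant_PM2.5",   # stand-in dcid
}

data = pd.DataFrame({
    "year": [2016, 2016],
    "date": ["2016-01-01", "2016-01-01"],
    "statefips": [1, 48],
    "countyfips": [73, 201],
    "ctfips": [1073000100, 48201223100],  # tract FIPS read as int; "01..." lost its leading zero
    "latitude": [33.57, 29.76],
    "longitude": [-86.72, -95.36],
    "ds_pm_pred": [7.5, 8.1],
    "ds_pm_stdd": [0.4, 0.5],
})

census_tract = "ds_pm"  # as set for the PM2.5 files in the script

# Wide -> long: one row per (tract, date, statistical variable).
data = pd.melt(data,
               id_vars=["year", "date", "statefips", "countyfips",
                        "ctfips", "latitude", "longitude"],
               value_vars=[census_tract + "_pred", census_tract + "_stdd"],
               var_name="StatisticalVariable",
               value_name="Value")

# Zero-pad the tract FIPS to the longest observed width and build the geoId
# dcid that the TMCF's observationAbout column reference expects.
max_length = int(data["ctfips"].astype(str).str.len().max())
data["ctfips"] = data["ctfips"].astype(str).str.zfill(max_length)
data["dcid"] = "geoId/" + data["ctfips"]
data["StatisticalVariable"] = data["StatisticalVariable"].map(STATVARS)

print(data[["dcid", "date", "StatisticalVariable", "Value"]])

The resulting dcid, date, StatisticalVariable, and Value columns are the ones the TMCF template above references.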
