diff --git a/scripts/us_census/acs5yr/subject_tables/s2408/README.md b/scripts/us_census/acs5yr/subject_tables/s2408/README.md index bb68503885..5e0d8beee4 100644 --- a/scripts/us_census/acs5yr/subject_tables/s2408/README.md +++ b/scripts/us_census/acs5yr/subject_tables/s2408/README.md @@ -2,9 +2,9 @@ This subject table provides data on the class of workers by sex. -Years: 2010-2019 +Years: 2010-2023 Geo : Country, State, County and Place Notes: 1. The data on 'Median Earnings' for 2010-2014 is available in table S2408. From 2015 onwards, this data is available in the table S2418. -2. Only percentages are provided for Male and Female counts from 2010-2014. This will be converted to counts through the code. \ No newline at end of file +2. Only percentages are provided for Male and Female counts from 2010-2014. This will be converted to counts through the code. diff --git a/scripts/us_census/acs5yr/subject_tables/s2408/process.py b/scripts/us_census/acs5yr/subject_tables/s2408/process.py index b6e8ad5cd4..cf1c09a39e 100644 --- a/scripts/us_census/acs5yr/subject_tables/s2408/process.py +++ b/scripts/us_census/acs5yr/subject_tables/s2408/process.py @@ -135,7 +135,13 @@ def _process_dataframe(self, df, filename): obs_df = pd.DataFrame(columns=self.csv_columns) obs_df['Place'] = place_geoIds obs_df['StatVar'] = column_map[column]['Node'] - obs_df['Quantity'] = df[column].values.tolist() + # obs_df['Quantity'] = df[column].values.tolist() + + # Clean the quantity values by removing commas, dashes, and any non-numeric characters like '+' + obs_df['Quantity'] = df[column].apply(lambda x: str(x).replace( + ',', '').replace('-', '').replace('+', '')).astype( + float).tolist() + # add unit to the csv try: unit = column_map[column]['unit']