updated process.py file

datacommonsorg · Jan 20, 2025 · 8abf7c0 · 8abf7c0
1 parent baf4ec9
commit 8abf7c0
Show file tree

Hide file tree

Showing 2 changed files with 9 additions and 3 deletions.
diff --git a/scripts/us_census/acs5yr/subject_tables/s2408/README.md b/scripts/us_census/acs5yr/subject_tables/s2408/README.md
@@ -2,9 +2,9 @@
 
 This subject table provides data on the class of workers by sex.
 
-Years: 2010-2019  
+Years: 2010-2023 
 Geo : Country, State, County and Place
 
 Notes:
 1. The data on 'Median Earnings' for 2010-2014 is available in table S2408. From 2015 onwards, this data is available in table S2418.
-2. Only percentages are provided for Male and Female counts from 2010-2014. This will be converted to counts through the code.
+2. Only percentages are provided for Male and Female counts from 2010-2014. This will be converted to counts through the code.
diff --git a/scripts/us_census/acs5yr/subject_tables/s2408/process.py b/scripts/us_census/acs5yr/subject_tables/s2408/process.py
@@ -135,7 +135,13 @@ def _process_dataframe(self, df, filename):
                 obs_df = pd.DataFrame(columns=self.csv_columns)
                 obs_df['Place'] = place_geoIds
                 obs_df['StatVar'] = column_map[column]['Node']
-                obs_df['Quantity'] = df[column].values.tolist()
+                # obs_df['Quantity'] = df[column].values.tolist()
+
+                # Strip commas, hyphens and plus signs from the quantity values before
+                # casting to float. No other characters are removed, and any minus sign is dropped too.
+                obs_df['Quantity'] = df[column].apply(lambda x: str(x).replace(
+                    ',', '').replace('-', '').replace('+', '')).astype(
+                        float).tolist()
+
                 # add unit to the csv
                 try:
                     unit = column_map[column]['unit']