From c3743d26220485f8b00e73153dc1ec6f69adf747 Mon Sep 17 00:00:00 2001 From: Niklewa Date: Tue, 2 Jul 2024 17:34:26 +0200 Subject: [PATCH] some changes regarding the review --- cities/utils/clean_variable.py | 2 +- cities/utils/data_grabber.py | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cities/utils/clean_variable.py b/cities/utils/clean_variable.py index 0ac3fd7b..974926e8 100644 --- a/cities/utils/clean_variable.py +++ b/cities/utils/clean_variable.py @@ -11,7 +11,7 @@ def __init__( self, variable_name: str, path_to_raw_csv: str, - year_or_category: str = "Year", # Year or Category + year_or_category: str = "Year", # Column name to store years or categories in the long format ): self.variable_name = variable_name self.path_to_raw_csv = path_to_raw_csv diff --git a/cities/utils/data_grabber.py b/cities/utils/data_grabber.py index 6ab5ebb2..8c4182aa 100644 --- a/cities/utils/data_grabber.py +++ b/cities/utils/data_grabber.py @@ -17,7 +17,7 @@ def find_repo_root() -> Path: def check_if_tensed(df): years_to_check = ["2015", "2018", "2019", "2020"] - check = any(year in df.columns for year in years_to_check) + check = df.columns[2:].isin(years_to_check).any().any() return check @@ -70,28 +70,28 @@ def __init__(self): class CTDataGrabberCSV(DataGrabberCSV): def __init__( - self, level_DG: str = "pre_2020" + self, ct_time_period: str = "pre_2020" ): # new argument pre_2020 and post_2020 super().__init__() self.data_path = os.path.join(self.repo_root, "data/Census_tract_level") - self.level_DG = level_DG + self.ct_time_period = ct_time_period sys.path.insert(0, self.data_path) def _get_features( self, features: List[str], table_suffix: str - ) -> None: # redefining data grabbing to depend on `level_DG` argument + ) -> None: # redefining data grabbing to depend on `ct_time_period` argument for feature in features: - if self.level_DG == "pre_2020": + if self.ct_time_period == "pre_2020": file_path = os.path.join( self.data_path, f"{feature}_pre2020_CT_{table_suffix}.csv" ) - elif self.level_DG == "post_2020": + elif self.ct_time_period == "post_2020": file_path = os.path.join( self.data_path, f"{feature}_post2020_CT_{table_suffix}.csv" ) else: raise ValueError( - "Invalid level_DG. Please choose 'pre_2020' or 'post_2020'." + "Invalid ct_time_period. Please choose 'pre_2020' or 'post_2020'." ) if os.path.exists(file_path): @@ -124,7 +124,7 @@ def get_features_std_long(self, features: List[str]) -> None: self._get_features(features, "std_long") -def list_available_features(level="county", level_DG="pre_2020"): +def list_available_features(level="county", ct_time_period="pre_2020"): root = find_repo_root() if level == "county": @@ -143,9 +143,9 @@ def list_available_features(level="county", level_DG="pre_2020"): for file_name in file_names: if level == "census_tract": - if level_DG == "pre_2020" and "pre2020" in file_name: + if ct_time_period == "pre_2020" and "pre2020" in file_name: matches = re.split(r"_wide|_long|_std|_pre2020", file_name) - elif level_DG == "post_2020" and "pre2020" not in file_name: + elif ct_time_period == "post_2020" and "pre2020" not in file_name: matches = re.split(r"_wide|_long|_std|_post2020", file_name) else: continue @@ -165,7 +165,7 @@ def list_available_features(level="county", level_DG="pre_2020"): return sorted(feature_names) -def list_tensed_features(level="county", level_DG="pre_2020"): +def list_tensed_features(level="county", ct_time_period="pre_2020"): if level == "county": data = DataGrabber() all_features = list_available_features(level="county") @@ -176,9 +176,9 @@ def list_tensed_features(level="county", level_DG="pre_2020"): elif level == "census_tract": data = CTDataGrabberCSV( - level_DG=level_DG + ct_time_period=ct_time_period ) # TODO: Change to CTDataGrabber() in the future - all_features = list_available_features(level="census_tract", level_DG=level_DG) + all_features = list_available_features(level="census_tract", ct_time_period=ct_time_period) else: raise ValueError(