diff --git a/backend/helper/index.html b/backend/helper/index.html index 9c22623..9322c43 100755 --- a/backend/helper/index.html +++ b/backend/helper/index.html @@ -743,21 +743,21 @@

Source code in src\backend\helper.py -
24
+              
class ASCIIColors(StrEnum):
-    """ASCII colors for use in printing colored text to the terminal."""
-
-    GREY = "\x1b[38;20m"
-    YELLOW = "\x1b[33;20m"
-    RED = "\x1b[31;20m"
-    BOLD_RED = "\x1b[31;1m"
-    RESET = "\x1b[0m"
+30
class ASCIIColors(StrEnum):
+    """ASCII colors for use in printing colored text to the terminal."""
+
+    GREY = "\x1b[38;20m"
+    YELLOW = "\x1b[33;20m"
+    RED = "\x1b[31;20m"
+    BOLD_RED = "\x1b[31;1m"
+    RESET = "\x1b[0m"
 
@@ -830,23 +830,23 @@

Source code in src\backend\helper.py -
148
+            
def df_to_file(df: pl.DataFrame):
-    """Write a DataFrame to a unique file.
-
-    Args:
-        df (pl.DataFrame): the DataFrame to write
-    """
-    file_path = OUTPUT_DIR / f"{time.time()}_data_frame.csv"
-    print(f"Dataframe saved to {file_path.resolve()}")
-    df.write_csv(file_path, include_header=True)
+155
def df_to_file(df: pl.DataFrame):
+    """Write a DataFrame to a unique file.
+
+    Args:
+        df (pl.DataFrame): the DataFrame to write
+    """
+    file_path = OUTPUT_DIR / f"{time.time()}_data_frame.csv"
+    print(f"Dataframe saved to {file_path.resolve()}")
+    df.write_csv(file_path, include_header=True)
 
@@ -925,7 +925,8 @@

Source code in src\backend\helper.py -
68
+            
67
+68
 69
 70
 71
@@ -935,19 +936,18 @@ 

75 76 77 -78 -79

def is_valid_zipcode(zip: int) -> bool:
-    """Check if the given ZIP code is valid based on a local file.
-
-    Args:
-        zip (int): the ZIP code to check
-
-    Returns:
-        bool: if ZIP code is valid
-    """
-    if isinstance(zip, str):
-        zip = int(zip)
-    return zip in master_df["ZIP"]
+78
def is_valid_zipcode(zip: int) -> bool:
+    """Check if the given ZIP code is valid based on a local file.
+
+    Args:
+        zip (int): the ZIP code to check
+
+    Returns:
+        bool: if ZIP code is valid
+    """
+    if isinstance(zip, str):
+        zip = int(zip)
+    return zip in MASTER_DF["ZIP"]
 
@@ -1026,7 +1026,8 @@

Source code in src\backend\helper.py -
 90
+            
 89
+ 90
  91
  92
  93
@@ -1048,31 +1049,30 @@ 

109 110 111 -112 -113

def metro_name_to_zip_code_list(msa_name: str) -> list[int]:
-    """Return the constituent ZIP codes for the given Metropolitan Statistical Area.
-
-    Args:
-        msa_name (str): name of the Metropolitan Statistical Area
-
-    Returns:
-        list[int]: list of ZIP codes found. Is empty if MSA name is invalid
-    """
-    if msa_name == "TEST":
-        # return [20814]  # good and small
-        # return [22067, 55424]  # nulls in sqft
-        return [20015, 20018, 20017]  # nulls in sqft and large
-
-    df = master_df.select("ZIP", "METRO_NAME", "LSAD")
-
-    return (
-        df.filter(
-            (pl.col("METRO_NAME").eq(msa_name))
-            & (pl.col("LSAD").eq("Metropolitan Statistical Area"))
-        )
-        .unique()["ZIP"]
-        .to_list()
-    )
+112
def metro_name_to_zip_code_list(msa_name: str) -> list[int]:
+    """Return the constituent ZIP codes for the given Metropolitan Statistical Area.
+
+    Args:
+        msa_name (str): name of the Metropolitan Statistical Area
+
+    Returns:
+        list[int]: list of ZIP codes found. Is empty if MSA name is invalid
+    """
+    if msa_name == "TEST":
+        # return [20814]  # good and small
+        # return [22067, 55424]  # nulls in sqft
+        return [20015, 20018, 20017]  # nulls in sqft and large
+
+    df = MASTER_DF.select("ZIP", "METRO_NAME", "LSAD")
+
+    return (
+        df.filter(
+            (pl.col("METRO_NAME").eq(msa_name))
+            & (pl.col("LSAD").eq("Metropolitan Statistical Area"))
+        )
+        .unique()["ZIP"]
+        .to_list()
+    )
 
@@ -1151,7 +1151,8 @@

Source code in src\backend\helper.py -
134
+            
133
+134
 135
 136
 137
@@ -1161,19 +1162,18 @@ 

141 142 143 -144 -145

def req_get_to_file(request: requests.Response) -> int:
-    """Write the contents of a request response to a unique file.
-
-    Args:
-        request (requests.Response): the request
-
-    Returns:
-        int: the status code of the request
-    """
-    with open(OUTPUT_DIR / f"{time.time()}_request.html", "w+", encoding="utf-8") as f:
-        f.write(request.text)
-    return request.status_code
+144
def req_get_to_file(request: requests.Response) -> int:
+    """Write the contents of a request response to a unique file.
+
+    Args:
+        request (requests.Response): the request
+
+    Returns:
+        int: the status code of the request
+    """
+    with open(OUTPUT_DIR / f"{time.time()}_request.html", "w+", encoding="utf-8") as f:
+        f.write(request.text)
+    return request.status_code
 
@@ -1266,7 +1266,8 @@

Source code in src\backend\helper.py -
51
+            
50
+51
 52
 53
 54
@@ -1279,22 +1280,21 @@ 

61 62 63 -64 -65

def state_city_to_zip_df(state: str, city: str) -> pl.DataFrame:
-    """Take in a state and city and return the ZIP code constituents of that city.
-
-    Args:
-        state (str): the state
-        city (str): the city
-
-    Returns:
-        pl.DataFrame: DataFrame of ZIP codes
-    """
-    return (
-        pl.read_csv("zip_registry.csv")
-        .filter((pl.col("state") == state) & (pl.col("city") == city))
-        .select("zipcode")
-    )
+64
def state_city_to_zip_df(state: str, city: str) -> pl.DataFrame:
+    """Take in a state and city and return the ZIP code constituents of that city.
+
+    Args:
+        state (str): the state
+        city (str): the city
+
+    Returns:
+        pl.DataFrame: DataFrame of ZIP codes
+    """
+    return (
+        pl.read_csv("zip_registry.csv")
+        .filter((pl.col("state") == state) & (pl.col("city") == city))
+        .select("zipcode")
+    )
 
@@ -1387,7 +1387,8 @@

Source code in src\backend\helper.py -
34
+            
33
+34
 35
 36
 37
@@ -1400,22 +1401,21 @@ 

44 45 46 -47 -48

def state_county_to_zip_df(state: str, county: str) -> pl.DataFrame:
-    """Take in a state and county and return the ZIP code constituents of that county.
-
-    Args:
-        state (str): the state
-        county (str): the county
-
-    Returns:
-        pl.DataFrame: DataFrame of ZIP codes
-    """
-    return (
-        pl.read_csv("zip_registry.csv")
-        .filter((pl.col("state") == state) & (pl.col("county") == county))
-        .select("zipcode")
-    )
+47
def state_county_to_zip_df(state: str, county: str) -> pl.DataFrame:
+    """Take in a state and county and return the ZIP code constituents of that county.
+
+    Args:
+        state (str): the state
+        county (str): the county
+
+    Returns:
+        pl.DataFrame: DataFrame of ZIP codes
+    """
+    return (
+        pl.read_csv("zip_registry.csv")
+        .filter((pl.col("state") == state) & (pl.col("county") == county))
+        .select("zipcode")
+    )
 
@@ -1494,7 +1494,8 @@

Source code in src\backend\helper.py -
116
+            
115
+116
 117
 118
 119
@@ -1508,23 +1509,22 @@ 

127 128 129 -130 -131

def zip_to_metro(zip: int) -> str:
-    """Find the Metropolitan Statistical Area name for the specified ZIP code.
-
-    Args:
-        zip (int): the ZIP code to look up
-
-    Returns:
-        str: the Metropolitan name. Is empty if the ZIP code is not a part of a Metropolitan Statistical Area
-    """
-    result = master_df.filter(master_df["ZIP"] == zip)["METRO_NAME"]
-
-    if len(result) > 0:
-        log("Zip has multiple codes. Only giving first one", "debug")
-        return result[0]
-    else:
-        return ""  # should this be none?
+130
def zip_to_metro(zip: int) -> str:
+    """Find the Metropolitan Statistical Area name for the specified ZIP code.
+
+    Args:
+        zip (int): the ZIP code to look up
+
+    Returns:
+        str: the Metropolitan name. Is empty if the ZIP code is not a part of a Metropolitan Statistical Area
+    """
+    result = MASTER_DF.filter(MASTER_DF["ZIP"] == zip)["METRO_NAME"]
+
+    if len(result) > 0:
+        log("Zip has multiple codes. Only giving first one", "debug")
+        return result[0]
+    else:
+        return ""  # should this be none?
 
diff --git a/backend/redfinscraper/index.html b/backend/redfinscraper/index.html index 02bf658..5c6b477 100755 --- a/backend/redfinscraper/index.html +++ b/backend/redfinscraper/index.html @@ -775,7 +775,16 @@

Source code in src\backend\redfinscraper.py -
 94
+              
 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
  95
  96
  97
@@ -1469,701 +1478,798 @@ 

785 786 787 -788

class RedfinApi:
-    """Scrape redfin using their stingray api. Use this class for getting and the iterating over ZIP code level data, creating an object for each new zip code."""
-
-    class SoldStatus(StrEnum):
-        FOR_SALE = "For Sale"
-        SOLD = "Sold"
-
-    class HouseType(StrEnum):
-        HOUSE = "1"
-        CONDO = "2"
-        TOWNHOUSE = "3"
-        MULTI_FAMILY = "4"
-        LAND = "5"
-        OTHER = "6"
-
-    class Price(StrEnum):
-        NONE = "None"
-        FIFTY_THOU = "50000"
-        SEVENTY_FIVE_THOU = "75000"
-        ONE_HUN_THOU = "100000"
-        ONE_HUN_25_THOU = "125000"
-        ONE_HUN_5_THOU = "150000"
-        ONE_HUN_75_THOU = "175000"
-        TWO_HUN_THOU = "200000"
-        TWO_HUN_25_THOU = "225000"
-        TWO_HUN_5_THOU = "250000"
-        TWO_HUN_75_THOU = "275000"
-        THREE_HUN_THOU = "300000"
-        THREE_HUN_25_THOU = "325000"
-        THREE_HUN_5_THOU = "350000"
-        THREE_HUN_75_THOU = "375000"
-        FOUR_HUN_THOU = "400000"
-        FOUR_HUN_25_THOU = "425000"
-        FOUR_HUN_5_THOU = "450000"
-        FOUR_HUN_75_THOU = "475000"
-        FIVE_HUN_THOU = "500000"
-        FIVE_HUN_5_THOU = "550000"
-        SIX_HUN_THOU = "600000"
-        SIX_HUN_5_THOU = "650000"
-        SEVEN_HUN_THOU = "700000"
-        SEVEN_HUN_5_THOU = "750000"
-        EIGHT_HUN_THOU = "800000"
-        EIGHT_HUN_5_THOU = "850000"
-        NINE_HUN_THOU = "900000"
-        NINE_HUN_5_THOU = "950000"
-        ONE_MIL = "1000000"
-        ONE_MIL_25_THOU = "1250000"
-        ONE_MIL_5_THOU = "1500000"
-        ONE_MIL_75_THOU = "1750000"
-        TWO_MIL = "2000000"
-        TWO_MIL_25_THOU = "2250000"
-        TWO_MIL_5_THOU = "2500000"
-        TWO_MIL_75_THOU = "2750000"
-        THREE_MIL = "3000000"
-        THREE_MIL_25_THOU = "3250000"
-        THREE_MIL_5_THOU = "3500000"
-        THREE_MIL_75_THOU = "3750000"
-        FOUR_MIL = "4000000"
-        FOUR_MIL_25_THOU = "4250000"
-        FOUR_MIL_5_THOU = "4500000"
-        FOUR_MIL_75_THOU = "4750000"
-        FIVE_MIL = "5000000"
-        SIX_MIL = "6000000"
-        SEVEN_MIL = "7000000"
-        EIGHT_MIL = "8000000"
-        NINE_MIL = "9000000"
-        TEN_MIL = "10000000"
-
-    class SortOrder(StrEnum):
-        RECOMMENDED = "redfin-recommended-asc"
-        NEWEST = "days-on-redfin-asc"
-        MOST_RECENTLY_SOLD = "last-sale-date-desc"
-        LOW_HI = "price-asc"
-        HI_LOW = "price-desc"
-        SQFT = "square-footage-desc"
-        LOT_SIZE = "lot-sq-ft-desc"
-        SQFT_PRICE = "dollars-per-sq-ft-asc"
-
-    class SoldWithinDays(StrEnum):
-        ONE_WEEK = "7"
-        ONE_MONTH = "30"
-        THREE_MONTHS = "90"
-        SIX_MONTHS = "180"
-        ONE_YEAR = "365"
-        TWO_YEARS = "730"
-        THREE_YEARS = "1095"
-        FIVE_YEARS = "1825"
-
-    class Stories(StrEnum):
-        ONE = "1"
-        TWO = "2"
-        THREE = "3"
-        FOUR = "4"
-        FIVE = "5"
-        TEN = "10"
-        FIFTEEN = "15"
-        TWENTY = "20"
-
-    class Sqft(StrEnum):
-        NONE = "None"
-        SEVEN_FIFTY = "750"
-        THOU = "1000"
-        THOU_1 = "1100"
-        THOU_2 = "1200"
-        THOU_3 = "1300"
-        THOU_4 = "1400"
-        THOU_5 = "1500"
-        THOU_6 = "1600"
-        THOU_7 = "1700"
-        THOU_8 = "1800"
-        THOU_9 = "1900"
-        TWO_THOU = "2000"
-        TWO_THOU_250 = "2250"
-        TWO_THOU_500 = "2500"
-        TWO_THOU_750 = "2750"
-        THREE_THOU = "3000"
-        FOUR_THOU = "4000"
-        FIVE_THOU = "5000"
-        SEVEN_THOU_500 = "7500"
-        TEN_THOU = "10000"
-
-    def __init__(self) -> None:
-        self.rf = redfin.Redfin()
-        self.DESIRED_CSV_SCHEMA = {
-            "ADDRESS": str,
-            "CITY": str,
-            "PROPERTY TYPE": str,
-            "STATE OR PROVINCE": str,
-            "YEAR BUILT": pl.UInt16,
-            "ZIP OR POSTAL CODE": pl.UInt32,
-            "PRICE": pl.UInt32,
-            "SQUARE FEET": pl.UInt32,
-            "URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)": str,
-            "LATITUDE": pl.Float32,
-            "LONGITUDE": pl.Float32,
-        }
-        self.search_params = None
-        self.column_dict = {key: False for key in CATEGORY_PATTERNS.keys()}
-
-    def set_search_params(self, zip: str, search_filters: dict[str, Any]) -> None:
-        """Set the parameters for searching by ZIP code.
-
-        Args:
-            zip (str): the ZIP code
-            search_filters (dict[str, Any]): search filters for appending to a gis-csv path
-        """
-        try:
-            region_info = self.get_region_info_from_zipcode(zip)
-        except json.JSONDecodeError:
-            log(f"Could not decode region info for {zip}.", "warn")
-            return None
-        except HTTPError:
-            log(f"Could not retrieve region info for {zip}.", "warn")
-            return None
-
-        if search_filters.get("for sale sold") == "Sold":
-            sort_order = self.SortOrder.MOST_RECENTLY_SOLD.value
-        else:
-            sort_order = self.SortOrder.NEWEST.value
-        # TODO make sure to fix filtering so that its not just "single family homes"
-
-        try:
-            market = region_info["payload"]["rootDefaults"]["market"]
-            region_id = region_info["payload"]["rootDefaults"]["region_id"]
-            status = str(region_info["payload"]["rootDefaults"]["status"])
-        except KeyError:
-            log("Market, region, or status could not be identified ", "warn")
-            return None
-
-        self.search_params = {
-            "al": 1,
-            "has_deal": "false",
-            "has_dishwasher": "false",
-            "has_laundry_facility": "false",
-            "has_laundry_hookups": "false",
-            "has_parking": "false",
-            "has_pool": "false",
-            "has_short_term_lease": "false",
-            "include_pending_homes": "false",  # probably an "include" option
-            "isRentals": "false",
-            "is_furnished": "false",
-            "is_income_restricted": "false",
-            "is_senior_living": "false",
-            "max_year_built": search_filters.get("max year built"),
-            "min_year_built": search_filters.get("min year built"),
-            "market": market,
-            "min_stories": search_filters.get("min stories"),
-            "num_homes": 350,
-            "ord": sort_order,
-            "page_number": "1",
-            "pool": "false",
-            "region_id": region_id,
-            "region_type": "2",
-            "status": status,
-            "travel_with_traffic": "false",
-            "travel_within_region": "false",
-            "utilities_included": "false",
-            "v": "8",
-        }
-        if search_filters.get("for sale sold") == "Sold":
-            self.search_params["sold_within_days"] = search_filters.get("sold within")
-            self.search_params["status"] = 9
-        else:
-            self.search_params["sf"] = "1, 2, 3, 4, 5, 6, 7"
-            match [
-                search_filters.get("status coming soon"),
-                search_filters.get("status active"),
-                search_filters.get("status pending"),
-            ]:
-                case [True, False, False]:
-                    status = "8"
-                case [False, True, False]:
-                    status = "1"
-                case [False, False, True]:
-                    status = "130"
-                case [True, True, False]:
-                    status = "9"
-                case [False, True, True]:
-                    status = "139"
-                case [True, False, True]:
-                    status = "138"
-                case [True, True, True]:
+788
+789
+790
+791
+792
+793
+794
+795
+796
+797
+798
+799
+800
+801
+802
+803
+804
+805
+806
+807
+808
+809
+810
+811
+812
+813
+814
+815
+816
+817
+818
+819
+820
+821
+822
+823
+824
+825
+826
+827
+828
+829
+830
+831
+832
class RedfinApi:
+    """Scrape redfin using their stingray api. Use this class for getting and then iterating over ZIP code level data, creating an object for each new ZIP code."""
+
+    class SoldStatus(StrEnum):
+        FOR_SALE = "For Sale"
+        SOLD = "Sold"
+
+    class HouseType(StrEnum):
+        HOUSE = "1"
+        CONDO = "2"
+        TOWNHOUSE = "3"
+        MULTI_FAMILY = "4"
+        LAND = "5"
+        OTHER = "6"
+
+    class Price(StrEnum):
+        NONE = "None"
+        FIFTY_THOU = "50000"
+        SEVENTY_FIVE_THOU = "75000"
+        ONE_HUN_THOU = "100000"
+        ONE_HUN_25_THOU = "125000"
+        ONE_HUN_5_THOU = "150000"
+        ONE_HUN_75_THOU = "175000"
+        TWO_HUN_THOU = "200000"
+        TWO_HUN_25_THOU = "225000"
+        TWO_HUN_5_THOU = "250000"
+        TWO_HUN_75_THOU = "275000"
+        THREE_HUN_THOU = "300000"
+        THREE_HUN_25_THOU = "325000"
+        THREE_HUN_5_THOU = "350000"
+        THREE_HUN_75_THOU = "375000"
+        FOUR_HUN_THOU = "400000"
+        FOUR_HUN_25_THOU = "425000"
+        FOUR_HUN_5_THOU = "450000"
+        FOUR_HUN_75_THOU = "475000"
+        FIVE_HUN_THOU = "500000"
+        FIVE_HUN_5_THOU = "550000"
+        SIX_HUN_THOU = "600000"
+        SIX_HUN_5_THOU = "650000"
+        SEVEN_HUN_THOU = "700000"
+        SEVEN_HUN_5_THOU = "750000"
+        EIGHT_HUN_THOU = "800000"
+        EIGHT_HUN_5_THOU = "850000"
+        NINE_HUN_THOU = "900000"
+        NINE_HUN_5_THOU = "950000"
+        ONE_MIL = "1000000"
+        ONE_MIL_25_THOU = "1250000"
+        ONE_MIL_5_THOU = "1500000"
+        ONE_MIL_75_THOU = "1750000"
+        TWO_MIL = "2000000"
+        TWO_MIL_25_THOU = "2250000"
+        TWO_MIL_5_THOU = "2500000"
+        TWO_MIL_75_THOU = "2750000"
+        THREE_MIL = "3000000"
+        THREE_MIL_25_THOU = "3250000"
+        THREE_MIL_5_THOU = "3500000"
+        THREE_MIL_75_THOU = "3750000"
+        FOUR_MIL = "4000000"
+        FOUR_MIL_25_THOU = "4250000"
+        FOUR_MIL_5_THOU = "4500000"
+        FOUR_MIL_75_THOU = "4750000"
+        FIVE_MIL = "5000000"
+        SIX_MIL = "6000000"
+        SEVEN_MIL = "7000000"
+        EIGHT_MIL = "8000000"
+        NINE_MIL = "9000000"
+        TEN_MIL = "10000000"
+
+    class SortOrder(StrEnum):
+        RECOMMENDED = "redfin-recommended-asc"
+        NEWEST = "days-on-redfin-asc"
+        MOST_RECENTLY_SOLD = "last-sale-date-desc"
+        LOW_HI = "price-asc"
+        HI_LOW = "price-desc"
+        SQFT = "square-footage-desc"
+        LOT_SIZE = "lot-sq-ft-desc"
+        SQFT_PRICE = "dollars-per-sq-ft-asc"
+
+    class SoldWithinDays(StrEnum):
+        ONE_WEEK = "7"
+        ONE_MONTH = "30"
+        THREE_MONTHS = "90"
+        SIX_MONTHS = "180"
+        ONE_YEAR = "365"
+        TWO_YEARS = "730"
+        THREE_YEARS = "1095"
+        FIVE_YEARS = "1825"
+
+    class Stories(StrEnum):
+        ONE = "1"
+        TWO = "2"
+        THREE = "3"
+        FOUR = "4"
+        FIVE = "5"
+        TEN = "10"
+        FIFTEEN = "15"
+        TWENTY = "20"
+
+    class Sqft(StrEnum):
+        NONE = "None"
+        SEVEN_FIFTY = "750"
+        THOU = "1000"
+        THOU_1 = "1100"
+        THOU_2 = "1200"
+        THOU_3 = "1300"
+        THOU_4 = "1400"
+        THOU_5 = "1500"
+        THOU_6 = "1600"
+        THOU_7 = "1700"
+        THOU_8 = "1800"
+        THOU_9 = "1900"
+        TWO_THOU = "2000"
+        TWO_THOU_250 = "2250"
+        TWO_THOU_500 = "2500"
+        TWO_THOU_750 = "2750"
+        THREE_THOU = "3000"
+        FOUR_THOU = "4000"
+        FIVE_THOU = "5000"
+        SEVEN_THOU_500 = "7500"
+        TEN_THOU = "10000"
+
+    def __init__(self) -> None:
+        self.rf = redfin.Redfin()
+        self.DESIRED_CSV_SCHEMA = {
+            "ADDRESS": str,
+            "CITY": str,
+            "PROPERTY TYPE": str,
+            "STATE OR PROVINCE": str,
+            "YEAR BUILT": pl.UInt16,
+            "ZIP OR POSTAL CODE": pl.UInt32,
+            "PRICE": pl.UInt32,
+            "SQUARE FEET": pl.UInt32,
+            "URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)": str,
+            "LATITUDE": pl.Float32,
+            "LONGITUDE": pl.Float32,
+        }
+        self.STRING_ZIP_CSV_SCHEMA = {
+            "ADDRESS": str,
+            "CITY": str,
+            "PROPERTY TYPE": str,
+            "STATE OR PROVINCE": str,
+            "YEAR BUILT": pl.UInt16,
+            "ZIP OR POSTAL CODE": pl.Utf8,
+            "PRICE": pl.UInt32,
+            "SQUARE FEET": pl.UInt32,
+            "URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)": str,
+            "LATITUDE": pl.Float32,
+            "LONGITUDE": pl.Float32,
+        }
+        self.search_params = None
+        self.column_dict = {key: False for key in CATEGORY_PATTERNS.keys()}
+
+    def set_search_params(self, zip: str, search_filters: dict[str, Any]) -> None:
+        """Set the parameters for searching by ZIP code.
+
+        Args:
+            zip (str): the ZIP code
+            search_filters (dict[str, Any]): search filters for appending to a gis-csv path
+        """
+        try:
+            region_info = self.get_region_info_from_zipcode(zip)
+        except json.JSONDecodeError:
+            log(f"Could not decode region info for {zip}.", "warn")
+            return None
+        except HTTPError:
+            log(f"Could not retrieve region info for {zip}.", "warn")
+            return None
+
+        if search_filters.get("for sale sold") == "Sold":
+            sort_order = self.SortOrder.MOST_RECENTLY_SOLD.value
+        else:
+            sort_order = self.SortOrder.NEWEST.value
+        # TODO make sure to fix filtering so that it's not just "single family homes"
+
+        try:
+            market = region_info["payload"]["rootDefaults"]["market"]
+            region_id = region_info["payload"]["rootDefaults"]["region_id"]
+            status = str(region_info["payload"]["rootDefaults"]["status"])
+        except KeyError:
+            log("Market, region, or status could not be identified ", "warn")
+            return None
+
+        self.search_params = {
+            "al": 1,
+            "has_deal": "false",
+            "has_dishwasher": "false",
+            "has_laundry_facility": "false",
+            "has_laundry_hookups": "false",
+            "has_parking": "false",
+            "has_pool": "false",
+            "has_short_term_lease": "false",
+            "include_pending_homes": "false",  # probably an "include" option
+            "isRentals": "false",
+            "is_furnished": "false",
+            "is_income_restricted": "false",
+            "is_senior_living": "false",
+            "max_year_built": search_filters.get("max year built"),
+            "min_year_built": search_filters.get("min year built"),
+            "market": market,
+            "min_stories": search_filters.get("min stories"),
+            "num_homes": 350,
+            "ord": sort_order,
+            "page_number": "1",
+            "pool": "false",
+            "region_id": region_id,
+            "region_type": "2",
+            "status": status,
+            "travel_with_traffic": "false",
+            "travel_within_region": "false",
+            "utilities_included": "false",
+            "v": "8",
+        }
+        if search_filters.get("for sale sold") == "Sold":
+            self.search_params["sold_within_days"] = search_filters.get("sold within")
+            self.search_params["status"] = 9
+        else:
+            self.search_params["sf"] = "1, 2, 3, 4, 5, 6, 7"
+            match [
+                search_filters.get("status coming soon"),
+                search_filters.get("status active"),
+                search_filters.get("status pending"),
+            ]:
+                case [True, False, False]:
+                    status = "8"
+                case [False, True, False]:
+                    status = "1"
+                case [False, False, True]:
+                    status = "130"
+                case [True, True, False]:
+                    status = "9"
+                case [False, True, True]:
                     status = "139"
-
-            self.search_params["status"] = status
-
-        if (max_sqft := search_filters.get("max sqft")) != "None":
-            self.search_params["max_sqft"] = max_sqft
-        if (min_sqft := search_filters.get("min sqft")) != "None":
-            self.search_params["min_sqft"] = min_sqft
-
-        if (max_price := search_filters.get("max price")) != "None":
-            self.search_params["max_price"] = max_price
-        if (min_price := search_filters.get("min price")) != "None":
-            self.search_params["min_price"] = min_price
-
-        houses = ""  # figure out how to join into comma string
-        if search_filters.get("house type house") is True:
-            houses = houses + "1"
-        if search_filters.get("house type condo") is True:
-            houses = houses + "2"
-        if search_filters.get("house type townhouse") is True:
-            houses = houses + "3"
-        if search_filters.get("house type mul fam") is True:
-            houses = houses + "4"
-
-        self.search_params["uipt"] = ",".join(list(houses))
-
-    # redfin setup
-    def meta_request_download(self, url: str, search_params) -> str:
-        """Method for downloading objects from Redfin.
+                case [True, False, True]:
+                    status = "138"
+                case [True, True, True]:
+                    status = "139"
+
+            self.search_params["status"] = status
+
+        if (max_sqft := search_filters.get("max sqft")) != "None":
+            self.search_params["max_sqft"] = max_sqft
+        if (min_sqft := search_filters.get("min sqft")) != "None":
+            self.search_params["min_sqft"] = min_sqft
+
+        if (max_price := search_filters.get("max price")) != "None":
+            self.search_params["max_price"] = max_price
+        if (min_price := search_filters.get("min price")) != "None":
+            self.search_params["min_price"] = min_price
+
+        houses = ""  # figure out how to join into comma string
+        if search_filters.get("house type house") is True:
+            houses = houses + "1"
+        if search_filters.get("house type condo") is True:
+            houses = houses + "2"
+        if search_filters.get("house type townhouse") is True:
+            houses = houses + "3"
+        if search_filters.get("house type mul fam") is True:
+            houses = houses + "4"
+
+        self.search_params["uipt"] = ",".join(list(houses))
 
-        Args:
-            url (str): the Redfin URL
-
-        Returns:
-            str: the unicode text response
-        """
-        response = requests.get(
-            self.rf.base + url, params=search_params, headers=self.rf.user_agent_header
-        )
-        log(response.request.url, "debug")
-        response.raise_for_status()
-        return response.text
-
-    def working_below_the_fold(self, property_id: str, listing_id: str = "") -> Any:
-        """A below_the_fold method that accepts a listing ID.
-        Note:
-            If you can get the listing ID, make sure to pass it to this function. You will possibly get incorrect data if you do not pass it
-
-        Args:
-            property_id (str): the property ID
-            listing_id (str): The listing ID. Defaults to False.
+    # redfin setup
+    def meta_request_download(self, url: str, search_params) -> str:
+        """Method for downloading objects from Redfin.
+
+        Args:
+            url (str): the Redfin URL
+
+        Returns:
+            str: the unicode text response
+        """
+        response = requests.get(
+            self.rf.base + url, params=search_params, headers=self.rf.user_agent_header
+        )
+        log(response.request.url, "debug")
+        response.raise_for_status()
+        return response.text
+
+    def working_below_the_fold(self, property_id: str, listing_id: str = "") -> Any:
+        """A below_the_fold method that accepts a listing ID.
+        Note:
+            If you can get the listing ID, make sure to pass it to this function. You will possibly get incorrect data if you do not pass it
 
-        Returns:
-            Any: response
-        """
-        if listing_id:
-            params = {
-                "accessLevel": 1,
-                "propertyId": property_id,
-                "listingId": listing_id,
-                "pageType": 1,
-            }
-        else:
-            params = {
-                "accessLevel": 1,
-                "propertyId": property_id,
-                "pageType": 1,
-            }
-        return self.rf.meta_request("/api/home/details/belowTheFold", params)
-
-    def get_region_info_from_zipcode(self, zip_code: str) -> Any:
-        """Get the region ifo from a ZIP code.
-
-        Args:
-            zip_code (str): the ZIP code
-
-        Returns:
-            Any: response
-        """
-        return self.rf.meta_request(
-            "api/region", {"region_id": zip_code, "region_type": 2, "tz": True, "v": 8}
-        )
-
-    def get_gis_csv(self, params: dict[str, Any]) -> str:
-        """Get the gis-csv of an area based on the contents of `params`
-
-        Args:
-            params (dict[str, Any]): the parameters
-
-        Returns:
-            str: the CSV file as a unicode string
-        """
-        return self.meta_request_download("api/gis-csv", search_params=params)
-
-    # calls stuff
-    def get_heating_info_from_super_group(self, super_group: dict) -> list[str]:
-        """Extract heating information from a super group
+        Args:
+            property_id (str): the property ID
+            listing_id (str): The listing ID. Defaults to "".
+
+        Returns:
+            Any: response
+        """
+        if listing_id:
+            params = {
+                "accessLevel": 1,
+                "propertyId": property_id,
+                "listingId": listing_id,
+                "pageType": 1,
+            }
+        else:
+            params = {
+                "accessLevel": 1,
+                "propertyId": property_id,
+                "pageType": 1,
+            }
+        return self.rf.meta_request("/api/home/details/belowTheFold", params)
+
+    def get_region_info_from_zipcode(self, zip_code: str) -> Any:
+        """Get the region info from a ZIP code.
+
+        Args:
+            zip_code (str): the ZIP code
+
+        Returns:
+            Any: response
+        """
+        return self.rf.meta_request(
+            "api/region", {"region_id": zip_code, "region_type": 2, "tz": True, "v": 8}
+        )
+
+    def get_gis_csv(self, params: dict[str, Any]) -> str:
+        """Get the gis-csv of an area based on the contents of `params`
+
+        Args:
+            params (dict[str, Any]): the parameters
+
+        Returns:
+            str: the CSV file as a unicode string
+        """
+        return self.meta_request_download("api/gis-csv", search_params=params)
 
-        :
-            Must supply a probable heating group for accurate information
+    def _rate_limit(self) -> None:
+        time.sleep(random.uniform(1, 1.6))
 
-            Format of super group in JSON:
-            {
-                types: []
-                amenityGroups: [
-                    {
-                        groupTitle: ""
-                        referenceName : ""
-                        amenityEntries : [
-                            {
-                                amenityName : ""
-                                referenceName: ""
-                                accessLevel : 1
-                                displayLevel : 1
-                                amenityValues : []
-                            },...
-                        ]
-                    }
-                ]
-                titleString: ""
-            }
-
-            Format of groupTitle/propertyDetailsHeader on website:
-                Interior -> titleString
-                ...
-                    Heating & Cooling -> groupTitle
-                        Electric -> no amenityName
-                        Ceiling Fan(s), Programmable Thermostat, Refrigeration -> no amenityName
-                        Heating/Cooling Updated In: 2022 -> amenityName = Heating/Cooling Updated In
-
-        Args:
-            super_group (dict): the super group to extract terms from
-
-        Returns:
-            list[str]: list of heating terms
-        """
-        amenity_values = []
-        for amenity in super_group.get("amenityGroups", ""):  #
-            if not any(
-                AMENITY_GROUP_INCLUDE_PATTERNS.findall(amenity.get("groupTitle", ""))
-            ):
-                continue  # this is the name that is bold
-            # these are the bulleted items.
-            for amenity_entry in amenity.get("amenityEntries", ""):
-                # if == "", then item is dangling (no word before colon). give the same treatment to "utilities: ..." as if it were ==""
-                amenity_name = amenity_entry.get("amenityName", "")
-
-                if amenity_name and not any(
-                    re.compile("utilit", re.I).findall(amenity_name)
-                ):
-                    # filter the before colon. first if is to have stricter capture rule when amenity item is "Utilities: Natural gas, heat pump, ..."
-                    if any(
-                        AMENITY_NAME_INCLUDE_PATTERNS.findall(amenity_name)
-                    ) and not any(AMENITY_NAME_EXCLUDE_PATTERNS.findall(amenity_name)):
-                        amenity_values.extend(
-                            [
-                                value
-                                for value in amenity_entry.get("amenityValues", "")
-                                if any(
-                                    regex.findall(value)
-                                    for regex in AFTER_COLON_FUEL_AND_APPLIANCE_INCLUDE_PATTERNS
-                                )
-                                and not any(AFTER_COLON_EXCLUDE_PATTERNS.findall(value))
-                            ]
-                        )
-                else:
-                    # filter for appliance if dangling or in utilities bullet item
-                    amenity_values.extend(
-                        [
-                            value
-                            for value in amenity_entry.get("amenityValues", "")
-                            if any(
-                                regex.findall(value)
-                                for regex in APPLIANCE_HEATING_RELATED_PATTERNS
-                            )
-                        ]
-                    )
-        return amenity_values
-
-    def get_super_groups_from_url(self, listing_url: str) -> list | None:
-        """Get super group list from listing url.
-
-        Args:
-            listing_url (str): The path part of the listing URL. This is without the "redfin.com" part. Include the first forward slash
-
-        Returns:
-            list | None: List of all super groups from a Redfin Url. None if an error is encountered or if no super groups were found
-        """
-        if "redfin" in listing_url:
-            listing_url = urlparse(listing_url).path
-
-        try:
-            time.sleep(random.uniform(1.2, 2.1))
-            initial_info = self.rf.initial_info(listing_url)
-        except json.JSONDecodeError:
-            log(f"Could not get initial info for {listing_url =}", "warn")
-            return None
-        try:
-            property_id = initial_info["payload"]["propertyId"]
-        except KeyError:
-            log("Could not find property id", "critical")
-            return None
-        try:
-            listing_id = initial_info["payload"]["listingId"]
-        except KeyError:
-            listing_id = None
-            log(
-                "Could not find listing id. Will try to continue. if errors in final zip csv, this might be the issue",
-                "warn",
-            )
+    # calls stuff
+    def get_heating_info_from_super_group(self, super_group: dict) -> list[str]:
+        """Extract heating information from a super group
+
+        :
+            Must supply a probable heating group for accurate information
+
+            Format of super group in JSON:
+            {
+                types: []
+                amenityGroups: [
+                    {
+                        groupTitle: ""
+                        referenceName : ""
+                        amenityEntries : [
+                            {
+                                amenityName : ""
+                                referenceName: ""
+                                accessLevel : 1
+                                displayLevel : 1
+                                amenityValues : []
+                            },...
+                        ]
+                    }
+                ]
+                titleString: ""
+            }
+
+            Format of groupTitle/propertyDetailsHeader on website:
+                Interior -> titleString
+                ...
+                    Heating & Cooling -> groupTitle
+                        Electric -> no amenityName
+                        Ceiling Fan(s), Programmable Thermostat, Refrigeration -> no amenityName
+                        Heating/Cooling Updated In: 2022 -> amenityName = Heating/Cooling Updated In
+
+        Args:
+            super_group (dict): the super group to extract terms from
+
+        Returns:
+            list[str]: list of heating terms
+        """
+        amenity_values = []
+        utility_regex = re.compile("utilit", re.I)
+        heating_and_cooling_regex = re.compile("heat")
+        for amenity in super_group.get("amenityGroups", ""):
+            group_title = amenity.get("groupTitle", "")
+            if not any(AMENITY_GROUP_INCLUDE_PATTERNS.findall(group_title)):
+                continue  # this is the name that is bold
+            # these are the bulleted items.
+            for amenity_entry in amenity.get("amenityEntries", ""):
+                # If amenityName == "", the item is dangling (no word before the colon). Give "utilities: ..." entries the same treatment as if the name were "".
+                amenity_name = amenity_entry.get("amenityName", "")
+
+                if amenity_name and not any(utility_regex.findall(amenity_name)):
+                    # Filter on the name before the colon. The outer if exists so that a stricter capture rule applies when the amenity item is "Utilities: Natural gas, heat pump, ..."
+                    if any(
+                        AMENITY_NAME_INCLUDE_PATTERNS.findall(amenity_name)
+                    ) and not any(AMENITY_NAME_EXCLUDE_PATTERNS.findall(amenity_name)):
+                        amenity_values.extend(
+                            [
+                                value
+                                for value in amenity_entry.get("amenityValues", "")
+                                if any(
+                                    regex.findall(value)
+                                    for regex in AFTER_COLON_FUEL_AND_APPLIANCE_INCLUDE_PATTERNS
+                                )
+                                and not any(AFTER_COLON_EXCLUDE_PATTERNS.findall(value))
+                            ]
+                        )
+                elif any(heating_and_cooling_regex.findall(group_title)):
+                    # if we are in "heating & cooling" and we are a dangling element
+                    amenity_values.extend(
+                        [
+                            value
+                            for value in amenity_entry.get("amenityValues", "")
+                            if any(
+                                regex.findall(value)
+                                for regex in AFTER_COLON_FUEL_AND_APPLIANCE_INCLUDE_PATTERNS
+                            )
+                            and not any(AFTER_COLON_EXCLUDE_PATTERNS.findall(value))
+                        ]
+                    )
+                else:
+                    # filter for appliance only if we are a dangling element or in the utilities bullet item
+                    amenity_values.extend(
+                        [
+                            value
+                            for value in amenity_entry.get("amenityValues", "")
+                            if any(
+                                regex.findall(value)
+                                for regex in APPLIANCE_HEATING_RELATED_PATTERNS
+                            )
+                        ]
+                    )
+        return amenity_values
+
+    def get_super_groups_from_url(self, listing_url: str) -> list | None:
+        """Get super group list from listing url.
+
+        Args:
+            listing_url (str): The path part of the listing URL. This is without the "redfin.com" part. Include the first forward slash
+
+        Returns:
+            list | None: List of all super groups from a Redfin Url. None if an error is encountered or if no super groups were found
+        """
+        if "redfin" in listing_url:
+            listing_url = urlparse(listing_url).path
+
         try:
-            time.sleep(random.uniform(1.1, 2.1))
-            if listing_id is None:
-                mls_data = self.working_below_the_fold(property_id)
-            else:
-                mls_data = self.working_below_the_fold(property_id, listing_id)
-        except json.JSONDecodeError:
-            log(f"Could not find mls details for {listing_url = }", "warn")
-            return None
-        try:
-            super_groups = mls_data["payload"]["amenitiesInfo"]["superGroups"]
-        except KeyError:
-            log(f"Could not find property details for {listing_url = }", "warn")
-            return None
-        return super_groups
-
-    def get_heating_terms_dict_from_listing(
-        self, address_and_url_list: list[str]
-    ) -> dict[str, bool]:
-        """Generate a filled out dictionary based on `self.column_dict` and the contents of :meth:get_heating_info_from_super_group(address_url_list).
-
-        TODO:
-            Since addresses can be doubled and it is random which one gets chosen, just printing listing url so that we can see which one has been chosen
-
-        Args:
-            address_and_url_list (list[str]): address in the first position, and the listing URL in the second position
-
-        Returns:
-            dict[str, bool]: the filled out `self.column_dict` for the supplied address/listing URL
-        """
-        address = address_and_url_list[0]
-        listing_url = address_and_url_list[1]
-        terms = []
-
-        super_groups = self.get_super_groups_from_url(listing_url)
-        if super_groups is None:
-            log(
-                "No amenities found", "info"
-            )  # this and "There was no heating information for {address}" should be made in caller?
-            return copy.deepcopy(self.column_dict)
-        for super_group in super_groups:  # dict
-            if any(
-                SUPER_GROUP_INCLUDE_PATTERNS.findall(super_group.get("titleString", ""))
-            ):
-                terms.extend(
-                    self.get_heating_info_from_super_group(super_group)
-                )  # this will be like [gas, electricity, heat pump]
-        if len(terms) == 0:
-            log(
-                f"There was no heating information for {urlparse(listing_url).path}",
-                "info",
-            )
-            return copy.deepcopy(self.column_dict)
-
-        # categorize the correct dict and return
-        master_dict = copy.deepcopy(self.column_dict)
-        for input_string in terms:
-            log(f"{input_string = }", "debug")
-            result = {}
-            for key, pattern in CATEGORY_PATTERNS.items():
-                if bool(re.search(pattern, input_string)):
-                    result[key] = True
-                    log(f"Pattern matched on {key, pattern = }", "debug")
-                log(f"Pattern did not match on {key, pattern = }", "debug")
-            for key in result.keys():
-                master_dict[key] = result[key] | master_dict[key]
-
-        # You'll have to df.unnest this for use in a dataframe
-        log(f"{terms = }", "debug")
-        log(f"{master_dict = }", "debug")
-        log(f"Heating amenities found for {address}.", "info")
-        return master_dict
-
-    def get_gis_csv_from_zip_with_filters(
-        self,
-    ) -> pl.DataFrame | None:
-        """Clean the GIS CSV retrieved from using the `search_params` field into the desired schema.
-
-        Returns:
-            pl.DataFrame | None: returns the DataFrame of cleaned information. None if there was not information in the GIS CSV file.
-        """
-        if self.search_params is None:
-            return
-        csv_text = self.get_gis_csv(self.search_params)
-
-        home_types: str = self.search_params.get("uipt", "")
-        if "1" in home_types:
-            home_types = home_types.replace("1", "Single Family Residential")
-        if "2" in home_types:
-            home_types = home_types.replace("2", "Condo/Co-op")
-        if "3" in home_types:
-            home_types = home_types.replace("3", "Townhouse")
-        if "4" in home_types:
-            home_types = home_types.replace("4", r"Multi-Family \(2-4 Unit\)")
-
-        try:
-            df = (
-                pl.read_csv(io.StringIO(csv_text), dtypes=self.DESIRED_CSV_SCHEMA)
-                .filter(
-                    pl.col("PROPERTY TYPE").str.contains(
-                        "|".join(home_types.split(","))
-                    )
-                )
-                .select(
-                    "ADDRESS",
-                    "CITY",
-                    "STATE OR PROVINCE",
-                    "YEAR BUILT",
-                    "ZIP OR POSTAL CODE",
-                    "PRICE",
-                    "SQUARE FEET",
-                    "URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)",
-                    "LATITUDE",
-                    "LONGITUDE",
-                )
-            )
-            if df.height == 0:
-                log(
-                    "CSV was empty. This can happen if local MLS rules dont allow downloads.",
-                    "debug",
-                )
-                return None
-        except Exception as e:
-            log(f"Could not read gis csv into dataframe.\n{csv_text = }\n{e}", "warn")
-            return None
-        return df
-
-    def get_gis_csv_for_zips_in_metro_with_filters(
-        self, msa_name: str, search_filters: dict[str, Any]
-    ) -> pl.DataFrame | None:
-        """Get a DataFrame of all GIS CSVs of a Metropolitan Statistical Area.
-
-        Args:
-            msa_name (str): a Metropolitan Statistical Area
-            search_filters (dict[str, Any]): filters to search with. generate using :meth:
-
-        Returns:
-            pl.DataFrame | None: return a DataFrame of all GIS CSVs retrieved for individual ZIP codes. None if there were no CSVs
-        """
-        log(f"Searching {msa_name} with filters {search_filters}.", "log")
-        zip_codes = metro_name_to_zip_code_list(msa_name)
-        formatted_zip_codes = [f"{zip_code:0{5}}" for zip_code in zip_codes]
-        log(
-            f"Estimated search time: {len(formatted_zip_codes) * (1.75+1.5)}",
-            "info",
-        )
-        list_of_csv_dfs = []
-        for zip in formatted_zip_codes:
-            time.sleep(random.uniform(1.5, 2))
-            self.set_search_params(zip, search_filters)
-            temp = self.get_gis_csv_from_zip_with_filters()
-            if temp is None:
-                log(f"Did not find any houses in {zip}.", "info")
-                continue
-            log(f"Found data for {temp.height} houses in {zip}.", "info")
-            list_of_csv_dfs.append(temp)
-
-        if len(list_of_csv_dfs) == 0:
-            return None
-        return pl.concat(list_of_csv_dfs)
-
-    def get_house_attributes_from_metro(
-        self,
-        msa_name: str,
-        search_filters: dict[str, Any],
-        use_cached_gis_csv_csv: bool = False,
-    ) -> None:
-        """Main function. Get the heating attributes of a Metropolitan Statistical Area.
-
-        TODO:
-            statistics on metropolitan
-            Log statistics about the heating outlook of a metro.
-
-        Args:
-            msa_name (str): Metropolitan Statistical Area name
-            search_filters (dict[str, Any]): search filters
-            use_cached_gis_csv_csv (bool, optional): Whether to use an already made GIS CSV DataFrame. Defaults to False.
-
-        Returns:
-            None: None if there were no houses found in the metro
-        """
-        msa_name_file_safe = msa_name.strip().replace(", ", "_").replace(" ", "_")
-        metro_output_dir_path = Path(OUTPUT_DIR_PATH) / msa_name_file_safe
-
-        if use_cached_gis_csv_csv:
-            log("Loading csv from cache.", "info")
-            try:
-                search_page_csvs_df = pl.read_csv(
-                    metro_output_dir_path / (msa_name_file_safe + ".csv"),
-                    dtypes=self.DESIRED_CSV_SCHEMA,
-                )
-                log(
-                    f"Loading csv from {metro_output_dir_path / (msa_name_file_safe + ".csv")} is complete.",
-                    "info",
-                )
-            except FileNotFoundError:
-                log(
-                    f"Loading csv from {metro_output_dir_path / (msa_name_file_safe + ".csv")} has failed, continuing with API search.",
-                    "info",
-                )
-                search_page_csvs_df = self.get_gis_csv_for_zips_in_metro_with_filters(
-                    msa_name, search_filters
-                )
-        else:
-            search_page_csvs_df = self.get_gis_csv_for_zips_in_metro_with_filters(
-                msa_name, search_filters
-            )
-
-        if search_page_csvs_df is None:
-            log(f"No houses found within {msa_name}. Try relaxing filters.", "info")
-            return None
-
-        url_col_name = "URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)"
-        search_page_csvs_df = search_page_csvs_df.filter(
-            (~pl.col(url_col_name).str.contains("(?i)unknown"))
-            .and_(pl.col("ADDRESS").str.len_chars().gt(0))
-            .and_(pl.col("SQUARE FEET").is_not_null())
-            .and_(pl.col("YEAR BUILT").is_not_null())
-        )
-        # .unique(subset=["LATITUDE", "LONGITUDE"], maintain_order=True)
-        # sometimes when there are two of the same listings you'll see the lot and the house. cant determine at this stage, so just leaving duplicates. hopefully this can be handled in viewer
-        # also somehow gets GIS-CSV for search pages that dont allow it
-
-        log(f"Found {search_page_csvs_df.height} possible houses in {msa_name}", "info")
-        os.makedirs(metro_output_dir_path, exist_ok=True)
-        log(
-            f"Writing csv for metro to {metro_output_dir_path / (msa_name_file_safe + ".csv")}",
-            "debug",
-        )
-        search_page_csvs_df.write_csv(
-            metro_output_dir_path / (msa_name_file_safe + ".csv")
-        )
-
-        # go through whole csv and get the house attributes for each house. then partition the dataframe by ZIP and save files
-
-        log("Starting lookups on listing URLS", "info")
-        log(
-            f"Unique ZIP codes: {search_page_csvs_df["ZIP OR POSTAL CODE"].n_unique()}",
-            "info",
-        )
-        log(
-            f"Estimated completion time: {search_page_csvs_df.height * 3.58} seconds",
-            "info",
-        )
-
-        list_of_dfs_by_zip = search_page_csvs_df.partition_by("ZIP OR POSTAL CODE")
-
-        for df_of_zip in list_of_dfs_by_zip:
-            df_of_zip = (
-                df_of_zip.with_columns(
-                    pl.concat_list([pl.col("ADDRESS"), pl.col(url_col_name)])
-                    .map_elements(self.get_heating_terms_dict_from_listing)
-                    .alias("nest")
-                )
-                .drop(url_col_name)
-                .unnest("nest")
-            )
-
-            zip = df_of_zip.select("ZIP OR POSTAL CODE").item(0, 0)
-            df_of_zip.write_csv(f"{metro_output_dir_path}{os.sep}{zip}.csv")
-
-        # log(f"In {msa_name}, there are {} homes with Electric fuel, {} homes with Natural Gas, {} homes with Propane, {} homes with Diesel/Heating Oil, {} homes with Wood/Pellet, {} homes with Solar Heating, {} homes with Heat Pumps, {} homes with Baseboard, {} homes with Furnace, {} homes with Boiler, {} homes with Radiator, {} homes with Radiant Floor")
-        log(f"Done with searching houses in {msa_name}!", "info")
+            self._rate_limit()
+            initial_info = self.rf.initial_info(listing_url)
+        except json.JSONDecodeError:
+            log(f"Could not get initial info for {listing_url =}", "critical")
+            return None
+        try:
+            property_id = initial_info["payload"]["propertyId"]
+        except KeyError:
+            log("Could not find property id", "critical")
+            return None
+        try:
+            listing_id = initial_info["payload"]["listingId"]
+        except KeyError:
+            listing_id = None
+            log(
+                "Could not find listing id. Will try to continue. if errors in final zip csv, this might be the issue",
+                "debug",
+            )
+        try:
+            self._rate_limit()
+            if listing_id is None:
+                mls_data = self.working_below_the_fold(property_id)
+            else:
+                mls_data = self.working_below_the_fold(property_id, listing_id)
+        except json.JSONDecodeError:
+            log(f"Could not find mls details for {listing_url = }", "warn")
+            return None
+        try:
+            super_groups = mls_data["payload"]["amenitiesInfo"]["superGroups"]
+        except KeyError:
+            log(f"Could not find property details for {listing_url = }", "warn")
+            return None
+        return super_groups
+
+    def get_heating_terms_dict_from_listing(
+        self, address_and_url_list: list[str]
+    ) -> dict[str, bool]:
+        """Generate a filled out dictionary based on `self.column_dict` and the contents of :meth:get_heating_info_from_super_group(address_url_list).
+
+        TODO:
+            Since addresses can be doubled and it is random which one gets chosen, just printing listing url so that we can see which one has been chosen
+
+        Args:
+            address_and_url_list (list[str]): address in the first position, and the listing URL in the second position
+
+        Returns:
+            dict[str, bool]: the filled out `self.column_dict` for the supplied address/listing URL
+        """
+        address = address_and_url_list[0]
+        listing_url = address_and_url_list[1]
+        terms = []
+
+        super_groups = self.get_super_groups_from_url(listing_url)
+        if super_groups is None:
+            log("No amenities found", "info")
+            return copy.deepcopy(self.column_dict)
+        for super_group in super_groups:  # dict
+            if any(
+                SUPER_GROUP_INCLUDE_PATTERNS.findall(super_group.get("titleString", ""))
+            ):
+                terms.extend(self.get_heating_info_from_super_group(super_group))
+        if len(terms) == 0:
+            log(
+                f"There was no heating information for {urlparse(listing_url).path}",
+                "info",
+            )
+            return copy.deepcopy(self.column_dict)
+
+        # categorize the correct dict and return
+        master_dict = copy.deepcopy(self.column_dict)
+        for input_string in terms:
+            log(f"{input_string = }", "debug")
+            result = {}
+            for key, pattern in CATEGORY_PATTERNS.items():
+                if bool(re.search(pattern, input_string)):
+                    result[key] = True
+                    log(f"Pattern matched on {key, pattern = }", "debug")
+                log(f"Pattern did not match on {key, pattern = }", "debug")
+            for key in result.keys():
+                master_dict[key] = result[key] | master_dict[key]
+
+        # You'll have to df.unnest this for use in a dataframe
+        log(f"{terms = }", "debug")
+        log(f"{master_dict = }", "debug")
+        log(f"Heating amenities found for {address}.", "info")
+        return master_dict
+
+    def get_gis_csv_from_zip_with_filters(
+        self,
+    ) -> pl.DataFrame | None:
+        """Clean the GIS CSV retrieved using the `search_params` field into the desired schema.
+
+        Returns:
+            pl.DataFrame | None: returns the DataFrame of cleaned information. None if there was no information in the GIS CSV file.
+        """
+        if self.search_params is None:
+            return
+        csv_text = self.get_gis_csv(self.search_params)
+
+        home_types: str = self.search_params.get("uipt", "")
+        if "1" in home_types:
+            home_types = home_types.replace("1", "Single Family Residential")
+        if "2" in home_types:
+            home_types = home_types.replace("2", "Condo/Co-op")
+        if "3" in home_types:
+            home_types = home_types.replace("3", "Townhouse")
+        if "4" in home_types:
+            home_types = home_types.replace("4", "Multi-Family (2-4 Unit)")
+
+        try:
+            df = (
+                pl.read_csv(
+                    io.StringIO(csv_text),
+                    dtypes=self.STRING_ZIP_CSV_SCHEMA,
+                )
+                .with_columns(
+                    pl.col("ZIP OR POSTAL CODE").str.extract(r"([0-9]{5})", 1)
+                )
+                .cast({"ZIP OR POSTAL CODE": pl.UInt32})
+                .filter(
+                    pl.col("PROPERTY TYPE").str.contains(
+                        "|".join(home_types.split(","))
+                    )
+                )
+                .select(
+                    "ADDRESS",
+                    "CITY",
+                    "STATE OR PROVINCE",
+                    "YEAR BUILT",
+                    "ZIP OR POSTAL CODE",
+                    "PRICE",
+                    "SQUARE FEET",
+                    "URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)",
+                    "LATITUDE",
+                    "LONGITUDE",
+                )
+            )
+            if df.height == 0:
+                log(
+                    "CSV was empty. This can happen if local MLS rules dont allow downloads.",
+                    "debug",
+                )
+                return None
+        except Exception as e:
+            log(f"Could not read gis csv into dataframe.\n{csv_text = }\n{e}", "warn")
+            return None
+        return df
+
+    def get_gis_csv_for_zips_in_metro_with_filters(
+        self, msa_name: str, search_filters: dict[str, Any]
+    ) -> pl.DataFrame | None:
+        """Get a DataFrame of all GIS CSVs of a Metropolitan Statistical Area.
+
+        Args:
+            msa_name (str): a Metropolitan Statistical Area
+            search_filters (dict[str, Any]): filters to search with. generate using :meth:
+
+        Returns:
+            pl.DataFrame | None: return a DataFrame of all GIS CSVs retrieved for individual ZIP codes. None if there were no CSVs
+        """
+        log(f"Searching {msa_name} with filters {search_filters}.", "log")
+        zip_codes = metro_name_to_zip_code_list(msa_name)
+        formatted_zip_codes = [f"{zip_code:0{5}}" for zip_code in zip_codes]
+        log(
+            f"Estimated search time: {len(formatted_zip_codes) * 4.5}",
+            "info",
+        )
+        list_of_csv_dfs = []
+        for zip in formatted_zip_codes:
+            self._rate_limit()
+            self.set_search_params(zip, search_filters)
+            temp = self.get_gis_csv_from_zip_with_filters()
+            if temp is None:
+                log(f"Did not find any houses in {zip}.", "info")
+                continue
+            log(f"Found data for {temp.height} houses in {zip}.", "info")
+            list_of_csv_dfs.append(temp)
+
+        if len(list_of_csv_dfs) == 0:
+            return None
+        return pl.concat(list_of_csv_dfs)
+
+    def get_house_attributes_from_metro(
+        self,
+        msa_name: str,
+        search_filters: dict[str, Any],
+        use_cached_gis_csv_csv: bool = False,
+    ) -> None:
+        """Main function. Get the heating attributes of a Metropolitan Statistical Area.
+
+        TODO:
+            statistics on metropolitan
+            Log statistics about the heating outlook of a metro.
+
+        Args:
+            msa_name (str): Metropolitan Statistical Area name
+            search_filters (dict[str, Any]): search filters
+            use_cached_gis_csv_csv (bool, optional): Whether to use an already made GIS CSV DataFrame. Defaults to False.
+
+        Returns:
+            None: None if there were no houses found in the metro
+        """
+        file_safe_msa_name = msa_name.strip().replace(", ", "_").replace(" ", "_")
+        METRO_OUTPUT_DIR_PATH = OUTPUT_DIR_PATH / file_safe_msa_name
+
+        if use_cached_gis_csv_csv:
+            log("Loading csv from cache.", "info")
+            try:
+                search_page_csvs_df = pl.read_csv(
+                    METRO_OUTPUT_DIR_PATH / (file_safe_msa_name + ".csv"),
+                    dtypes=self.DESIRED_CSV_SCHEMA,
+                )
+                log(
+                    f"Loading csv from {METRO_OUTPUT_DIR_PATH / (file_safe_msa_name + ".csv")} is complete.",
+                    "info",
+                )
+            except FileNotFoundError:
+                log(
+                    f"Loading csv from {METRO_OUTPUT_DIR_PATH / (file_safe_msa_name + ".csv")} has failed, continuing with API search.",
+                    "info",
+                )
+                search_page_csvs_df = self.get_gis_csv_for_zips_in_metro_with_filters(
+                    msa_name, search_filters
+                )
+        else:
+            search_page_csvs_df = self.get_gis_csv_for_zips_in_metro_with_filters(
+                msa_name, search_filters
+            )
+
+        if search_page_csvs_df is None:
+            log(f"No houses found within {msa_name}. Try relaxing filters.", "info")
+            return None
+
+        url_col_name = "URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)"
+        search_page_csvs_df = search_page_csvs_df.filter(
+            (~pl.col(url_col_name).str.contains("(?i)unknown"))
+            .and_(pl.col("ADDRESS").str.len_chars().gt(0))
+            .and_(pl.col("SQUARE FEET").is_not_null())
+            .and_(pl.col("YEAR BUILT").is_not_null())
+        )
+        # doing this twice so that the search page does not have nulls in the year built column.
+        min_year_built = search_filters.get("min year built")
+        max_year_built = search_filters.get("max year built")
+        assert min_year_built is not None and max_year_built is not None
+
+        # max() Acts like a Boolean OR
+        search_page_csvs_df = (
+            search_page_csvs_df.filter(
+                pl.col("YEAR BUILT")
+                .ge(int(min_year_built))
+                .and_(pl.col("YEAR BUILT").le(int(max_year_built)))
+            )
+            .group_by(by=["LATITUDE", "LONGITUDE"])
+            .max()
+        )
+
+        log(f"Found {search_page_csvs_df.height} possible houses in {msa_name}", "info")
+        METRO_OUTPUT_DIR_PATH.mkdir(parents=True, exist_ok=True)
+        log(
+            f"Writing csv for metro to {METRO_OUTPUT_DIR_PATH / (file_safe_msa_name + ".csv")}",
+            "debug",
+        )
+        search_page_csvs_df.write_csv(
+            METRO_OUTPUT_DIR_PATH / (file_safe_msa_name + ".csv")
+        )
+
+        # go through whole csv and get the house attributes for each house. then partition the dataframe by ZIP and save files
+
+        log("Starting lookups on listing URLS", "info")
+        log(
+            f"Unique ZIP codes: {search_page_csvs_df["ZIP OR POSTAL CODE"].n_unique()}",
+            "info",
+        )
+        log(
+            f"Estimated completion time: {search_page_csvs_df.height * 4.5} seconds",
+            "info",
+        )
+
+        list_of_dfs_by_zip = search_page_csvs_df.partition_by("ZIP OR POSTAL CODE")
+
+        for i, _ in enumerate(list_of_dfs_by_zip):
+            list_of_dfs_by_zip[i] = (
+                list_of_dfs_by_zip[i]
+                .with_columns(
+                    pl.concat_list([pl.col("ADDRESS"), pl.col(url_col_name)])
+                    .map_elements(self.get_heating_terms_dict_from_listing)
+                    .alias("nest")
+                )
+                .drop(url_col_name)
+                .unnest("nest")
+            )
+
+            zip = list_of_dfs_by_zip[i].select("ZIP OR POSTAL CODE").item(0, 0)
+            list_of_dfs_by_zip[i].write_csv(f"{METRO_OUTPUT_DIR_PATH / str(zip)}.csv")
+
+        if len(list_of_dfs_by_zip) > 0:
+            concat_df = pl.concat(list_of_dfs_by_zip)
+            log(f"Information on {msa_name}:", "info")
+            log(
+                f"num entries: {concat_df.height}, avg. house price: ${concat_df.get_column("PRICE").mean():,.2f}, electric houses: {concat_df.get_column("Electricity").sum()}, gas houses: {concat_df.get_column("Natural Gas").sum()}, propane houses: {concat_df.get_column("Propane").sum()}, oil-fed houses: {concat_df.get_column("Diesel/Heating Oil").sum()}, wood-fed houses: {concat_df.get_column("Wood/Pellet").sum()}, solar-heated houses: {concat_df.get_column("Solar Heating").sum()}, heat pump houses: {concat_df.get_column("Heat Pump").sum()}, baseboard houses: {concat_df.get_column("Baseboard").sum()}, furnace houses: {concat_df.get_column("Furnace").sum()}, boiler houses: {concat_df.get_column("Boiler").sum()}, radiator houses: {concat_df.get_column("Radiator").sum()}, houses with radiant floors: {concat_df.get_column("Radiant Floor").sum()}",
+                "info",
+            )
+
+            concat_df.write_csv(f"{METRO_OUTPUT_DIR_PATH}/full_info.csv")
+
+        log(f"Done with searching houses in {msa_name}!", "info")
 
@@ -2251,25 +2357,25 @@

Source code in src\backend\redfinscraper.py -
399
-400
-401
-402
-403
+            
def get_gis_csv(self, params: dict[str, Any]) -> str:
-    """Get the gis-csv of an area based on the contents of `params`
-
-    Args:
-        params (dict[str, Any]): the parameters
-
-    Returns:
-        str: the CSV file as a unicode string
-    """
-    return self.meta_request_download("api/gis-csv", search_params=params)
+408
+409
+410
+411
+412
def get_gis_csv(self, params: dict[str, Any]) -> str:
+    """Get the gis-csv of an area based on the contents of `params`
+
+    Args:
+        params (dict[str, Any]): the parameters
+
+    Returns:
+        str: the CSV file as a unicode string
+    """
+    return self.meta_request_download("api/gis-csv", search_params=params)
 
@@ -2362,29 +2468,7 @@

Source code in src\backend\redfinscraper.py -
653
-654
-655
-656
-657
-658
-659
-660
-661
-662
-663
-664
-665
-666
-667
-668
-669
-670
-671
-672
-673
-674
-675
+            
675
 676
 677
 678
@@ -2394,39 +2478,61 @@ 

682 683 684 -685

def get_gis_csv_for_zips_in_metro_with_filters(
-    self, msa_name: str, search_filters: dict[str, Any]
-) -> pl.DataFrame | None:
-    """Get a DataFrame of all GIS CSVs of a Metropolitan Statistical Area.
-
-    Args:
-        msa_name (str): a Metropolitan Statistical Area
-        search_filters (dict[str, Any]): filters to search with. generate using :meth:
-
-    Returns:
-        pl.DataFrame | None: return a DataFrame of all GIS CSVs retrieved for individual ZIP codes. None if there were no CSVs
-    """
-    log(f"Searching {msa_name} with filters {search_filters}.", "log")
-    zip_codes = metro_name_to_zip_code_list(msa_name)
-    formatted_zip_codes = [f"{zip_code:0{5}}" for zip_code in zip_codes]
-    log(
-        f"Estimated search time: {len(formatted_zip_codes) * (1.75+1.5)}",
-        "info",
-    )
-    list_of_csv_dfs = []
-    for zip in formatted_zip_codes:
-        time.sleep(random.uniform(1.5, 2))
-        self.set_search_params(zip, search_filters)
-        temp = self.get_gis_csv_from_zip_with_filters()
-        if temp is None:
-            log(f"Did not find any houses in {zip}.", "info")
-            continue
-        log(f"Found data for {temp.height} houses in {zip}.", "info")
-        list_of_csv_dfs.append(temp)
-
-    if len(list_of_csv_dfs) == 0:
-        return None
-    return pl.concat(list_of_csv_dfs)
+685
+686
+687
+688
+689
+690
+691
+692
+693
+694
+695
+696
+697
+698
+699
+700
+701
+702
+703
+704
+705
+706
+707
def get_gis_csv_for_zips_in_metro_with_filters(
+    self, msa_name: str, search_filters: dict[str, Any]
+) -> pl.DataFrame | None:
+    """Get a DataFrame of all GIS CSVs of a Metropolitan Statistical Area.
+
+    Args:
+        msa_name (str): a Metropolitan Statistical Area
+        search_filters (dict[str, Any]): filters to search with. generate using :meth:
+
+    Returns:
+        pl.DataFrame | None: return a DataFrame of all GIS CSVs retrieved for individual ZIP codes. None if there were no CSVs
+    """
+    log(f"Searching {msa_name} with filters {search_filters}.", "log")
+    zip_codes = metro_name_to_zip_code_list(msa_name)
+    formatted_zip_codes = [f"{zip_code:0{5}}" for zip_code in zip_codes]
+    log(
+        f"Estimated search time: {len(formatted_zip_codes) * 4.5}",
+        "info",
+    )
+    list_of_csv_dfs = []
+    for zip in formatted_zip_codes:
+        self._rate_limit()
+        self.set_search_params(zip, search_filters)
+        temp = self.get_gis_csv_from_zip_with_filters()
+        if temp is None:
+            log(f"Did not find any houses in {zip}.", "info")
+            continue
+        log(f"Found data for {temp.height} houses in {zip}.", "info")
+        list_of_csv_dfs.append(temp)
+
+    if len(list_of_csv_dfs) == 0:
+        return None
+    return pl.concat(list_of_csv_dfs)
 
@@ -2475,22 +2581,7 @@

Source code in src\backend\redfinscraper.py -
599
-600
-601
-602
-603
-604
-605
-606
-607
-608
-609
-610
-611
-612
-613
-614
+            
614
 615
 616
 617
@@ -2527,59 +2618,88 @@ 

648 649 650 -651

def get_gis_csv_from_zip_with_filters(
-    self,
-) -> pl.DataFrame | None:
-    """Clean the GIS CSV retrieved from using the `search_params` field into the desired schema.
-
-    Returns:
-        pl.DataFrame | None: returns the DataFrame of cleaned information. None if there was not information in the GIS CSV file.
-    """
-    if self.search_params is None:
-        return
-    csv_text = self.get_gis_csv(self.search_params)
-
-    home_types: str = self.search_params.get("uipt", "")
-    if "1" in home_types:
-        home_types = home_types.replace("1", "Single Family Residential")
-    if "2" in home_types:
-        home_types = home_types.replace("2", "Condo/Co-op")
-    if "3" in home_types:
-        home_types = home_types.replace("3", "Townhouse")
-    if "4" in home_types:
-        home_types = home_types.replace("4", r"Multi-Family \(2-4 Unit\)")
-
-    try:
-        df = (
-            pl.read_csv(io.StringIO(csv_text), dtypes=self.DESIRED_CSV_SCHEMA)
-            .filter(
-                pl.col("PROPERTY TYPE").str.contains(
-                    "|".join(home_types.split(","))
-                )
-            )
-            .select(
-                "ADDRESS",
-                "CITY",
-                "STATE OR PROVINCE",
-                "YEAR BUILT",
-                "ZIP OR POSTAL CODE",
-                "PRICE",
-                "SQUARE FEET",
-                "URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)",
-                "LATITUDE",
-                "LONGITUDE",
-            )
-        )
-        if df.height == 0:
-            log(
-                "CSV was empty. This can happen if local MLS rules dont allow downloads.",
-                "debug",
-            )
-            return None
-    except Exception as e:
-        log(f"Could not read gis csv into dataframe.\n{csv_text = }\n{e}", "warn")
-        return None
-    return df
+651
+652
+653
+654
+655
+656
+657
+658
+659
+660
+661
+662
+663
+664
+665
+666
+667
+668
+669
+670
+671
+672
+673
def get_gis_csv_from_zip_with_filters(
+    self,
+) -> pl.DataFrame | None:
+    """Clean the GIS CSV retrieved from using the `search_params` field into the desired schema.
+
+    Returns:
+        pl.DataFrame | None: returns the DataFrame of cleaned information. None if there was not information in the GIS CSV file.
+    """
+    if self.search_params is None:
+        return
+    csv_text = self.get_gis_csv(self.search_params)
+
+    home_types: str = self.search_params.get("uipt", "")
+    if "1" in home_types:
+        home_types = home_types.replace("1", "Single Family Residential")
+    if "2" in home_types:
+        home_types = home_types.replace("2", "Condo/Co-op")
+    if "3" in home_types:
+        home_types = home_types.replace("3", "Townhouse")
+    if "4" in home_types:
+        home_types = home_types.replace("4", "Multi-Family (2-4 Unit)")
+
+    try:
+        df = (
+            pl.read_csv(
+                io.StringIO(csv_text),
+                dtypes=self.STRING_ZIP_CSV_SCHEMA,
+            )
+            .with_columns(
+                pl.col("ZIP OR POSTAL CODE").str.extract(r"([0-9]{5})", 1)
+            )
+            .cast({"ZIP OR POSTAL CODE": pl.UInt32})
+            .filter(
+                pl.col("PROPERTY TYPE").str.contains(
+                    "|".join(home_types.split(","))
+                )
+            )
+            .select(
+                "ADDRESS",
+                "CITY",
+                "STATE OR PROVINCE",
+                "YEAR BUILT",
+                "ZIP OR POSTAL CODE",
+                "PRICE",
+                "SQUARE FEET",
+                "URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)",
+                "LATITUDE",
+                "LONGITUDE",
+            )
+        )
+        if df.height == 0:
+            log(
+                "CSV was empty. This can happen if local MLS rules dont allow downloads.",
+                "debug",
+            )
+            return None
+    except Exception as e:
+        log(f"Could not read gis csv into dataframe.\n{csv_text = }\n{e}", "warn")
+        return None
+    return df
 
@@ -2689,14 +2809,7 @@

Source code in src\backend\redfinscraper.py -
411
-412
-413
-414
-415
-416
-417
-418
+            
418
 419
 420
 421
@@ -2771,89 +2884,120 @@ 

490 491 492 -493

def get_heating_info_from_super_group(self, super_group: dict) -> list[str]:
-    """Extract heating information from a super group
-
-    :
-        Must supply a probable heating group for accurate information
-
-        Format of super group in JSON:
-        {
-            types: []
-            amenityGroups: [
-                {
-                    groupTitle: ""
-                    referenceName : ""
-                    amenityEntries : [
-                        {
-                            amenityName : ""
-                            referenceName: ""
-                            accessLevel : 1
-                            displayLevel : 1
-                            amenityValues : []
-                        },...
-                    ]
-                }
-            ]
-            titleString: ""
-        }
-
-        Format of groupTitle/propertyDetailsHeader on website:
-            Interior -> titleString
-            ...
-                Heating & Cooling -> groupTitle
-                    Electric -> no amenityName
-                    Ceiling Fan(s), Programmable Thermostat, Refrigeration -> no amenityName
-                    Heating/Cooling Updated In: 2022 -> amenityName = Heating/Cooling Updated In
-
-    Args:
-        super_group (dict): the super group to extract terms from
-
-    Returns:
-        list[str]: list of heating terms
-    """
-    amenity_values = []
-    for amenity in super_group.get("amenityGroups", ""):  #
-        if not any(
-            AMENITY_GROUP_INCLUDE_PATTERNS.findall(amenity.get("groupTitle", ""))
-        ):
-            continue  # this is the name that is bold
-        # these are the bulleted items.
-        for amenity_entry in amenity.get("amenityEntries", ""):
-            # if == "", then item is dangling (no word before colon). give the same treatment to "utilities: ..." as if it were ==""
-            amenity_name = amenity_entry.get("amenityName", "")
-
-            if amenity_name and not any(
-                re.compile("utilit", re.I).findall(amenity_name)
-            ):
-                # filter the before colon. first if is to have stricter capture rule when amenity item is "Utilities: Natural gas, heat pump, ..."
-                if any(
-                    AMENITY_NAME_INCLUDE_PATTERNS.findall(amenity_name)
-                ) and not any(AMENITY_NAME_EXCLUDE_PATTERNS.findall(amenity_name)):
-                    amenity_values.extend(
-                        [
-                            value
-                            for value in amenity_entry.get("amenityValues", "")
-                            if any(
-                                regex.findall(value)
-                                for regex in AFTER_COLON_FUEL_AND_APPLIANCE_INCLUDE_PATTERNS
-                            )
-                            and not any(AFTER_COLON_EXCLUDE_PATTERNS.findall(value))
-                        ]
-                    )
-            else:
-                # filter for appliance if dangling or in utilities bullet item
-                amenity_values.extend(
-                    [
-                        value
-                        for value in amenity_entry.get("amenityValues", "")
-                        if any(
-                            regex.findall(value)
-                            for regex in APPLIANCE_HEATING_RELATED_PATTERNS
-                        )
-                    ]
-                )
-    return amenity_values
+493
+494
+495
+496
+497
+498
+499
+500
+501
+502
+503
+504
+505
+506
+507
+508
+509
+510
+511
+512
def get_heating_info_from_super_group(self, super_group: dict) -> list[str]:
+    """Extract heating information from a super group
+
+    :
+        Must supply a probable heating group for accurate information
+
+        Format of super group in JSON:
+        {
+            types: []
+            amenityGroups: [
+                {
+                    groupTitle: ""
+                    referenceName : ""
+                    amenityEntries : [
+                        {
+                            amenityName : ""
+                            referenceName: ""
+                            accessLevel : 1
+                            displayLevel : 1
+                            amenityValues : []
+                        },...
+                    ]
+                }
+            ]
+            titleString: ""
+        }
+
+        Format of groupTitle/propertyDetailsHeader on website:
+            Interior -> titleString
+            ...
+                Heating & Cooling -> groupTitle
+                    Electric -> no amenityName
+                    Ceiling Fan(s), Programmable Thermostat, Refrigeration -> no amenityName
+                    Heating/Cooling Updated In: 2022 -> amenityName = Heating/Cooling Updated In
+
+    Args:
+        super_group (dict): the super group to extract terms from
+
+    Returns:
+        list[str]: list of heating terms
+    """
+    amenity_values = []
+    utility_regex = re.compile("utilit", re.I)
+    heating_and_cooling_regex = re.compile("heat")
+    for amenity in super_group.get("amenityGroups", ""):
+        group_title = amenity.get("groupTitle", "")
+        if not any(AMENITY_GROUP_INCLUDE_PATTERNS.findall(group_title)):
+            continue  # this is the name that is bold
+        # these are the bulleted items.
+        for amenity_entry in amenity.get("amenityEntries", ""):
+            # if == "", then item is dangling (no word before colon). give the same treatment to "utilities: ..." as if it were ==""
+            amenity_name = amenity_entry.get("amenityName", "")
+
+            if amenity_name and not any(utility_regex.findall(amenity_name)):
+                # filter the before colon. first if is to have stricter capture rule when amenity item is "Utilities: Natural gas, heat pump, ..."
+                if any(
+                    AMENITY_NAME_INCLUDE_PATTERNS.findall(amenity_name)
+                ) and not any(AMENITY_NAME_EXCLUDE_PATTERNS.findall(amenity_name)):
+                    amenity_values.extend(
+                        [
+                            value
+                            for value in amenity_entry.get("amenityValues", "")
+                            if any(
+                                regex.findall(value)
+                                for regex in AFTER_COLON_FUEL_AND_APPLIANCE_INCLUDE_PATTERNS
+                            )
+                            and not any(AFTER_COLON_EXCLUDE_PATTERNS.findall(value))
+                        ]
+                    )
+            elif any(heating_and_cooling_regex.findall(group_title)):
+                # if we are in "heating & cooling" and we are a dangling element
+                amenity_values.extend(
+                    [
+                        value
+                        for value in amenity_entry.get("amenityValues", "")
+                        if any(
+                            regex.findall(value)
+                            for regex in AFTER_COLON_FUEL_AND_APPLIANCE_INCLUDE_PATTERNS
+                        )
+                        and not any(AFTER_COLON_EXCLUDE_PATTERNS.findall(value))
+                    ]
+                )
+            else:
+                # filter for appliance only if we are a dangling element or in the utilities bullet item
+                amenity_values.extend(
+                    [
+                        value
+                        for value in amenity_entry.get("amenityValues", "")
+                        if any(
+                            regex.findall(value)
+                            for regex in APPLIANCE_HEATING_RELATED_PATTERNS
+                        )
+                    ]
+                )
+    return amenity_values
 
@@ -2936,26 +3080,7 @@

Source code in src\backend\redfinscraper.py -
542
-543
-544
-545
-546
-547
-548
-549
-550
-551
-552
-553
-554
-555
-556
-557
-558
-559
-560
-561
+            
561
 562
 563
 564
@@ -2991,62 +3116,73 @@ 

594 595 596 -597

def get_heating_terms_dict_from_listing(
-    self, address_and_url_list: list[str]
-) -> dict[str, bool]:
-    """Generate a filled out dictionary based on `self.column_dict` and the contents of :meth:get_heating_info_from_super_group(address_url_list).
-
-    TODO:
-        Since addresses can be doubled and it is random which one gets chosen, just printing listing url so that we can see which one has been chosen
-
-    Args:
-        address_and_url_list (list[str]): address in the first position, and the listing URL in the second position
-
-    Returns:
-        dict[str, bool]: the filled out `self.column_dict` for the supplied address/listing URL
-    """
-    address = address_and_url_list[0]
-    listing_url = address_and_url_list[1]
-    terms = []
-
-    super_groups = self.get_super_groups_from_url(listing_url)
-    if super_groups is None:
-        log(
-            "No amenities found", "info"
-        )  # this and "There was no heating information for {address}" should be made in caller?
-        return copy.deepcopy(self.column_dict)
-    for super_group in super_groups:  # dict
-        if any(
-            SUPER_GROUP_INCLUDE_PATTERNS.findall(super_group.get("titleString", ""))
-        ):
-            terms.extend(
-                self.get_heating_info_from_super_group(super_group)
-            )  # this will be like [gas, electricity, heat pump]
-    if len(terms) == 0:
-        log(
-            f"There was no heating information for {urlparse(listing_url).path}",
-            "info",
-        )
-        return copy.deepcopy(self.column_dict)
-
-    # categorize the correct dict and return
-    master_dict = copy.deepcopy(self.column_dict)
-    for input_string in terms:
-        log(f"{input_string = }", "debug")
-        result = {}
-        for key, pattern in CATEGORY_PATTERNS.items():
-            if bool(re.search(pattern, input_string)):
-                result[key] = True
-                log(f"Pattern matched on {key, pattern = }", "debug")
-            log(f"Pattern did not match on {key, pattern = }", "debug")
-        for key in result.keys():
-            master_dict[key] = result[key] | master_dict[key]
-
-    # You'll have to df.unnest this for use in a dataframe
-    log(f"{terms = }", "debug")
-    log(f"{master_dict = }", "debug")
-    log(f"Heating amenities found for {address}.", "info")
-    return master_dict
+597
+598
+599
+600
+601
+602
+603
+604
+605
+606
+607
+608
+609
+610
+611
+612
def get_heating_terms_dict_from_listing(
+    self, address_and_url_list: list[str]
+) -> dict[str, bool]:
+    """Generate a filled out dictionary based on `self.column_dict` and the contents of :meth:get_heating_info_from_super_group(address_url_list).
+
+    TODO:
+        Since addresses can be doubled and it is random which one gets chosen, just printing listing url so that we can see which one has been chosen
+
+    Args:
+        address_and_url_list (list[str]): address in the first position, and the listing URL in the second position
+
+    Returns:
+        dict[str, bool]: the filled out `self.column_dict` for the supplied address/listing URL
+    """
+    address = address_and_url_list[0]
+    listing_url = address_and_url_list[1]
+    terms = []
+
+    super_groups = self.get_super_groups_from_url(listing_url)
+    if super_groups is None:
+        log("No amenities found", "info")
+        return copy.deepcopy(self.column_dict)
+    for super_group in super_groups:  # dict
+        if any(
+            SUPER_GROUP_INCLUDE_PATTERNS.findall(super_group.get("titleString", ""))
+        ):
+            terms.extend(self.get_heating_info_from_super_group(super_group))
+    if len(terms) == 0:
+        log(
+            f"There was no heating information for {urlparse(listing_url).path}",
+            "info",
+        )
+        return copy.deepcopy(self.column_dict)
+
+    # categorize the correct dict and return
+    master_dict = copy.deepcopy(self.column_dict)
+    for input_string in terms:
+        log(f"{input_string = }", "debug")
+        result = {}
+        for key, pattern in CATEGORY_PATTERNS.items():
+            if bool(re.search(pattern, input_string)):
+                result[key] = True
+                log(f"Pattern matched on {key, pattern = }", "debug")
+            log(f"Pattern did not match on {key, pattern = }", "debug")
+        for key in result.keys():
+            master_dict[key] = result[key] | master_dict[key]
+
+    # You'll have to df.unnest this for use in a dataframe
+    log(f"{terms = }", "debug")
+    log(f"{master_dict = }", "debug")
+    log(f"Heating amenities found for {address}.", "info")
+    return master_dict
 
@@ -3158,29 +3294,7 @@

Source code in src\backend\redfinscraper.py -
687
-688
-689
-690
-691
-692
-693
-694
-695
-696
-697
-698
-699
-700
-701
-702
-703
-704
-705
-706
-707
-708
-709
+            
709
 710
 711
 712
@@ -3259,108 +3373,174 @@ 

785 786 787 -788

def get_house_attributes_from_metro(
-    self,
-    msa_name: str,
-    search_filters: dict[str, Any],
-    use_cached_gis_csv_csv: bool = False,
-) -> None:
-    """Main function. Get the heating attributes of a Metropolitan Statistical Area.
-
-    TODO:
-        statistics on metropolitan
-        Log statistics about the heating outlook of a metro.
-
-    Args:
-        msa_name (str): Metropolitan Statistical Area name
-        search_filters (dict[str, Any]): search filters
-        use_cached_gis_csv_csv (bool, optional): Whether to use an already made GIS CSV DataFrame. Defaults to False.
-
-    Returns:
-        None: None if there were no houses found in the metro
-    """
-    msa_name_file_safe = msa_name.strip().replace(", ", "_").replace(" ", "_")
-    metro_output_dir_path = Path(OUTPUT_DIR_PATH) / msa_name_file_safe
-
-    if use_cached_gis_csv_csv:
-        log("Loading csv from cache.", "info")
-        try:
-            search_page_csvs_df = pl.read_csv(
-                metro_output_dir_path / (msa_name_file_safe + ".csv"),
-                dtypes=self.DESIRED_CSV_SCHEMA,
-            )
-            log(
-                f"Loading csv from {metro_output_dir_path / (msa_name_file_safe + ".csv")} is complete.",
-                "info",
-            )
-        except FileNotFoundError:
-            log(
-                f"Loading csv from {metro_output_dir_path / (msa_name_file_safe + ".csv")} has failed, continuing with API search.",
-                "info",
-            )
-            search_page_csvs_df = self.get_gis_csv_for_zips_in_metro_with_filters(
-                msa_name, search_filters
-            )
-    else:
-        search_page_csvs_df = self.get_gis_csv_for_zips_in_metro_with_filters(
-            msa_name, search_filters
-        )
-
-    if search_page_csvs_df is None:
-        log(f"No houses found within {msa_name}. Try relaxing filters.", "info")
-        return None
-
-    url_col_name = "URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)"
-    search_page_csvs_df = search_page_csvs_df.filter(
-        (~pl.col(url_col_name).str.contains("(?i)unknown"))
-        .and_(pl.col("ADDRESS").str.len_chars().gt(0))
-        .and_(pl.col("SQUARE FEET").is_not_null())
-        .and_(pl.col("YEAR BUILT").is_not_null())
-    )
-    # .unique(subset=["LATITUDE", "LONGITUDE"], maintain_order=True)
-    # sometimes when there are two of the same listings you'll see the lot and the house. cant determine at this stage, so just leaving duplicates. hopefully this can be handled in viewer
-    # also somehow gets GIS-CSV for search pages that dont allow it
-
-    log(f"Found {search_page_csvs_df.height} possible houses in {msa_name}", "info")
-    os.makedirs(metro_output_dir_path, exist_ok=True)
-    log(
-        f"Writing csv for metro to {metro_output_dir_path / (msa_name_file_safe + ".csv")}",
-        "debug",
-    )
-    search_page_csvs_df.write_csv(
-        metro_output_dir_path / (msa_name_file_safe + ".csv")
-    )
-
-    # go through whole csv and get the house attributes for each house. then partition the dataframe by ZIP and save files
-
-    log("Starting lookups on listing URLS", "info")
-    log(
-        f"Unique ZIP codes: {search_page_csvs_df["ZIP OR POSTAL CODE"].n_unique()}",
-        "info",
-    )
-    log(
-        f"Estimated completion time: {search_page_csvs_df.height * 3.58} seconds",
-        "info",
-    )
-
-    list_of_dfs_by_zip = search_page_csvs_df.partition_by("ZIP OR POSTAL CODE")
-
-    for df_of_zip in list_of_dfs_by_zip:
-        df_of_zip = (
-            df_of_zip.with_columns(
-                pl.concat_list([pl.col("ADDRESS"), pl.col(url_col_name)])
-                .map_elements(self.get_heating_terms_dict_from_listing)
-                .alias("nest")
-            )
-            .drop(url_col_name)
-            .unnest("nest")
-        )
-
-        zip = df_of_zip.select("ZIP OR POSTAL CODE").item(0, 0)
-        df_of_zip.write_csv(f"{metro_output_dir_path}{os.sep}{zip}.csv")
-
-    # log(f"In {msa_name}, there are {} homes with Electric fuel, {} homes with Natural Gas, {} homes with Propane, {} homes with Diesel/Heating Oil, {} homes with Wood/Pellet, {} homes with Solar Heating, {} homes with Heat Pumps, {} homes with Baseboard, {} homes with Furnace, {} homes with Boiler, {} homes with Radiator, {} homes with Radiant Floor")
-    log(f"Done with searching houses in {msa_name}!", "info")
+788
+789
+790
+791
+792
+793
+794
+795
+796
+797
+798
+799
+800
+801
+802
+803
+804
+805
+806
+807
+808
+809
+810
+811
+812
+813
+814
+815
+816
+817
+818
+819
+820
+821
+822
+823
+824
+825
+826
+827
+828
+829
+830
+831
+832
def get_house_attributes_from_metro(
+    self,
+    msa_name: str,
+    search_filters: dict[str, Any],
+    use_cached_gis_csv_csv: bool = False,
+) -> None:
+    """Main function. Get the heating attributes of a Metropolitan Statistical Area.
+
+    TODO:
+        statistics on metropolitan
+        Log statistics about the heating outlook of a metro.
+
+    Args:
+        msa_name (str): Metropolitan Statistical Area name
+        search_filters (dict[str, Any]): search filters
+        use_cached_gis_csv_csv (bool, optional): Whether to use an already made GIS CSV DataFrame. Defaults to False.
+
+    Returns:
+        None: None if there were no houses found in the metro
+    """
+    file_safe_msa_name = msa_name.strip().replace(", ", "_").replace(" ", "_")
+    METRO_OUTPUT_DIR_PATH = OUTPUT_DIR_PATH / file_safe_msa_name
+
+    if use_cached_gis_csv_csv:
+        log("Loading csv from cache.", "info")
+        try:
+            search_page_csvs_df = pl.read_csv(
+                METRO_OUTPUT_DIR_PATH / (file_safe_msa_name + ".csv"),
+                dtypes=self.DESIRED_CSV_SCHEMA,
+            )
+            log(
+                f"Loading csv from {METRO_OUTPUT_DIR_PATH / (file_safe_msa_name + ".csv")} is complete.",
+                "info",
+            )
+        except FileNotFoundError:
+            log(
+                f"Loading csv from {METRO_OUTPUT_DIR_PATH / (file_safe_msa_name + ".csv")} has failed, continuing with API search.",
+                "info",
+            )
+            search_page_csvs_df = self.get_gis_csv_for_zips_in_metro_with_filters(
+                msa_name, search_filters
+            )
+    else:
+        search_page_csvs_df = self.get_gis_csv_for_zips_in_metro_with_filters(
+            msa_name, search_filters
+        )
+
+    if search_page_csvs_df is None:
+        log(f"No houses found within {msa_name}. Try relaxing filters.", "info")
+        return None
+
+    url_col_name = "URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)"
+    search_page_csvs_df = search_page_csvs_df.filter(
+        (~pl.col(url_col_name).str.contains("(?i)unknown"))
+        .and_(pl.col("ADDRESS").str.len_chars().gt(0))
+        .and_(pl.col("SQUARE FEET").is_not_null())
+        .and_(pl.col("YEAR BUILT").is_not_null())
+    )
+    # doing this twice so that the search page does not have nulls in the year built column.
+    min_year_built = search_filters.get("min year built")
+    max_year_built = search_filters.get("max year built")
+    assert min_year_built is not None and max_year_built is not None
+
+    # max() Acts like a Boolean OR
+    search_page_csvs_df = (
+        search_page_csvs_df.filter(
+            pl.col("YEAR BUILT")
+            .ge(int(min_year_built))
+            .and_(pl.col("YEAR BUILT").le(int(max_year_built)))
+        )
+        .group_by(by=["LATITUDE", "LONGITUDE"])
+        .max()
+    )
+
+    log(f"Found {search_page_csvs_df.height} possible houses in {msa_name}", "info")
+    METRO_OUTPUT_DIR_PATH.mkdir(parents=True, exist_ok=True)
+    log(
+        f"Writing csv for metro to {METRO_OUTPUT_DIR_PATH / (file_safe_msa_name + ".csv")}",
+        "debug",
+    )
+    search_page_csvs_df.write_csv(
+        METRO_OUTPUT_DIR_PATH / (file_safe_msa_name + ".csv")
+    )
+
+    # go through whole csv and get the house attributes for each house. then partition the dataframe by ZIP and save files
+
+    log("Starting lookups on listing URLS", "info")
+    log(
+        f"Unique ZIP codes: {search_page_csvs_df["ZIP OR POSTAL CODE"].n_unique()}",
+        "info",
+    )
+    log(
+        f"Estimated completion time: {search_page_csvs_df.height * 4.5} seconds",
+        "info",
+    )
+
+    list_of_dfs_by_zip = search_page_csvs_df.partition_by("ZIP OR POSTAL CODE")
+
+    for i, _ in enumerate(list_of_dfs_by_zip):
+        list_of_dfs_by_zip[i] = (
+            list_of_dfs_by_zip[i]
+            .with_columns(
+                pl.concat_list([pl.col("ADDRESS"), pl.col(url_col_name)])
+                .map_elements(self.get_heating_terms_dict_from_listing)
+                .alias("nest")
+            )
+            .drop(url_col_name)
+            .unnest("nest")
+        )
+
+        zip = list_of_dfs_by_zip[i].select("ZIP OR POSTAL CODE").item(0, 0)
+        list_of_dfs_by_zip[i].write_csv(f"{METRO_OUTPUT_DIR_PATH / str(zip)}.csv")
+
+    if len(list_of_dfs_by_zip) > 0:
+        concat_df = pl.concat(list_of_dfs_by_zip)
+        log(f"Information on {msa_name}:", "info")
+        log(
+            f"num entries: {concat_df.height}, avg. house price: ${concat_df.get_column("PRICE").mean():,.2f}, electric houses: {concat_df.get_column("Electricity").sum()}, gas houses: {concat_df.get_column("Natural Gas").sum()}, propane houses: {concat_df.get_column("Propane").sum()}, oil-fed houses: {concat_df.get_column("Diesel/Heating Oil").sum()}, wood-fed houses: {concat_df.get_column("Wood/Pellet").sum()}, solar-heated houses: {concat_df.get_column("Solar Heating").sum()}, heat pump houses: {concat_df.get_column("Heat Pump").sum()}, baseboard houses: {concat_df.get_column("Baseboard").sum()}, furnace houses: {concat_df.get_column("Furnace").sum()}, boiler houses: {concat_df.get_column("Boiler").sum()}, radiator houses: {concat_df.get_column("Radiator").sum()}, houses with radiant floors: {concat_df.get_column("Radiant Floor").sum()}",
+            "info",
+        )
+
+        concat_df.write_csv(f"{METRO_OUTPUT_DIR_PATH}/full_info.csv")
+
+    log(f"Done with searching houses in {msa_name}!", "info")
 
@@ -3439,29 +3619,29 @@

Source code in src\backend\redfinscraper.py -
386
-387
-388
-389
-390
+            
def get_region_info_from_zipcode(self, zip_code: str) -> Any:
-    """Get the region ifo from a ZIP code.
-
-    Args:
-        zip_code (str): the ZIP code
-
-    Returns:
-        Any: response
-    """
-    return self.rf.meta_request(
-        "api/region", {"region_id": zip_code, "region_type": 2, "tz": True, "v": 8}
-    )
+397
+398
+399
+400
+401
def get_region_info_from_zipcode(self, zip_code: str) -> Any:
+    """Get the region ifo from a ZIP code.
+
+    Args:
+        zip_code (str): the ZIP code
+
+    Returns:
+        Any: response
+    """
+    return self.rf.meta_request(
+        "api/region", {"region_id": zip_code, "region_type": 2, "tz": True, "v": 8}
+    )
 
@@ -3540,26 +3720,7 @@

Source code in src\backend\redfinscraper.py -
495
-496
-497
-498
-499
-500
-501
-502
-503
-504
-505
-506
-507
-508
-509
-510
-511
-512
-513
-514
+            
514
 515
 516
 517
@@ -3585,52 +3746,71 @@ 

537 538 539 -540

def get_super_groups_from_url(self, listing_url: str) -> list | None:
-    """Get super group list from listing url.
-
-    Args:
-        listing_url (str): The path part of the listing URL. This is without the "redfin.com" part. Include the first forward slash
-
-    Returns:
-        list | None: List of all super groups from a Redfin Url. None if an error is encountered or if no super groups were found
-    """
-    if "redfin" in listing_url:
-        listing_url = urlparse(listing_url).path
-
-    try:
-        time.sleep(random.uniform(1.2, 2.1))
-        initial_info = self.rf.initial_info(listing_url)
-    except json.JSONDecodeError:
-        log(f"Could not get initial info for {listing_url =}", "warn")
-        return None
-    try:
-        property_id = initial_info["payload"]["propertyId"]
-    except KeyError:
-        log("Could not find property id", "critical")
-        return None
-    try:
-        listing_id = initial_info["payload"]["listingId"]
-    except KeyError:
-        listing_id = None
-        log(
-            "Could not find listing id. Will try to continue. if errors in final zip csv, this might be the issue",
-            "warn",
-        )
+540
+541
+542
+543
+544
+545
+546
+547
+548
+549
+550
+551
+552
+553
+554
+555
+556
+557
+558
+559
def get_super_groups_from_url(self, listing_url: str) -> list | None:
+    """Get super group list from listing url.
+
+    Args:
+        listing_url (str): The path part of the listing URL. This is without the "redfin.com" part. Include the first forward slash
+
+    Returns:
+        list | None: List of all super groups from a Redfin Url. None if an error is encountered or if no super groups were found
+    """
+    if "redfin" in listing_url:
+        listing_url = urlparse(listing_url).path
+
     try:
-        time.sleep(random.uniform(1.1, 2.1))
-        if listing_id is None:
-            mls_data = self.working_below_the_fold(property_id)
-        else:
-            mls_data = self.working_below_the_fold(property_id, listing_id)
-    except json.JSONDecodeError:
-        log(f"Could not find mls details for {listing_url = }", "warn")
-        return None
-    try:
-        super_groups = mls_data["payload"]["amenitiesInfo"]["superGroups"]
-    except KeyError:
-        log(f"Could not find property details for {listing_url = }", "warn")
-        return None
-    return super_groups
+        self._rate_limit()
+        initial_info = self.rf.initial_info(listing_url)
+    except json.JSONDecodeError:
+        log(f"Could not get initial info for {listing_url =}", "critical")
+        return None
+    try:
+        property_id = initial_info["payload"]["propertyId"]
+    except KeyError:
+        log("Could not find property id", "critical")
+        return None
+    try:
+        listing_id = initial_info["payload"]["listingId"]
+    except KeyError:
+        listing_id = None
+        log(
+            "Could not find listing id. Will try to continue. if errors in final zip csv, this might be the issue",
+            "debug",
+        )
+    try:
+        self._rate_limit()
+        if listing_id is None:
+            mls_data = self.working_below_the_fold(property_id)
+        else:
+            mls_data = self.working_below_the_fold(property_id, listing_id)
+    except json.JSONDecodeError:
+        log(f"Could not find mls details for {listing_url = }", "warn")
+        return None
+    try:
+        super_groups = mls_data["payload"]["amenitiesInfo"]["superGroups"]
+    except KeyError:
+        log(f"Could not find property details for {listing_url = }", "warn")
+        return None
+    return super_groups
 
@@ -3709,11 +3889,7 @@

Source code in src\backend\redfinscraper.py -
343
-344
-345
-346
-347
+            
347
 348
 349
 350
@@ -3723,21 +3899,25 @@ 

354 355 356 -357

def meta_request_download(self, url: str, search_params) -> str:
-    """Method for downloading objects from Redfin.
-
-    Args:
-        url (str): the Redfin URL
-
-    Returns:
-        str: the unicode text response
-    """
-    response = requests.get(
-        self.rf.base + url, params=search_params, headers=self.rf.user_agent_header
-    )
-    log(response.request.url, "debug")
-    response.raise_for_status()
-    return response.text
+357
+358
+359
+360
+361
def meta_request_download(self, url: str, search_params) -> str:
+    """Method for downloading objects from Redfin.
+
+    Args:
+        url (str): the Redfin URL
+
+    Returns:
+        str: the unicode text response
+    """
+    response = requests.get(
+        self.rf.base + url, params=search_params, headers=self.rf.user_agent_header
+    )
+    log(response.request.url, "debug")
+    response.raise_for_status()
+    return response.text
 
@@ -3806,11 +3986,7 @@

Source code in src\backend\redfinscraper.py -
233
-234
-235
-236
-237
+            
237
 238
 239
 240
@@ -3913,114 +4089,118 @@ 

337 338 339 -340

def set_search_params(self, zip: str, search_filters: dict[str, Any]) -> None:
-    """Set the parameters for searching by ZIP code.
-
-    Args:
-        zip (str): the ZIP code
-        search_filters (dict[str, Any]): search filters for appending to a gis-csv path
-    """
-    try:
-        region_info = self.get_region_info_from_zipcode(zip)
-    except json.JSONDecodeError:
-        log(f"Could not decode region info for {zip}.", "warn")
-        return None
-    except HTTPError:
-        log(f"Could not retrieve region info for {zip}.", "warn")
-        return None
-
-    if search_filters.get("for sale sold") == "Sold":
-        sort_order = self.SortOrder.MOST_RECENTLY_SOLD.value
-    else:
-        sort_order = self.SortOrder.NEWEST.value
-    # TODO make sure to fix filtering so that its not just "single family homes"
-
-    try:
-        market = region_info["payload"]["rootDefaults"]["market"]
-        region_id = region_info["payload"]["rootDefaults"]["region_id"]
-        status = str(region_info["payload"]["rootDefaults"]["status"])
-    except KeyError:
-        log("Market, region, or status could not be identified ", "warn")
-        return None
-
-    self.search_params = {
-        "al": 1,
-        "has_deal": "false",
-        "has_dishwasher": "false",
-        "has_laundry_facility": "false",
-        "has_laundry_hookups": "false",
-        "has_parking": "false",
-        "has_pool": "false",
-        "has_short_term_lease": "false",
-        "include_pending_homes": "false",  # probably an "include" option
-        "isRentals": "false",
-        "is_furnished": "false",
-        "is_income_restricted": "false",
-        "is_senior_living": "false",
-        "max_year_built": search_filters.get("max year built"),
-        "min_year_built": search_filters.get("min year built"),
-        "market": market,
-        "min_stories": search_filters.get("min stories"),
-        "num_homes": 350,
-        "ord": sort_order,
-        "page_number": "1",
-        "pool": "false",
-        "region_id": region_id,
-        "region_type": "2",
-        "status": status,
-        "travel_with_traffic": "false",
-        "travel_within_region": "false",
-        "utilities_included": "false",
-        "v": "8",
-    }
-    if search_filters.get("for sale sold") == "Sold":
-        self.search_params["sold_within_days"] = search_filters.get("sold within")
-        self.search_params["status"] = 9
-    else:
-        self.search_params["sf"] = "1, 2, 3, 4, 5, 6, 7"
-        match [
-            search_filters.get("status coming soon"),
-            search_filters.get("status active"),
-            search_filters.get("status pending"),
-        ]:
-            case [True, False, False]:
-                status = "8"
-            case [False, True, False]:
-                status = "1"
-            case [False, False, True]:
-                status = "130"
-            case [True, True, False]:
-                status = "9"
-            case [False, True, True]:
-                status = "139"
-            case [True, False, True]:
-                status = "138"
-            case [True, True, True]:
+340
+341
+342
+343
+344
def set_search_params(self, zip: str, search_filters: dict[str, Any]) -> None:
+    """Set the parameters for searching by ZIP code.
+
+    Args:
+        zip (str): the ZIP code
+        search_filters (dict[str, Any]): search filters for appending to a gis-csv path
+    """
+    try:
+        region_info = self.get_region_info_from_zipcode(zip)
+    except json.JSONDecodeError:
+        log(f"Could not decode region info for {zip}.", "warn")
+        return None
+    except HTTPError:
+        log(f"Could not retrieve region info for {zip}.", "warn")
+        return None
+
+    if search_filters.get("for sale sold") == "Sold":
+        sort_order = self.SortOrder.MOST_RECENTLY_SOLD.value
+    else:
+        sort_order = self.SortOrder.NEWEST.value
+    # TODO make sure to fix filtering so that its not just "single family homes"
+
+    try:
+        market = region_info["payload"]["rootDefaults"]["market"]
+        region_id = region_info["payload"]["rootDefaults"]["region_id"]
+        status = str(region_info["payload"]["rootDefaults"]["status"])
+    except KeyError:
+        log("Market, region, or status could not be identified ", "warn")
+        return None
+
+    self.search_params = {
+        "al": 1,
+        "has_deal": "false",
+        "has_dishwasher": "false",
+        "has_laundry_facility": "false",
+        "has_laundry_hookups": "false",
+        "has_parking": "false",
+        "has_pool": "false",
+        "has_short_term_lease": "false",
+        "include_pending_homes": "false",  # probably an "include" option
+        "isRentals": "false",
+        "is_furnished": "false",
+        "is_income_restricted": "false",
+        "is_senior_living": "false",
+        "max_year_built": search_filters.get("max year built"),
+        "min_year_built": search_filters.get("min year built"),
+        "market": market,
+        "min_stories": search_filters.get("min stories"),
+        "num_homes": 350,
+        "ord": sort_order,
+        "page_number": "1",
+        "pool": "false",
+        "region_id": region_id,
+        "region_type": "2",
+        "status": status,
+        "travel_with_traffic": "false",
+        "travel_within_region": "false",
+        "utilities_included": "false",
+        "v": "8",
+    }
+    if search_filters.get("for sale sold") == "Sold":
+        self.search_params["sold_within_days"] = search_filters.get("sold within")
+        self.search_params["status"] = 9
+    else:
+        self.search_params["sf"] = "1, 2, 3, 4, 5, 6, 7"
+        match [
+            search_filters.get("status coming soon"),
+            search_filters.get("status active"),
+            search_filters.get("status pending"),
+        ]:
+            case [True, False, False]:
+                status = "8"
+            case [False, True, False]:
+                status = "1"
+            case [False, False, True]:
+                status = "130"
+            case [True, True, False]:
+                status = "9"
+            case [False, True, True]:
                 status = "139"
-
-        self.search_params["status"] = status
-
-    if (max_sqft := search_filters.get("max sqft")) != "None":
-        self.search_params["max_sqft"] = max_sqft
-    if (min_sqft := search_filters.get("min sqft")) != "None":
-        self.search_params["min_sqft"] = min_sqft
-
-    if (max_price := search_filters.get("max price")) != "None":
-        self.search_params["max_price"] = max_price
-    if (min_price := search_filters.get("min price")) != "None":
-        self.search_params["min_price"] = min_price
-
-    houses = ""  # figure out how to join into comma string
-    if search_filters.get("house type house") is True:
-        houses = houses + "1"
-    if search_filters.get("house type condo") is True:
-        houses = houses + "2"
-    if search_filters.get("house type townhouse") is True:
-        houses = houses + "3"
-    if search_filters.get("house type mul fam") is True:
-        houses = houses + "4"
-
-    self.search_params["uipt"] = ",".join(list(houses))
+            case [True, False, True]:
+                status = "138"
+            case [True, True, True]:
+                status = "139"
+
+        self.search_params["status"] = status
+
+    if (max_sqft := search_filters.get("max sqft")) != "None":
+        self.search_params["max_sqft"] = max_sqft
+    if (min_sqft := search_filters.get("min sqft")) != "None":
+        self.search_params["min_sqft"] = min_sqft
+
+    if (max_price := search_filters.get("max price")) != "None":
+        self.search_params["max_price"] = max_price
+    if (min_price := search_filters.get("min price")) != "None":
+        self.search_params["min_price"] = min_price
+
+    houses = ""  # figure out how to join into comma string
+    if search_filters.get("house type house") is True:
+        houses = houses + "1"
+    if search_filters.get("house type condo") is True:
+        houses = houses + "2"
+    if search_filters.get("house type townhouse") is True:
+        houses = houses + "3"
+    if search_filters.get("house type mul fam") is True:
+        houses = houses + "4"
+
+    self.search_params["uipt"] = ",".join(list(houses))
 
@@ -4115,11 +4295,7 @@

Source code in src\backend\redfinscraper.py -
359
-360
-361
-362
-363
+            
363
 364
 365
 366
@@ -4140,32 +4316,36 @@ 

381 382 383 -384

def working_below_the_fold(self, property_id: str, listing_id: str = "") -> Any:
-    """A below_the_fold method that accepts a listing ID.
-    Note:
-        If you can get the listing ID, make sure to pass it to this function. You will possibly get incorrect data if you do not pass it
-
-    Args:
-        property_id (str): the property ID
-        listing_id (str): The listing ID. Defaults to False.
+384
+385
+386
+387
+388
def working_below_the_fold(self, property_id: str, listing_id: str = "") -> Any:
+    """A below_the_fold method that accepts a listing ID.
+    Note:
+        If you can get the listing ID, make sure to pass it to this function. You will possibly get incorrect data if you do not pass it
 
-    Returns:
-        Any: response
-    """
-    if listing_id:
-        params = {
-            "accessLevel": 1,
-            "propertyId": property_id,
-            "listingId": listing_id,
-            "pageType": 1,
-        }
-    else:
-        params = {
-            "accessLevel": 1,
-            "propertyId": property_id,
-            "pageType": 1,
-        }
-    return self.rf.meta_request("/api/home/details/belowTheFold", params)
+    Args:
+        property_id (str): the property ID
+        listing_id (str): The listing ID. Defaults to False.
+
+    Returns:
+        Any: response
+    """
+    if listing_id:
+        params = {
+            "accessLevel": 1,
+            "propertyId": property_id,
+            "listingId": listing_id,
+            "pageType": 1,
+        }
+    else:
+        params = {
+            "accessLevel": 1,
+            "propertyId": property_id,
+            "pageType": 1,
+        }
+    return self.rf.meta_request("/api/home/details/belowTheFold", params)
 
diff --git a/backend/secondarydata/index.html b/backend/secondarydata/index.html index 25b5d79..2785523 100755 --- a/backend/secondarydata/index.html +++ b/backend/secondarydata/index.html @@ -414,29 +414,15 @@ @@ -848,691 +820,703 @@

Source code in src\backend\secondarydata.py -
654
-655
-656
-657
-658
-659
-660
-661
-662
-663
-664
-665
-666
-667
-668
-669
-670
-671
-672
-673
-674
-675
-676
-677
-678
-679
-680
-681
-682
-683
-684
-685
-686
-687
-688
-689
-690
-691
-692
-693
-694
-695
-696
-697
-698
-699
-700
-701
-702
-703
-704
-705
-706
-707
-708
-709
-710
-711
-712
-713
-714
-715
-716
-717
-718
-719
-720
-721
-722
-723
-724
-725
-726
-727
-728
-729
-730
-731
-732
-733
-734
-735
-736
-737
-738
-739
-740
-741
-742
-743
-744
-745
-746
-747
-748
-749
-750
-751
-752
-753
-754
-755
-756
-757
-758
-759
-760
-761
-762
-763
-764
-765
-766
-767
-768
-769
-770
-771
-772
-773
-774
-775
-776
-777
-778
-779
-780
-781
-782
-783
-784
-785
-786
-787
-788
-789
-790
-791
-792
-793
-794
-795
-796
-797
-798
-799
-800
-801
-802
-803
-804
-805
-806
-807
-808
-809
-810
-811
-812
-813
-814
-815
-816
-817
-818
-819
-820
-821
-822
-823
-824
-825
-826
-827
-828
-829
-830
-831
-832
-833
-834
-835
-836
-837
-838
-839
-840
-841
-842
-843
-844
-845
-846
-847
-848
-849
-850
-851
-852
-853
-854
-855
-856
-857
-858
-859
-860
-861
-862
-863
-864
-865
-866
-867
-868
-869
-870
-871
-872
-873
-874
-875
-876
-877
-878
-879
-880
-881
-882
-883
-884
-885
-886
-887
-888
-889
-890
-891
-892
-893
-894
-895
-896
-897
-898
-899
-900
-901
-902
-903
-904
-905
-906
-907
-908
-909
-910
-911
-912
-913
-914
-915
-916
-917
-918
-919
-920
-921
-922
-923
-924
-925
-926
-927
-928
-929
-930
-931
-932
-933
-934
-935
-936
-937
-938
-939
-940
-941
-942
-943
-944
-945
-946
-947
-948
-949
-950
-951
-952
-953
-954
-955
-956
-957
-958
-959
-960
-961
-962
-963
-964
-965
-966
-967
-968
-969
-970
-971
-972
-973
-974
-975
-976
-977
-978
-979
-980
-981
-982
-983
-984
-985
-986
-987
-988
-989
-990
-991
-992
-993
-994
-995
-996
class CensusDataRetriever:
-    """Interact with the Census data API.
-
-    Note:
-        ACS5 paths can be found here: https://api.census.gov/data/2019/acs/acs5.html"""
-
-    def __init__(self) -> None:
-        self.base_url = "https://data.census.gov/"
-        # https://api.census.gov/data/2021/acs/acs5/profile/variables.html
-        self.api_key = os.getenv("CENSUS_API_KEY")
-        if self.api_key is None:
-            log(
-                "No Census API key found in a .env file in project directory. please request a key at https://api.census.gov/data/key_signup.html",
-                "critical",
-            )
-            exit()
-        self.MAX_COL_NAME_LENGTH = 80
-
-    def _get(self, url: str) -> requests.Response | None:
-        r = requests.get(url, timeout=65)
-        if r.status_code == 400:
-            log(f"Unknown variable {r.text.split("variable ")[-1]}", "info")
-            return None
-        return r
-
-    def get_and_cache_data(
-        self, file_name: str, url_to_lookup_on_miss: str
-    ) -> dict[str, str] | bool:
-        """Cache files.
-
-        Args:
-            file_name (str): file name to save/lookup
-            url_to_lookup_on_miss (str): the Census url to lookup
-
-        Returns:
-            bool | dict[str, str] | None | Any: the dict of `tablename: label` or
-        """
-        CENSUS_DATA_DIR_PATH.mkdir(parents=True, exist_ok=True)
-
-        my_json = None
+              
 656
+ 657
+ 658
+ 659
+ 660
+ 661
+ 662
+ 663
+ 664
+ 665
+ 666
+ 667
+ 668
+ 669
+ 670
+ 671
+ 672
+ 673
+ 674
+ 675
+ 676
+ 677
+ 678
+ 679
+ 680
+ 681
+ 682
+ 683
+ 684
+ 685
+ 686
+ 687
+ 688
+ 689
+ 690
+ 691
+ 692
+ 693
+ 694
+ 695
+ 696
+ 697
+ 698
+ 699
+ 700
+ 701
+ 702
+ 703
+ 704
+ 705
+ 706
+ 707
+ 708
+ 709
+ 710
+ 711
+ 712
+ 713
+ 714
+ 715
+ 716
+ 717
+ 718
+ 719
+ 720
+ 721
+ 722
+ 723
+ 724
+ 725
+ 726
+ 727
+ 728
+ 729
+ 730
+ 731
+ 732
+ 733
+ 734
+ 735
+ 736
+ 737
+ 738
+ 739
+ 740
+ 741
+ 742
+ 743
+ 744
+ 745
+ 746
+ 747
+ 748
+ 749
+ 750
+ 751
+ 752
+ 753
+ 754
+ 755
+ 756
+ 757
+ 758
+ 759
+ 760
+ 761
+ 762
+ 763
+ 764
+ 765
+ 766
+ 767
+ 768
+ 769
+ 770
+ 771
+ 772
+ 773
+ 774
+ 775
+ 776
+ 777
+ 778
+ 779
+ 780
+ 781
+ 782
+ 783
+ 784
+ 785
+ 786
+ 787
+ 788
+ 789
+ 790
+ 791
+ 792
+ 793
+ 794
+ 795
+ 796
+ 797
+ 798
+ 799
+ 800
+ 801
+ 802
+ 803
+ 804
+ 805
+ 806
+ 807
+ 808
+ 809
+ 810
+ 811
+ 812
+ 813
+ 814
+ 815
+ 816
+ 817
+ 818
+ 819
+ 820
+ 821
+ 822
+ 823
+ 824
+ 825
+ 826
+ 827
+ 828
+ 829
+ 830
+ 831
+ 832
+ 833
+ 834
+ 835
+ 836
+ 837
+ 838
+ 839
+ 840
+ 841
+ 842
+ 843
+ 844
+ 845
+ 846
+ 847
+ 848
+ 849
+ 850
+ 851
+ 852
+ 853
+ 854
+ 855
+ 856
+ 857
+ 858
+ 859
+ 860
+ 861
+ 862
+ 863
+ 864
+ 865
+ 866
+ 867
+ 868
+ 869
+ 870
+ 871
+ 872
+ 873
+ 874
+ 875
+ 876
+ 877
+ 878
+ 879
+ 880
+ 881
+ 882
+ 883
+ 884
+ 885
+ 886
+ 887
+ 888
+ 889
+ 890
+ 891
+ 892
+ 893
+ 894
+ 895
+ 896
+ 897
+ 898
+ 899
+ 900
+ 901
+ 902
+ 903
+ 904
+ 905
+ 906
+ 907
+ 908
+ 909
+ 910
+ 911
+ 912
+ 913
+ 914
+ 915
+ 916
+ 917
+ 918
+ 919
+ 920
+ 921
+ 922
+ 923
+ 924
+ 925
+ 926
+ 927
+ 928
+ 929
+ 930
+ 931
+ 932
+ 933
+ 934
+ 935
+ 936
+ 937
+ 938
+ 939
+ 940
+ 941
+ 942
+ 943
+ 944
+ 945
+ 946
+ 947
+ 948
+ 949
+ 950
+ 951
+ 952
+ 953
+ 954
+ 955
+ 956
+ 957
+ 958
+ 959
+ 960
+ 961
+ 962
+ 963
+ 964
+ 965
+ 966
+ 967
+ 968
+ 969
+ 970
+ 971
+ 972
+ 973
+ 974
+ 975
+ 976
+ 977
+ 978
+ 979
+ 980
+ 981
+ 982
+ 983
+ 984
+ 985
+ 986
+ 987
+ 988
+ 989
+ 990
+ 991
+ 992
+ 993
+ 994
+ 995
+ 996
+ 997
+ 998
+ 999
+1000
+1001
+1002
+1003
+1004
class CensusDataRetriever:
+    """Interact with the Census data API.
+
+    Note:
+        ACS5 paths can be found here: https://api.census.gov/data/2019/acs/acs5.html"""
+
+    def __init__(self) -> None:
+        self.base_url = "https://data.census.gov/"
+        # https://api.census.gov/data/2021/acs/acs5/profile/variables.html
+        self.api_key = os.getenv("CENSUS_API_KEY")
+        if self.api_key is None:
+            log(
+                "No Census API key found in a .env file in project directory. please request a key at https://api.census.gov/data/key_signup.html",
+                "critical",
+            )
+            exit()
+        self.MAX_COL_NAME_LENGTH = 80
+
+    def _get(self, url: str) -> requests.Response | None:
+        r = requests.get(url, timeout=65)
+        if r.status_code == 400:
+            log(f"Unknown variable {r.text.split("variable ")[-1]}", "info")
+            return None
+        return r
+
+    def get_and_cache_data(
+        self, file_name: str, url_to_lookup_on_miss: str
+    ) -> dict[str, str] | bool:
+        """Cache files.
+
+        Args:
+            file_name (str): file name to save/lookup
+            url_to_lookup_on_miss (str): the Census url to lookup
+
+        Returns:
+            bool | dict[str, str] | None | Any: the dict of `tablename: label` or
+        """
+        CENSUS_DATA_CACHE_PATH.mkdir(parents=True, exist_ok=True)
 
-        try:
-            with open(CENSUS_DATA_DIR_PATH / file_name, mode="r") as f:
-                log(f"Reading {file_name}", "debug")
-                try:
-                    my_json = json.load(f)
-                except json.JSONDecodeError:
-                    log("Could not decode cached file", "error")
-                    return False
-        except FileNotFoundError:
-            req = self._get(url_to_lookup_on_miss)
-            if req is None:
-                log(f"Could not find census file {req = }", "error")
-                return False
-            req.raise_for_status()
-            my_json = req.json()
-            with open(CENSUS_DATA_DIR_PATH / file_name, "w") as f:
-                json.dump(my_json, f)
-
-        return my_json
-
-    def get_race_makeup_by_zcta(self, zcta: str) -> str | None:
-        """Get race make up by zcta from. DO NOT USE
+        my_json = None
+
+        try:
+            with open(CENSUS_DATA_CACHE_PATH / file_name, mode="r") as f:
+                log(f"Reading {file_name}", "debug")
+                try:
+                    my_json = json.load(f)
+                except json.JSONDecodeError:
+                    log("Could not decode cached census file", "error")
+                    return False
+        except FileNotFoundError:
+            req = self._get(url_to_lookup_on_miss)
+            log(f"Getting {url_to_lookup_on_miss}...", "info")
+            if req is None:
+                log(f"Could not get census file {file_name}.", "error")
+                return False
+            req.raise_for_status()
+            my_json = req.json()
+            with open(CENSUS_DATA_CACHE_PATH / file_name, "w") as f:
+                json.dump(my_json, f)
+
+        return my_json
 
-        Note:
-            use `get_table_group_for_zcta_by_state_by_year`
+    def get_race_makeup_by_zcta(self, zcta: str) -> str | None:
+        """Get race make up by zcta from. DO NOT USE
 
-        Args:
-            zcta (str): zcta
+        Note:
+            use `get_table_group_for_zcta_by_state_by_year`
 
-        Returns:
-            str | None: text or none
-        """
-        # get white, black, american indian/native alaskan, asian, NH/PI, other. note that these are estimates, margin of error can be had with "M"
-        req = self._get(
-            f"https://api.census.gov/data/2021/acs/acs5/profile?get=DP05_0064E,DP05_0065E,DP05_0066E,DP05_0067E,DP05_0068E,DP05_0069E&for=zip%20code%20tabulation%20area:{zcta}&key={self.api_key}"
-        )
-        if req is None:
-            return None
-        return req.text
-
-    def get_acs5_profile_table_to_group_name(
-        self, table: str, year: str
-    ) -> dict[str, Any] | None:
-        """Get a JSON representation of a table's attributes.
-
-        Note:
-            Tables must be:
-                * DP02
-                * DP02PR
-                * DP03
-                * DP04
-                * DP05
-
-            Returned object will have entries similar to:
-            ```json
-            "DP05_0037M": {
-                "label": "Margin of Error!!RACE!!Total population!!One race!!White",
-                "concept": "ACS DEMOGRAPHIC AND HOUSING ESTIMATES",
-                "predicateType": "int",
-                "group": "DP05",
-                "limit": 0,
-                "predicateOnly": true
-            }
-            ```
-
-        Args:
-            table (str): the table to lookup
-            year (str): which acs5 year to look up
-
-        Returns:
-            str | Any: json object
-        """
-        file_name = f"{year}-acs5-profile-groups-{table}.json"
-        groups_url = (
-            f"https://api.census.gov/data/{year}/acs/acs5/profile/groups/{table}.json"
-        )
-        groups_to_label_translation = self.get_and_cache_data(file_name, groups_url)
-        if groups_to_label_translation is False:
-            log("Something is wrong with groups label dict", "warn")
-            return None
-        return groups_to_label_translation["variables"]  # type: ignore
-
-    def translate_and_truncate_unique_acs5_profile_groups_to_labels_for_header_list(
-        self, headers: list[str], table: str, year: str
-    ) -> None:
-        """Get the label name for a table and row for the acs5 profile surveys.
-
-        Args:
-            headers (list[str]): header row
-            table (str): have to look again
-            year (str): the year
-
-        Returns:
-            None: translates the list of table_row_selector to its english label
-        """
-        # is going to read the file multiple times, save last req as {"table": req_json[table]...} for this?
-        groups_to_label_translation_dict = self.get_acs5_profile_table_to_group_name(
-            table, year
-        )
-        if groups_to_label_translation_dict is None:
-            log("Could not translate headers", "warn")
-            return groups_to_label_translation_dict
-
-        for idx, header in enumerate(headers):
-            new_col_name_dict = groups_to_label_translation_dict.get(header)
-            if new_col_name_dict is None:
-                # returns none if not in dict, means we have custom name and can continue
-                continue
-            new_col_name = new_col_name_dict["label"]
-            # qgis doesnt allow field names of 80+ chars. massage into form, then cut off
-            # delimiter for table subsection
-            new_col_name = re.sub("!!", " ", new_col_name)
-            new_col_name = re.sub(r"\s+", " ", new_col_name)
-            # easier to read
-            new_col_name_parts = new_col_name.split(" ")
-            for idy, no_format in enumerate(new_col_name_parts):
-                new_col_name_parts[idy] = no_format.capitalize()
-            new_col_name = "".join(new_col_name_parts)
-            # shortenings to fit length requirement
-            for key, value in replace_dict.items():
-                new_col_name = re.sub(key, value, new_col_name)
-            # limiter
-            new_col_name = new_col_name[
-                : min(len(new_col_name), self.MAX_COL_NAME_LENGTH)
-            ]
-
-            if new_col_name not in headers[:idx]:
-                headers[idx] = new_col_name
-
-    def get_acs5_profile_table_group_for_zcta_by_year(
-        self, table: str, year: str
-    ) -> str:
-        """CSV output of an acs 5 year profile survey table.
-
-        TODO:
-            Update func name
-
-        Args:
-            table (str): census demo acs5 table
-            year (str): year to search
-
-        Returns:
-            str: file path where output is saved
-        """
-        file_name = f"{year}-acs-profile-table-{table}.json"
-        url = f"https://api.census.gov/data/{year}/acs/acs5/profile?get=group({table})&for=zip%20code%20tabulation%20area:*"
-        list_of_list_table_json = self.get_and_cache_data(file_name, url)
-
-        if list_of_list_table_json is False:
-            log(
-                f"Could not load table {table}. Perhaps the api is down or there was an error saving/reading the file.",
-                "warn",
-            )
-            return ""
-
-        self.translate_and_truncate_unique_acs5_profile_groups_to_labels_for_header_list(
-            list_of_list_table_json[0],  # type: ignore
-            table,
-            year,  # type: ignore
-        )
+        Args:
+            zcta (str): zcta
+
+        Returns:
+            str | None: text or none
+        """
+        # get white, black, american indian/native alaskan, asian, NH/PI, other. note that these are estimates, margin of error can be had with "M"
+        req = self._get(
+            f"https://api.census.gov/data/2021/acs/acs5/profile?get=DP05_0064E,DP05_0065E,DP05_0066E,DP05_0067E,DP05_0068E,DP05_0069E&for=zip%20code%20tabulation%20area:{zcta}&key={self.api_key}"
+        )
+        if req is None:
+            return None
+        return req.text
+
+    def _get_acs5_profile_table_to_group_name(
+        self, table: str, year: str
+    ) -> dict[str, Any] | None:
+        """Get a JSON representation of a table's attributes.
+
+        Note:
+            Tables must be:
+                * DP02
+                * DP02PR
+                * DP03
+                * DP04
+                * DP05
+
+            Returned object will have entries similar to:
+            ```json
+            "DP05_0037M": {
+                "label": "Margin of Error!!RACE!!Total population!!One race!!White",
+                "concept": "ACS DEMOGRAPHIC AND HOUSING ESTIMATES",
+                "predicateType": "int",
+                "group": "DP05",
+                "limit": 0,
+                "predicateOnly": true
+            }
+            ```
+
+        Args:
+            table (str): the table to lookup
+            year (str): which acs5 year to look up
+
+        Returns:
+            str | Any: json object
+        """
+        file_name = f"{year}-acs5-profile-groups-{table}.json"
+        groups_url = (
+            f"https://api.census.gov/data/{year}/acs/acs5/profile/groups/{table}.json"
+        )
+        groups_to_label_translation = self.get_and_cache_data(file_name, groups_url)
+        if groups_to_label_translation is False:
+            log("Something is wrong with groups label dict", "warn")
+            return None
+        return groups_to_label_translation["variables"]  # type: ignore
+
+    def _translate_and_truncate_unique_acs5_profile_groups_to_labels_for_header_list(
+        self, headers: list[str], table: str, year: str
+    ) -> None:
+        """Get the label name for a table and row for the acs5 profile surveys.
+
+        Args:
+            headers (list[str]): header row
+            table (str): have to look again
+            year (str): the year
+
+        Returns:
+            None: translates the list of table_row_selector to its english label
+        """
+        # is going to read the file multiple times, save last req as {"table": req_json[table]...} for this?
+        groups_to_label_translation_dict = self._get_acs5_profile_table_to_group_name(
+            table, year
+        )
+        if groups_to_label_translation_dict is None:
+            log("Could not translate headers", "warn")
+            return groups_to_label_translation_dict
+
+        for idx, header in enumerate(headers):
+            new_col_name_dict = groups_to_label_translation_dict.get(header)
+            if new_col_name_dict is None:
+                # returns none if not in dict, means we have custom name and can continue
+                continue
+            new_col_name = new_col_name_dict["label"]
+            # qgis doesnt allow field names of 80+ chars. massage into form, then cut off
+            # delimiter for table subsection
+            new_col_name = new_col_name.replace("$", "D")
+            new_col_name = new_col_name.replace(",", "")
+            new_col_name = new_col_name.replace("'", "")
+            new_col_name = re.sub(r"\s+", " ", new_col_name)
+            new_col_name = new_col_name.replace("!!", " ")
+            # easier to read
+            new_col_name_parts = new_col_name.split(" ")
+            for idy, no_format in enumerate(new_col_name_parts):
+                new_col_name_parts[idy] = no_format.capitalize()
+            new_col_name = "".join(new_col_name_parts)
+            # shortenings to fit length requirement
+            for key, value in REPLACEMENT_DICT.items():
+                new_col_name = re.sub(key, value, new_col_name)
+            # limiter
+            new_col_name = new_col_name[
+                : min(len(new_col_name), self.MAX_COL_NAME_LENGTH)
+            ]
+
+            if new_col_name not in headers[:idx]:
+                headers[idx] = new_col_name
+
+    def generate_acs5_profile_table_group_for_zcta_by_year(
+        self, table: str, year: str
+    ) -> str:
+        """CSV output of an acs 5 year profile survey table.
+
+        TODO:
+            Update func name
+
+        Args:
+            table (str): census demo acs5 table
+            year (str): year to search
+
+        Returns:
+            str: file path where output is saved
+        """
+        file_name = f"{year}-acs-profile-table-{table}.json"
+        url = f"https://api.census.gov/data/{year}/acs/acs5/profile?get=group({table})&for=zip%20code%20tabulation%20area:*"
+        list_of_list_table_json = self.get_and_cache_data(file_name, url)
+
+        if list_of_list_table_json is False:
+            log(
+                f"Could not load table {table}. Perhaps the api is down or there was an error saving/reading the file.",
+                "warn",
+            )
+            return ""
 
-        df = pl.DataFrame(list_of_list_table_json, orient="row")
-        # funky stuff to get the first list to be the name of the columns
-        df = (
-            df.rename(df.head(1).to_dicts().pop())
-            .slice(1)  # type: ignore
-            .drop("NAME", cs.matches("(?i)^(ann)"), cs.matches(f"(?i){table}"))
-            .rename({"zip code tabulation area": "ZCTA"})
-            .cast(
-                {
-                    "ZCTA": pl.Int32,
-                }
-            )
-        )
-        file_path = CENSUS_DATA_DIR_PATH / "acs5-profile-group-{table}-zcta.csv"
-        df.write_csv(file_path)
-        return str(file_path)
-
-    def get_acs5_subject_table_to_group_name(
-        self, table: str, year: str
-    ) -> dict[str, Any] | None:
-        """Get a JSON representation of a table's attributes.
-
-        Note:
-            Tables can be found at: https://www.census.gov/acs/www/data/data-tables-and-tools/subject-tables/
-
-            Returned object will have entries similar to:
-            ```json
-            "DP05_0037M": {
-                "label": "Margin of Error!!RACE!!Total population!!One race!!White",
-                "concept": "ACS DEMOGRAPHIC AND HOUSING ESTIMATES",
-                "predicateType": "int",
-                "group": "DP05",
-                "limit": 0,
-                "predicateOnly": true
-            }
-            ```
-
-        Args:
-            table (str): the table to lookup
-            year (str): which acs5 year to look up
-
-        Returns:
-            str | Any: variables
-        """
-        file_name = f"{year}-acs5-subject-groups-{table}.json"
-        groups_url = (
-            f"https://api.census.gov/data/{year}/acs/acs5/subject/groups/{table}.json"
-        )
-        groups_to_label_translation = self.get_and_cache_data(file_name, groups_url)
-        if groups_to_label_translation is False:
-            log("Something is wrong with groups label dict", "warn")
-            return None
-        return groups_to_label_translation["variables"]  # type: ignore
-
-    def translate_and_truncate_unique_acs5_subject_groups_to_labels_for_header_list(
-        self, headers: list[str], table: str, year: str
-    ) -> None:
-        """Gets the label name for a table and row for the acs5 profile surveys.
-
-        Args:
-            headers (list[str]): headers
-            table (str): table
-            year (str): year
-        """
-        # is going to read the file multiple times, save last req as {"table": req_json[table]...} for this?
-        groups_to_label_translation_dict = self.get_acs5_subject_table_to_group_name(
-            table, year
-        )
-        if groups_to_label_translation_dict is None:
-            log("Could not translate headers", "warn")
-            return groups_to_label_translation_dict
-
-        for idx, header in enumerate(headers):
-            new_col_name_dict = groups_to_label_translation_dict.get(header)
-            if new_col_name_dict is None:
-                # returns none if not in dict, means we have custom name and can continue
-                continue
-            new_col_name = new_col_name_dict["label"]
-            # qgis doesnt allow field names of 80+ chars. massage into form, then cut off
-            # delimiter for table subsection
-            new_col_name = re.sub("!!", " ", new_col_name)
-            new_col_name = re.sub(r"\s+", " ", new_col_name)
-            # easier to read
-            new_col_name_parts = new_col_name.split(" ")
-            for idy, no_format in enumerate(new_col_name_parts):
-                new_col_name_parts[idy] = no_format.capitalize()
-            new_col_name = "".join(new_col_name_parts)
-            # shortenings to fit length requirement
-            for key, value in replace_dict.items():
-                new_col_name = re.sub(key, value, new_col_name)
-            # limiter
-            new_col_name = new_col_name[
-                : min(len(new_col_name), self.MAX_COL_NAME_LENGTH)
-            ]
-
-            if new_col_name not in headers[:idx]:
-                headers[idx] = new_col_name
-
-    def get_acs5_subject_table_group_for_zcta_by_year(
-        self, table: str, year: str
-    ) -> str:
-        """CSV output of a acs 5 year subject survey table
-
-        Args:
-            table (str): census acs5 table
-            year (str): year to search
-        """
-        file_name = f"{year}-acs-subject-table-{table}.json"
-        url = f"https://api.census.gov/data/{year}/acs/acs5/subject?get=group({table})&for=zip%20code%20tabulation%20area:*"
-        list_of_list_table_json = self.get_and_cache_data(file_name, url)
-
-        if list_of_list_table_json is False:
-            log(
-                f"Could not load table {table}. Perhaps the api is down or there was an error saving/reading the file.",
-                "warn",
-            )
-            return ""
-
-        self.translate_and_truncate_unique_acs5_subject_groups_to_labels_for_header_list(
-            list_of_list_table_json[0],  # type: ignore
-            table,
-            year,  # type: ignore
-        )
-
-        df = pl.DataFrame(list_of_list_table_json, orient="row")
-        # funky stuff to get the first list to be the name of the columns
-        df = (
-            df.rename(df.head(1).to_dicts().pop())
-            .slice(1)  # type: ignore
-            .drop("NAME", cs.matches("(?i)^(ann)"), cs.matches(f"(?i){table}"))
-            .rename({"zip code tabulation area": "ZCTA"})
-            .cast(
-                {
-                    "ZCTA": pl.Int32,
-                }
-            )
-        )
-        file_path = CENSUS_DATA_DIR_PATH / "acs5-subject-group-{table}-zcta.csv"
-        # may not have to write. but cache func doesn't return whether it hits or not
-        df.write_csv(file_path)
-        return str(file_path)
+        self._translate_and_truncate_unique_acs5_profile_groups_to_labels_for_header_list(
+            list_of_list_table_json[0],  # type: ignore
+            table,
+            year,  # type: ignore
+        )
+
+        df = pl.DataFrame(list_of_list_table_json, orient="row")
+        # funky stuff to get the first list to be the name of the columns
+        df = (
+            df.rename(df.head(1).to_dicts().pop())
+            .slice(1)  # type: ignore
+            .drop("NAME", cs.matches("(?i)^(ann)"), cs.matches(f"(?i){table}"))
+            .rename({"zip code tabulation area": "ZCTA"})
+            .cast(
+                {
+                    "ZCTA": pl.Int32,
+                }
+            )
+        )
+        table_file_name = CENSUS_DATA_DIR_PATH / f"acs5-profile-group-{table}-zcta.csv"
+        df.write_csv(table_file_name)
+        return str(table_file_name)
+
+    def _get_acs5_subject_table_to_group_name(
+        self, table: str, year: str
+    ) -> dict[str, Any] | None:
+        """Get a JSON representation of a table's attributes.
+
+        Note:
+            Tables can be found at: https://www.census.gov/acs/www/data/data-tables-and-tools/subject-tables/
+
+            Returned object will have entries similar to:
+            ```json
+            "DP05_0037M": {
+                "label": "Margin of Error!!RACE!!Total population!!One race!!White",
+                "concept": "ACS DEMOGRAPHIC AND HOUSING ESTIMATES",
+                "predicateType": "int",
+                "group": "DP05",
+                "limit": 0,
+                "predicateOnly": true
+            }
+            ```
+
+        Args:
+            table (str): the table to lookup
+            year (str): which acs5 year to look up
+
+        Returns:
+            str | Any: variables
+        """
+        file_name = f"{year}-acs5-subject-groups-{table}.json"
+        groups_url = (
+            f"https://api.census.gov/data/{year}/acs/acs5/subject/groups/{table}.json"
+        )
+        groups_to_label_translation = self.get_and_cache_data(file_name, groups_url)
+        if groups_to_label_translation is False:
+            log("Something is wrong with groups label dict", "warn")
+            return None
+        return groups_to_label_translation["variables"]  # type: ignore
+
+    def _translate_and_truncate_unique_acs5_subject_groups_to_labels_for_header_list(
+        self, headers: list[str], table: str, year: str
+    ) -> None:
+        """Gets the label name for a table and row for the acs5 profile surveys.
+
+        Args:
+            headers (list[str]): headers
+            table (str): table
+            year (str): year
+        """
+        # is going to read the file multiple times, save last req as {"table": req_json[table]...} for this?
+        groups_to_label_translation_dict = self._get_acs5_subject_table_to_group_name(
+            table, year
+        )
+        if groups_to_label_translation_dict is None:
+            log("Could not translate headers", "warn")
+            return groups_to_label_translation_dict
+
+        for idx, header in enumerate(headers):
+            new_col_name_dict = groups_to_label_translation_dict.get(header)
+            if new_col_name_dict is None:
+                # returns none if not in dict, means we have custom name and can continue
+                continue
+            new_col_name = new_col_name_dict["label"]
+            # qgis doesnt allow field names of 80+ chars. massage into form, then cut off
+            # delimiter for table subsection
+            new_col_name = new_col_name.replace("$", "D")
+            new_col_name = new_col_name.replace(",", "")
+            new_col_name = new_col_name.replace("'", "")
+            new_col_name = re.sub(r"\s+", " ", new_col_name)
+            new_col_name = new_col_name.replace("!!", " ")
+            # easier to read
+            new_col_name_parts = new_col_name.split(" ")
+            for idy, no_format in enumerate(new_col_name_parts):
+                new_col_name_parts[idy] = no_format.capitalize()
+            new_col_name = "".join(new_col_name_parts)
+            # shortenings to fit length requirement
+            for key, value in REPLACEMENT_DICT.items():
+                new_col_name = re.sub(key, value, new_col_name)
+            # limiter
+            new_col_name = new_col_name[
+                : min(len(new_col_name), self.MAX_COL_NAME_LENGTH)
+            ]
+
+            if new_col_name not in headers[:idx]:
+                headers[idx] = new_col_name
+
+    def generate_acs5_subject_table_group_for_zcta_by_year(
+        self, table: str, year: str
+    ) -> str:
+        """CSV output of a acs 5 year subject survey table
+
+        Args:
+            table (str): census acs5 table
+            year (str): year to search
+        """
+        file_name = f"{year}-acs-subject-table-{table}.json"
+        url = f"https://api.census.gov/data/{year}/acs/acs5/subject?get=group({table})&for=zip%20code%20tabulation%20area:*"
+        list_of_list_table_json = self.get_and_cache_data(file_name, url)
+        if list_of_list_table_json is False:
+            log(
+                f"Could not load table {table}. Perhaps the api is down or there was an error saving/reading the file.",
+                "warn",
+            )
+            return ""
+
+        self._translate_and_truncate_unique_acs5_subject_groups_to_labels_for_header_list(
+            list_of_list_table_json[0],  # type: ignore
+            table,
+            year,  # type: ignore
+        )
+
+        df = pl.DataFrame(list_of_list_table_json, orient="row")
+        # funky stuff to get the first list to be the name of the columns
+        df = (
+            df.rename(df.head(1).to_dicts().pop())
+            .slice(1)  # type: ignore
+            .drop("NAME", cs.matches("(?i)^(ann)"), cs.matches(f"(?i){table}"))
+            .rename({"zip code tabulation area": "ZCTA"})
+            .cast(
+                {
+                    "ZCTA": pl.Int32,
+                }
+            )
+        )
+        table_file_name = CENSUS_DATA_DIR_PATH / f"acs5-subject-group-{table}-zcta.csv"
+        # may not have to write. but cache func doesn't return whether it hits or not
+        df.write_csv(table_file_name)
+        return str(table_file_name)
 
@@ -1554,8 +1538,8 @@

-

- get_acs5_profile_table_group_for_zcta_by_year(table, year) +

+ generate_acs5_profile_table_group_for_zcta_by_year(table, year)

@@ -1638,13 +1622,7 @@

Source code in src\backend\secondarydata.py -
- - - - - - - - - -
824
-825
-826
-827
-828
-829
-830
+            
830
 831
 832
 833
@@ -1685,246 +1663,60 @@ 

868 869 870 -871

def get_acs5_profile_table_group_for_zcta_by_year(
-    self, table: str, year: str
-) -> str:
-    """CSV output of an acs 5 year profile survey table.
-
-    TODO:
-        Update func name
-
-    Args:
-        table (str): census demo acs5 table
-        year (str): year to search
-
-    Returns:
-        str: file path where output is saved
-    """
-    file_name = f"{year}-acs-profile-table-{table}.json"
-    url = f"https://api.census.gov/data/{year}/acs/acs5/profile?get=group({table})&for=zip%20code%20tabulation%20area:*"
-    list_of_list_table_json = self.get_and_cache_data(file_name, url)
-
-    if list_of_list_table_json is False:
-        log(
-            f"Could not load table {table}. Perhaps the api is down or there was an error saving/reading the file.",
-            "warn",
-        )
-        return ""
-
-    self.translate_and_truncate_unique_acs5_profile_groups_to_labels_for_header_list(
-        list_of_list_table_json[0],  # type: ignore
-        table,
-        year,  # type: ignore
-    )
+871
+872
+873
+874
+875
+876
+877
def generate_acs5_profile_table_group_for_zcta_by_year(
+    self, table: str, year: str
+) -> str:
+    """CSV output of an acs 5 year profile survey table.
+
+    TODO:
+        Update func name
+
+    Args:
+        table (str): census demo acs5 table
+        year (str): year to search
+
+    Returns:
+        str: file path where output is saved
+    """
+    file_name = f"{year}-acs-profile-table-{table}.json"
+    url = f"https://api.census.gov/data/{year}/acs/acs5/profile?get=group({table})&for=zip%20code%20tabulation%20area:*"
+    list_of_list_table_json = self.get_and_cache_data(file_name, url)
+
+    if list_of_list_table_json is False:
+        log(
+            f"Could not load table {table}. Perhaps the api is down or there was an error saving/reading the file.",
+            "warn",
+        )
+        return ""
 
-    df = pl.DataFrame(list_of_list_table_json, orient="row")
-    # funky stuff to get the first list to be the name of the columns
-    df = (
-        df.rename(df.head(1).to_dicts().pop())
-        .slice(1)  # type: ignore
-        .drop("NAME", cs.matches("(?i)^(ann)"), cs.matches(f"(?i){table}"))
-        .rename({"zip code tabulation area": "ZCTA"})
-        .cast(
-            {
-                "ZCTA": pl.Int32,
-            }
-        )
-    )
-    file_path = CENSUS_DATA_DIR_PATH / "acs5-profile-group-{table}-zcta.csv"
-    df.write_csv(file_path)
-    return str(file_path)
-
- -
- - - - -
- - - - -

- get_acs5_profile_table_to_group_name(table, year) - -

- - -
- -

Get a JSON representation of a table's attributes.

- -
- Note -

Tables must be: - * DP02 - * DP02PR - * DP03 - * DP04 - * DP05

-

Returned object will have entries similar to: -

"DP05_0037M": {
-    "label": "Margin of Error!!RACE!!Total population!!One race!!White",
-    "concept": "ACS DEMOGRAPHIC AND HOUSING ESTIMATES",
-    "predicateType": "int",
-    "group": "DP05",
-    "limit": 0,
-    "predicateOnly": true
-}
-

-
- - -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
table - str - -
-

the table to lookup

-
-
- required -
year - str - -
-

which acs5 year to look up

-
-
- required -
- - - -

Returns:

- - - - - - - - - - - - - -
TypeDescription
- dict[str, Any] | None - -
-

str | Any: json object

-
-
- -
- Source code in src\backend\secondarydata.py -
def get_acs5_profile_table_to_group_name(
-    self, table: str, year: str
-) -> dict[str, Any] | None:
-    """Get a JSON representation of a table's attributes.
-
-    Note:
-        Tables must be:
-            * DP02
-            * DP02PR
-            * DP03
-            * DP04
-            * DP05
-
-        Returned object will have entries similar to:
-        ```json
-        "DP05_0037M": {
-            "label": "Margin of Error!!RACE!!Total population!!One race!!White",
-            "concept": "ACS DEMOGRAPHIC AND HOUSING ESTIMATES",
-            "predicateType": "int",
-            "group": "DP05",
-            "limit": 0,
-            "predicateOnly": true
-        }
-        ```
-
-    Args:
-        table (str): the table to lookup
-        year (str): which acs5 year to look up
-
-    Returns:
-        str | Any: json object
-    """
-    file_name = f"{year}-acs5-profile-groups-{table}.json"
-    groups_url = (
-        f"https://api.census.gov/data/{year}/acs/acs5/profile/groups/{table}.json"
-    )
-    groups_to_label_translation = self.get_and_cache_data(file_name, groups_url)
-    if groups_to_label_translation is False:
-        log("Something is wrong with groups label dict", "warn")
-        return None
-    return groups_to_label_translation["variables"]  # type: ignore
+    self._translate_and_truncate_unique_acs5_profile_groups_to_labels_for_header_list(
+        list_of_list_table_json[0],  # type: ignore
+        table,
+        year,  # type: ignore
+    )
+
+    df = pl.DataFrame(list_of_list_table_json, orient="row")
+    # funky stuff to get the first list to be the name of the columns
+    df = (
+        df.rename(df.head(1).to_dicts().pop())
+        .slice(1)  # type: ignore
+        .drop("NAME", cs.matches("(?i)^(ann)"), cs.matches(f"(?i){table}"))
+        .rename({"zip code tabulation area": "ZCTA"})
+        .cast(
+            {
+                "ZCTA": pl.Int32,
+            }
+        )
+    )
+    table_file_name = CENSUS_DATA_DIR_PATH / f"acs5-profile-group-{table}-zcta.csv"
+    df.write_csv(table_file_name)
+    return str(table_file_name)
 
@@ -1937,8 +1729,8 @@

- get_acs5_subject_table_group_for_zcta_by_year(table, year) +

+ generate_acs5_subject_table_group_for_zcta_by_year(table, year)

@@ -1967,294 +1759,115 @@

-

census acs5 table

-

-
- required -
year - str - -
-

year to search

-
-
- required -
- -
- Source code in src\backend\secondarydata.py -
def get_acs5_subject_table_group_for_zcta_by_year(
-    self, table: str, year: str
-) -> str:
-    """CSV output of a acs 5 year subject survey table
-
-    Args:
-        table (str): census acs5 table
-        year (str): year to search
-    """
-    file_name = f"{year}-acs-subject-table-{table}.json"
-    url = f"https://api.census.gov/data/{year}/acs/acs5/subject?get=group({table})&for=zip%20code%20tabulation%20area:*"
-    list_of_list_table_json = self.get_and_cache_data(file_name, url)
-
-    if list_of_list_table_json is False:
-        log(
-            f"Could not load table {table}. Perhaps the api is down or there was an error saving/reading the file.",
-            "warn",
-        )
-        return ""
-
-    self.translate_and_truncate_unique_acs5_subject_groups_to_labels_for_header_list(
-        list_of_list_table_json[0],  # type: ignore
-        table,
-        year,  # type: ignore
-    )
-
-    df = pl.DataFrame(list_of_list_table_json, orient="row")
-    # funky stuff to get the first list to be the name of the columns
-    df = (
-        df.rename(df.head(1).to_dicts().pop())
-        .slice(1)  # type: ignore
-        .drop("NAME", cs.matches("(?i)^(ann)"), cs.matches(f"(?i){table}"))
-        .rename({"zip code tabulation area": "ZCTA"})
-        .cast(
-            {
-                "ZCTA": pl.Int32,
-            }
-        )
-    )
-    file_path = CENSUS_DATA_DIR_PATH / "acs5-subject-group-{table}-zcta.csv"
-    # may not have to write. but cache func doesn't return whether it hits or not
-    df.write_csv(file_path)
-    return str(file_path)
-
-
-
- -

- - -
- - - - -

- get_acs5_subject_table_to_group_name(table, year) - -

- - -
- -

Get a JSON representation of a table's attributes.

- -
- Note -

Tables can be found at: https://www.census.gov/acs/www/data/data-tables-and-tools/subject-tables/

-

Returned object will have entries similar to: -

"DP05_0037M": {
-    "label": "Margin of Error!!RACE!!Total population!!One race!!White",
-    "concept": "ACS DEMOGRAPHIC AND HOUSING ESTIMATES",
-    "predicateType": "int",
-    "group": "DP05",
-    "limit": 0,
-    "predicateOnly": true
-}
-

-
- - -

Parameters:

- - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
table - str - -
-

the table to lookup

-
-
- required -
year - str - -
-

which acs5 year to look up

+

census acs5 table

required
- - - -

Returns:

- - - - - - - - + +
TypeDescription
year - dict[str, Any] | None + str
-

str | Any: variables

+

year to search

+ required +
Source code in src\backend\secondarydata.py -
def get_acs5_subject_table_to_group_name(
-    self, table: str, year: str
-) -> dict[str, Any] | None:
-    """Get a JSON representation of a table's attributes.
-
-    Note:
-        Tables can be found at: https://www.census.gov/acs/www/data/data-tables-and-tools/subject-tables/
-
-        Returned object will have entries similar to:
-        ```json
-        "DP05_0037M": {
-            "label": "Margin of Error!!RACE!!Total population!!One race!!White",
-            "concept": "ACS DEMOGRAPHIC AND HOUSING ESTIMATES",
-            "predicateType": "int",
-            "group": "DP05",
-            "limit": 0,
-            "predicateOnly": true
-        }
-        ```
-
-    Args:
-        table (str): the table to lookup
-        year (str): which acs5 year to look up
-
-    Returns:
-        str | Any: variables
-    """
-    file_name = f"{year}-acs5-subject-groups-{table}.json"
-    groups_url = (
-        f"https://api.census.gov/data/{year}/acs/acs5/subject/groups/{table}.json"
-    )
-    groups_to_label_translation = self.get_and_cache_data(file_name, groups_url)
-    if groups_to_label_translation is False:
-        log("Something is wrong with groups label dict", "warn")
-        return None
-    return groups_to_label_translation["variables"]  # type: ignore
+            
def generate_acs5_subject_table_group_for_zcta_by_year(
+    self, table: str, year: str
+) -> str:
+    """CSV output of a acs 5 year subject survey table
+
+    Args:
+        table (str): census acs5 table
+        year (str): year to search
+    """
+    file_name = f"{year}-acs-subject-table-{table}.json"
+    url = f"https://api.census.gov/data/{year}/acs/acs5/subject?get=group({table})&for=zip%20code%20tabulation%20area:*"
+    list_of_list_table_json = self.get_and_cache_data(file_name, url)
+    if list_of_list_table_json is False:
+        log(
+            f"Could not load table {table}. Perhaps the api is down or there was an error saving/reading the file.",
+            "warn",
+        )
+        return ""
+
+    self._translate_and_truncate_unique_acs5_subject_groups_to_labels_for_header_list(
+        list_of_list_table_json[0],  # type: ignore
+        table,
+        year,  # type: ignore
+    )
+
+    df = pl.DataFrame(list_of_list_table_json, orient="row")
+    # funky stuff to get the first list to be the name of the columns
+    df = (
+        df.rename(df.head(1).to_dicts().pop())
+        .slice(1)  # type: ignore
+        .drop("NAME", cs.matches("(?i)^(ann)"), cs.matches(f"(?i){table}"))
+        .rename({"zip code tabulation area": "ZCTA"})
+        .cast(
+            {
+                "ZCTA": pl.Int32,
+            }
+        )
+    )
+    table_file_name = CENSUS_DATA_DIR_PATH / f"acs5-subject-group-{table}-zcta.csv"
+    # may not have to write. but cache func doesn't return whether it hits or not
+    df.write_csv(table_file_name)
+    return str(table_file_name)
 
@@ -2347,9 +1960,7 @@

Source code in src\backend\secondarydata.py -
679
-680
-681
+            
681
 682
 683
 684
@@ -2381,41 +1992,45 @@ 

710 711 712 -713

def get_and_cache_data(
-    self, file_name: str, url_to_lookup_on_miss: str
-) -> dict[str, str] | bool:
-    """Cache files.
-
-    Args:
-        file_name (str): file name to save/lookup
-        url_to_lookup_on_miss (str): the Census url to lookup
-
-    Returns:
-        bool | dict[str, str] | None | Any: the dict of `tablename: label` or
-    """
-    CENSUS_DATA_DIR_PATH.mkdir(parents=True, exist_ok=True)
-
-    my_json = None
+713
+714
+715
+716
def get_and_cache_data(
+    self, file_name: str, url_to_lookup_on_miss: str
+) -> dict[str, str] | bool:
+    """Cache files.
+
+    Args:
+        file_name (str): file name to save/lookup
+        url_to_lookup_on_miss (str): the Census url to lookup
+
+    Returns:
+        bool | dict[str, str] | None | Any: the dict of `tablename: label` or
+    """
+    CENSUS_DATA_CACHE_PATH.mkdir(parents=True, exist_ok=True)
 
-    try:
-        with open(CENSUS_DATA_DIR_PATH / file_name, mode="r") as f:
-            log(f"Reading {file_name}", "debug")
-            try:
-                my_json = json.load(f)
-            except json.JSONDecodeError:
-                log("Could not decode cached file", "error")
-                return False
-    except FileNotFoundError:
-        req = self._get(url_to_lookup_on_miss)
-        if req is None:
-            log(f"Could not find census file {req = }", "error")
-            return False
-        req.raise_for_status()
-        my_json = req.json()
-        with open(CENSUS_DATA_DIR_PATH / file_name, "w") as f:
-            json.dump(my_json, f)
-
-    return my_json
+    my_json = None
+
+    try:
+        with open(CENSUS_DATA_CACHE_PATH / file_name, mode="r") as f:
+            log(f"Reading {file_name}", "debug")
+            try:
+                my_json = json.load(f)
+            except json.JSONDecodeError:
+                log("Could not decode cached census file", "error")
+                return False
+    except FileNotFoundError:
+        req = self._get(url_to_lookup_on_miss)
+        log(f"Getting {url_to_lookup_on_miss}...", "info")
+        if req is None:
+            log(f"Could not get census file {file_name}.", "error")
+            return False
+        req.raise_for_status()
+        my_json = req.json()
+        with open(CENSUS_DATA_CACHE_PATH / file_name, "w") as f:
+            json.dump(my_json, f)
+
+    return my_json
 
@@ -2498,10 +2113,7 @@

Source code in src\backend\secondarydata.py -
715
-716
-717
-718
+            
718
 719
 720
 721
@@ -2516,389 +2128,28 @@ 

730 731 732 -733

def get_race_makeup_by_zcta(self, zcta: str) -> str | None:
-    """Get race make up by zcta from. DO NOT USE
-
-    Note:
-        use `get_table_group_for_zcta_by_state_by_year`
+733
+734
+735
+736
def get_race_makeup_by_zcta(self, zcta: str) -> str | None:
+    """Get race make up by zcta from. DO NOT USE
 
-    Args:
-        zcta (str): zcta
+    Note:
+        use `get_table_group_for_zcta_by_state_by_year`
 
-    Returns:
-        str | None: text or none
-    """
-    # get white, black, american indian/native alaskan, asian, NH/PI, other. note that these are estimates, margin of error can be had with "M"
-    req = self._get(
-        f"https://api.census.gov/data/2021/acs/acs5/profile?get=DP05_0064E,DP05_0065E,DP05_0066E,DP05_0067E,DP05_0068E,DP05_0069E&for=zip%20code%20tabulation%20area:{zcta}&key={self.api_key}"
-    )
-    if req is None:
-        return None
-    return req.text
-
- -
- - - - -
- - - - -

- translate_and_truncate_unique_acs5_profile_groups_to_labels_for_header_list(headers, table, year) - -

- - -
- -

Get the label name for a table and row for the acs5 profile surveys.

- - - -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
headers - list[str] - -
-

header row

-
-
- required -
table - str - -
-

have to look again

-
-
- required -
year - str - -
-

the year

-
-
- required -
- - - -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
None - None - -
-

translates the list of table_row_selector to its english label

-
-
- -
- Source code in src\backend\secondarydata.py -
def translate_and_truncate_unique_acs5_profile_groups_to_labels_for_header_list(
-    self, headers: list[str], table: str, year: str
-) -> None:
-    """Get the label name for a table and row for the acs5 profile surveys.
-
-    Args:
-        headers (list[str]): header row
-        table (str): have to look again
-        year (str): the year
-
-    Returns:
-        None: translates the list of table_row_selector to its english label
-    """
-    # is going to read the file multiple times, save last req as {"table": req_json[table]...} for this?
-    groups_to_label_translation_dict = self.get_acs5_profile_table_to_group_name(
-        table, year
-    )
-    if groups_to_label_translation_dict is None:
-        log("Could not translate headers", "warn")
-        return groups_to_label_translation_dict
-
-    for idx, header in enumerate(headers):
-        new_col_name_dict = groups_to_label_translation_dict.get(header)
-        if new_col_name_dict is None:
-            # returns none if not in dict, means we have custom name and can continue
-            continue
-        new_col_name = new_col_name_dict["label"]
-        # qgis doesnt allow field names of 80+ chars. massage into form, then cut off
-        # delimiter for table subsection
-        new_col_name = re.sub("!!", " ", new_col_name)
-        new_col_name = re.sub(r"\s+", " ", new_col_name)
-        # easier to read
-        new_col_name_parts = new_col_name.split(" ")
-        for idy, no_format in enumerate(new_col_name_parts):
-            new_col_name_parts[idy] = no_format.capitalize()
-        new_col_name = "".join(new_col_name_parts)
-        # shortenings to fit length requirement
-        for key, value in replace_dict.items():
-            new_col_name = re.sub(key, value, new_col_name)
-        # limiter
-        new_col_name = new_col_name[
-            : min(len(new_col_name), self.MAX_COL_NAME_LENGTH)
-        ]
-
-        if new_col_name not in headers[:idx]:
-            headers[idx] = new_col_name
-
-
-
- -
- - -
- - - - -

- translate_and_truncate_unique_acs5_subject_groups_to_labels_for_header_list(headers, table, year) - -

- - -
- -

Gets the label name for a table and row for the acs5 profile surveys.

- - - -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
headers - list[str] - -
-

headers

-
-
- required -
table - str - -
-

table

-
-
- required -
year - str - -
-

year

-
-
- required -
- -
- Source code in src\backend\secondarydata.py -
def translate_and_truncate_unique_acs5_subject_groups_to_labels_for_header_list(
-    self, headers: list[str], table: str, year: str
-) -> None:
-    """Gets the label name for a table and row for the acs5 profile surveys.
-
-    Args:
-        headers (list[str]): headers
-        table (str): table
-        year (str): year
-    """
-    # is going to read the file multiple times, save last req as {"table": req_json[table]...} for this?
-    groups_to_label_translation_dict = self.get_acs5_subject_table_to_group_name(
-        table, year
-    )
-    if groups_to_label_translation_dict is None:
-        log("Could not translate headers", "warn")
-        return groups_to_label_translation_dict
-
-    for idx, header in enumerate(headers):
-        new_col_name_dict = groups_to_label_translation_dict.get(header)
-        if new_col_name_dict is None:
-            # returns none if not in dict, means we have custom name and can continue
-            continue
-        new_col_name = new_col_name_dict["label"]
-        # qgis doesnt allow field names of 80+ chars. massage into form, then cut off
-        # delimiter for table subsection
-        new_col_name = re.sub("!!", " ", new_col_name)
-        new_col_name = re.sub(r"\s+", " ", new_col_name)
-        # easier to read
-        new_col_name_parts = new_col_name.split(" ")
-        for idy, no_format in enumerate(new_col_name_parts):
-            new_col_name_parts[idy] = no_format.capitalize()
-        new_col_name = "".join(new_col_name_parts)
-        # shortenings to fit length requirement
-        for key, value in replace_dict.items():
-            new_col_name = re.sub(key, value, new_col_name)
-        # limiter
-        new_col_name = new_col_name[
-            : min(len(new_col_name), self.MAX_COL_NAME_LENGTH)
-        ]
-
-        if new_col_name not in headers[:idx]:
-            headers[idx] = new_col_name
+    Args:
+        zcta (str): zcta
+
+    Returns:
+        str | None: text or none
+    """
+    # get white, black, american indian/native alaskan, asian, NH/PI, other. note that these are estimates, margin of error can be had with "M"
+    req = self._get(
+        f"https://api.census.gov/data/2021/acs/acs5/profile?get=DP05_0064E,DP05_0065E,DP05_0066E,DP05_0067E,DP05_0068E,DP05_0069E&for=zip%20code%20tabulation%20area:{zcta}&key={self.api_key}"
+    )
+    if req is None:
+        return None
+    return req.text
 
@@ -2937,9 +2188,7 @@

Source code in src\backend\secondarydata.py -
 49
- 50
- 51
+              
 51
  52
  53
  54
@@ -3539,609 +2788,611 @@ 

648 649 650 -651

class EIADataRetriever:
-    """Interact with the EIA open data API.
-
-    Note:
-        This is the "manual" for this API:
-        https://www.eia.gov/opendata/pdf/EIA-APIv2-HandsOn-Webinar-11-Jan-23.pdf
-    """
-
-    HEATING_OIL_STATES_ABBR = {
-        sts.CT.abbr,
-        sts.DC.abbr,
-        sts.DE.abbr,
-        sts.IA.abbr,
-        sts.IL.abbr,
-        sts.IN.abbr,
-        sts.KS.abbr,
-        sts.KY.abbr,
-        sts.MA.abbr,
-        sts.MD.abbr,
-        sts.ME.abbr,
-        sts.MI.abbr,
-        sts.MN.abbr,
-        sts.MO.abbr,
-        sts.NC.abbr,
-        sts.ND.abbr,
-        sts.NE.abbr,
-        sts.NH.abbr,
-        sts.NJ.abbr,
-        sts.NY.abbr,
-        sts.OH.abbr,
-        sts.PA.abbr,
-        sts.RI.abbr,
-        sts.SD.abbr,
-        sts.VA.abbr,
-        sts.VT.abbr,
-        sts.WI.abbr,
-    }
-
-    PROPANE_STATES_ABBR = {
-        sts.AL.abbr,
-        sts.AR.abbr,
-        sts.CO.abbr,
-        sts.CT.abbr,
-        sts.DE.abbr,
-        sts.FL.abbr,
-        sts.GA.abbr,
-        sts.IL.abbr,
-        sts.IN.abbr,
-        sts.KS.abbr,
-        sts.KY.abbr,
-        sts.KY.abbr,
-        sts.MA.abbr,
-        sts.MD.abbr,
-        sts.ME.abbr,
-        sts.MI.abbr,
-        sts.MN.abbr,
-        sts.MO.abbr,
-        sts.MS.abbr,
-        sts.MT.abbr,
-        sts.NC.abbr,
-        sts.ND.abbr,
-        sts.NE.abbr,
-        sts.NH.abbr,
-        sts.NJ.abbr,
-        sts.NY.abbr,
-        sts.OH.abbr,
-        sts.OK.abbr,
-        sts.PA.abbr,
-        sts.RI.abbr,
-        sts.SD.abbr,
-        sts.TN.abbr,
-        sts.TX.abbr,
-        sts.UT.abbr,
-        sts.VA.abbr,
-        sts.VT.abbr,
-        sts.WI.abbr,
-    }
-
-    class HeaterEfficiencies(Enum):
-        """Combination of system efficiency and distribution efficiency.
-
-        Note:
-            Numbers taken from https://www.efficiencymaine.com/at-home/heating-cost-comparison/
-        """
-
-        HEAT_PUMP_GEOTHERMAL = 3.69
-        HEAT_PUMP_DUCTLESS = 2.7  # mini split
-        HEAT_PUMP_DUCTED = 2.16
-        BASEBOARD = 1
-        KEROSENE_ROOM_HEATER = 0.87
-        PROPANE_BOILER = 0.837
-        NG_BOILER = 0.828
-        NG_ROOM_HEATER = 0.81
-        PROPANE_ROOM_HEATER = 0.81
-        OIL_BOILER = 0.783
-        WOOD_STOVE = 0.75
-        PELLET_STOVE = 0.75
-        NG_FURNACE = 0.744  #! double check this value
-        PROPANE_FURNACE = 0.744
-        OIL_FURNACE = 0.704
-        PELLET_BOILER = 0.639
-
-    class EnergyType(Enum):
-        PROPANE = 1
-        HEATING_OIL = 2
-        NATURAL_GAS = 3
-        ELECTRICITY = 4
-
-    class PetroleumProductTypes(StrEnum):
-        NATURAL_GAS = "EPG0"
-        PROPANE = "EPLLPA"
-        HEATING_OIL = "EPD2F"
-
-    class FuelBTUConversion(Enum):
-        # https://www.edf.org/sites/default/files/10071_EDF_BottomBarrel_Ch3.pdf
-        # https://www.eia.gov/energyexplained/units-and-calculators/british-thermal-units.php
-        # https://www.eia.gov/energyexplained/units-and-calculators/
-        NO1_OIL_BTU_PER_GAL = 135_000
-        NO2_OIL_BTU_PER_GAL = 140_000
-        NO4_OIL_BTU_PER_GAL = 146_000
-        NO5_OIL_BTU_PER_GAL = 144_500
-        NO6_OIL_BTU_PER_GAL = 150_000
-        HEATING_OIL_BTU_PER_GAL = 138_500
-        ELECTRICITY_BTU_PER_KWH = 3_412.14
-        NG_BTU_PER_MCT = 1_036_000  # 1000 cubic feet of gas
-        NG_BTU_PER_THERM = 100_000
-        PROPANE_BTU_PER_GAL = 91_452
-        WOOD_BTU_PER_CORD = 20_000_000
-
-    def __init__(self):
-        self.eia_base_url = "https://api.eia.gov/v2"
-        self.api_key = os.getenv("EIA_API_KEY")
-        if self.api_key is None:
-            log(
-                "No Census API key found in a .env file in project directory. please request a key at https://www.eia.gov/opendata/register.php",
-                "critical",
-            )
-            exit()
-
-    def price_per_mbtu_with_efficiency(
-        self, energy_price_dict: dict
-    ) -> dict[str, str | EnergyType | float]:
-        """Convert an energy source's price per quantity into price per BTU with an efficiency.
-
-        Note:
-            Efficiency data taken from https://portfoliomanager.energystar.gov/pdf/reference/Thermal%20Conversions.pdf
-
-        See also:
-            `EIADataRetriever.HeaterEfficiencies`
-
-        Args:
-            energy_price_dict (dict): energy source json
-
-        Returns:
-            dict: new dictionary with btu centric pricing
-        """
-        #! make new function based on burner type/ end usage type
-        CENTS_IN_DOLLAR = 100
-        match energy_price_dict.get("type"):
-            case self.EnergyType.PROPANE.value:
-                # for loop is done for every case since i dont want to use `eval` or parse a string of division to keep PEMDAS. this is why i dont have an efficiency func yet
-                for key, value in energy_price_dict.items():
-                    if (
-                        key in ["type", "state", None]
-                        or energy_price_dict.get(key) is None
-                    ):
-                        continue
-                    energy_price_dict[key] = (
-                        value
-                        / (
-                            self.FuelBTUConversion.PROPANE_BTU_PER_GAL.value
-                            * self.HeaterEfficiencies.PROPANE_FURNACE.value
-                        )
-                        * 1_000
-                    )
-            case self.EnergyType.NATURAL_GAS.value:
-                for key, value in energy_price_dict.items():
-                    if (
-                        key in ["type", "state", None]
-                        or energy_price_dict.get(key) is None
-                    ):
-                        continue
-                    energy_price_dict[key] = (
-                        value
-                        / (
-                            self.FuelBTUConversion.NG_BTU_PER_MCT.value
-                            * self.HeaterEfficiencies.NG_FURNACE.value
-                        )
-                        * 1_000
-                    )
-            case self.EnergyType.ELECTRICITY.value:
-                for key, value in energy_price_dict.items():
-                    if (
-                        key in ["type", "state", None]
-                        or energy_price_dict.get(key) is None
-                    ):
-                        continue
-                    energy_price_dict[key] = (
-                        value
-                        / CENTS_IN_DOLLAR
-                        / (
-                            self.FuelBTUConversion.ELECTRICITY_BTU_PER_KWH.value
-                            * self.HeaterEfficiencies.HEAT_PUMP_DUCTED.value
-                        )
-                        * 1_000
-                    )
-            case self.EnergyType.HEATING_OIL.value:
-                for key, value in energy_price_dict.items():
-                    if (
-                        key in ["type", "state", None]
-                        or energy_price_dict.get(key) is None
-                    ):
-                        continue
-                    energy_price_dict[key] = (
-                        value
-                        / (
-                            self.FuelBTUConversion.HEATING_OIL_BTU_PER_GAL.value
-                            * self.HeaterEfficiencies.OIL_BOILER.value
-                        )
-                        * 1_000
-                    )
-            case _:
-                log("Could not translate dict to btu per price.", "warn")
-
-        return energy_price_dict
+651
+652
+653
class EIADataRetriever:
+    """Interact with the EIA open data API.
+
+    Note:
+        This is the "manual" for this API:
+        https://www.eia.gov/opendata/pdf/EIA-APIv2-HandsOn-Webinar-11-Jan-23.pdf
+    """
+
+    HEATING_OIL_STATES_ABBR = {
+        sts.CT.abbr,
+        sts.DC.abbr,
+        sts.DE.abbr,
+        sts.IA.abbr,
+        sts.IL.abbr,
+        sts.IN.abbr,
+        sts.KS.abbr,
+        sts.KY.abbr,
+        sts.MA.abbr,
+        sts.MD.abbr,
+        sts.ME.abbr,
+        sts.MI.abbr,
+        sts.MN.abbr,
+        sts.MO.abbr,
+        sts.NC.abbr,
+        sts.ND.abbr,
+        sts.NE.abbr,
+        sts.NH.abbr,
+        sts.NJ.abbr,
+        sts.NY.abbr,
+        sts.OH.abbr,
+        sts.PA.abbr,
+        sts.RI.abbr,
+        sts.SD.abbr,
+        sts.VA.abbr,
+        sts.VT.abbr,
+        sts.WI.abbr,
+    }
+
+    PROPANE_STATES_ABBR = {
+        sts.AL.abbr,
+        sts.AR.abbr,
+        sts.CO.abbr,
+        sts.CT.abbr,
+        sts.DE.abbr,
+        sts.FL.abbr,
+        sts.GA.abbr,
+        sts.IL.abbr,
+        sts.IN.abbr,
+        sts.KS.abbr,
+        sts.KY.abbr,
+        sts.KY.abbr,
+        sts.MA.abbr,
+        sts.MD.abbr,
+        sts.ME.abbr,
+        sts.MI.abbr,
+        sts.MN.abbr,
+        sts.MO.abbr,
+        sts.MS.abbr,
+        sts.MT.abbr,
+        sts.NC.abbr,
+        sts.ND.abbr,
+        sts.NE.abbr,
+        sts.NH.abbr,
+        sts.NJ.abbr,
+        sts.NY.abbr,
+        sts.OH.abbr,
+        sts.OK.abbr,
+        sts.PA.abbr,
+        sts.RI.abbr,
+        sts.SD.abbr,
+        sts.TN.abbr,
+        sts.TX.abbr,
+        sts.UT.abbr,
+        sts.VA.abbr,
+        sts.VT.abbr,
+        sts.WI.abbr,
+    }
+
+    class HeaterEfficiencies(Enum):
+        """Combination of system efficiency and distribution efficiency.
+
+        Note:
+            Numbers taken from https://www.efficiencymaine.com/at-home/heating-cost-comparison/
+        """
+
+        HEAT_PUMP_GEOTHERMAL = 3.69
+        HEAT_PUMP_DUCTLESS = 2.7  # mini split
+        HEAT_PUMP_DUCTED = 2.16
+        BASEBOARD = 1
+        KEROSENE_ROOM_HEATER = 0.87
+        PROPANE_BOILER = 0.837
+        NG_BOILER = 0.828
+        NG_ROOM_HEATER = 0.81
+        PROPANE_ROOM_HEATER = 0.81
+        OIL_BOILER = 0.783
+        WOOD_STOVE = 0.75
+        PELLET_STOVE = 0.75
+        NG_FURNACE = 0.744  #! double check this value
+        PROPANE_FURNACE = 0.744
+        OIL_FURNACE = 0.704
+        PELLET_BOILER = 0.639
+
+    class EnergyType(Enum):
+        PROPANE = 1
+        HEATING_OIL = 2
+        NATURAL_GAS = 3
+        ELECTRICITY = 4
+
+    class PetroleumProductTypes(StrEnum):
+        NATURAL_GAS = "EPG0"
+        PROPANE = "EPLLPA"
+        HEATING_OIL = "EPD2F"
+
+    class FuelBTUConversion(Enum):
+        # https://www.edf.org/sites/default/files/10071_EDF_BottomBarrel_Ch3.pdf
+        # https://www.eia.gov/energyexplained/units-and-calculators/british-thermal-units.php
+        # https://www.eia.gov/energyexplained/units-and-calculators/
+        NO1_OIL_BTU_PER_GAL = 135_000
+        NO2_OIL_BTU_PER_GAL = 140_000
+        NO4_OIL_BTU_PER_GAL = 146_000
+        NO5_OIL_BTU_PER_GAL = 144_500
+        NO6_OIL_BTU_PER_GAL = 150_000
+        HEATING_OIL_BTU_PER_GAL = 138_500
+        ELECTRICITY_BTU_PER_KWH = 3_412.14
+        NG_BTU_PER_MCT = 1_036_000  # 1000 cubic feet of gas
+        NG_BTU_PER_THERM = 100_000
+        PROPANE_BTU_PER_GAL = 91_452
+        WOOD_BTU_PER_CORD = 20_000_000
+
+    def __init__(self):
+        self.eia_base_url = "https://api.eia.gov/v2"
+        self.api_key = os.getenv("EIA_API_KEY")
+        if self.api_key is None:
+            log(
+                "No Census API key found in a .env file in project directory. please request a key at https://www.eia.gov/opendata/register.php",
+                "critical",
+            )
+            exit()
+
+    def price_per_mbtu_with_efficiency(
+        self, energy_price_dict: dict
+    ) -> dict[str, str | EnergyType | float]:
+        """Convert an energy source's price per quantity into price per BTU with an efficiency.
+
+        Note:
+            Efficiency data taken from https://portfoliomanager.energystar.gov/pdf/reference/Thermal%20Conversions.pdf
+
+        See also:
+            `EIADataRetriever.HeaterEfficiencies`
+
+        Args:
+            energy_price_dict (dict): energy source json
+
+        Returns:
+            dict: new dictionary with btu centric pricing
+        """
+        #! make new function based on burner type/ end usage type
+        CENTS_IN_DOLLAR = 100
+        match energy_price_dict.get("type"):
+            case self.EnergyType.PROPANE.value:
+                # for loop is done for every case since i dont want to use `eval` or parse a string of division to keep PEMDAS. this is why i dont have an efficiency func yet
+                for key, value in energy_price_dict.items():
+                    if (
+                        key in ["type", "state", None]
+                        or energy_price_dict.get(key) is None
+                    ):
+                        continue
+                    energy_price_dict[key] = (
+                        value
+                        / (
+                            self.FuelBTUConversion.PROPANE_BTU_PER_GAL.value
+                            * self.HeaterEfficiencies.PROPANE_FURNACE.value
+                        )
+                        * 1_000
+                    )
+            case self.EnergyType.NATURAL_GAS.value:
+                for key, value in energy_price_dict.items():
+                    if (
+                        key in ["type", "state", None]
+                        or energy_price_dict.get(key) is None
+                    ):
+                        continue
+                    energy_price_dict[key] = (
+                        value
+                        / (
+                            self.FuelBTUConversion.NG_BTU_PER_MCT.value
+                            * self.HeaterEfficiencies.NG_FURNACE.value
+                        )
+                        * 1_000
+                    )
+            case self.EnergyType.ELECTRICITY.value:
+                for key, value in energy_price_dict.items():
+                    if (
+                        key in ["type", "state", None]
+                        or energy_price_dict.get(key) is None
+                    ):
+                        continue
+                    energy_price_dict[key] = (
+                        value
+                        / CENTS_IN_DOLLAR
+                        / (
+                            self.FuelBTUConversion.ELECTRICITY_BTU_PER_KWH.value
+                            * self.HeaterEfficiencies.HEAT_PUMP_DUCTED.value
+                        )
+                        * 1_000
+                    )
+            case self.EnergyType.HEATING_OIL.value:
+                for key, value in energy_price_dict.items():
+                    if (
+                        key in ["type", "state", None]
+                        or energy_price_dict.get(key) is None
+                    ):
+                        continue
+                    energy_price_dict[key] = (
+                        value
+                        / (
+                            self.FuelBTUConversion.HEATING_OIL_BTU_PER_GAL.value
+                            * self.HeaterEfficiencies.OIL_BOILER.value
+                        )
+                        * 1_000
+                    )
+            case _:
+                log("Could not translate dict to btu per price.", "warn")
 
-    # api to dict handler Helpers
-    def price_dict_to_clean_dict(
-        self, eia_json: dict, energy_type: EnergyType, state: str
-    ) -> dict[str, str | EnergyType | float]:
-        """Clean JSON data returned by EIA's API.
-
-        Args:
-            eia_json (dict): the response JSON
-            energy_type (EnergyType): the energy type
-            state (str): the state
-
-        Returns:
-            dict[str, str | EnergyType | float]: cleaned JSON
-        """
-        # price key is different for electricity
-        accessor = "value"
-        if "product" not in eia_json["response"]["data"][0]:
-            accessor = "price"
-
-        result_dict = {
-            entry["period"]: entry[f"{accessor}"]
-            for entry in eia_json["response"]["data"]
-        }
-        result_dict["type"] = energy_type.value
-        result_dict["state"] = state
-
-        return result_dict
+        return energy_price_dict
+
+    # api to dict handler Helpers
+    def price_dict_to_clean_dict(
+        self, eia_json: dict, energy_type: EnergyType, state: str
+    ) -> dict[str, str | EnergyType | float]:
+        """Clean JSON data returned by EIA's API.
+
+        Args:
+            eia_json (dict): the response JSON
+            energy_type (EnergyType): the energy type
+            state (str): the state
+
+        Returns:
+            dict[str, str | EnergyType | float]: cleaned JSON
+        """
+        # price key is different for electricity
+        accessor = "value"
+        if "product" not in eia_json["response"]["data"][0]:
+            accessor = "price"
+
+        result_dict = {
+            entry["period"]: entry[f"{accessor}"]
+            for entry in eia_json["response"]["data"]
+        }
+        result_dict["type"] = energy_type.value
+        result_dict["state"] = state
 
-    def price_df_to_clean_dict(
-        self, eia_df: pl.DataFrame, energy_type: EnergyType, state: str
-    ) -> dict[str, str | EnergyType | float]:
-        """Clean DataFrame data consisting of EIA API data.
-
-        Args:
-            eia_df (pl.DataFrame): the DataFrame to clean
-            energy_type (EnergyType): the energy type
-            state (str): the state
-
-        Returns:
-            dict[str, str|EnergyType|float]: the dict
-        """
-        result_dict = {}
-        for row in eia_df.rows(named=True):
-            year_month = f"{row.get("year")}-{row.get("month"):02}"
-            if row.get("monthly_avg_price") is not None:
-                result_dict[year_month] = round(row.get("monthly_avg_price"), 3)  # type: ignore
-        result_dict["type"] = energy_type.value
-        result_dict["state"] = state
-        return result_dict
-
-    # api to dict handler
-    def price_to_clean_dict(
-        self, price_struct: dict | pl.DataFrame, energy_type: EnergyType, state: str
-    ) -> dict[str, str | EnergyType | float]:
-        """Handle the different data types that EIA data could be stored in.
-
-        Args:
-            price_struct (dict | pl.DataFrame): a data structure containing the year, month, and price info
-            energy_type (EnergyType): the energy type
-            state (str): the state
-
-        Raises:
-            TypeError: raised if the type of `price_struct` is not supported
-
-        Returns:
-            dict[str, str|EnergyType|float]: the normalized and structured data in dict form
-        """
-        match price_struct:
-            case dict():
-                return self.price_dict_to_clean_dict(price_struct, energy_type, state)
-            case pl.DataFrame():
-                return self.price_df_to_clean_dict(price_struct, energy_type, state)
-            case _:
-                raise TypeError(f"Type not supported: {type(energy_type)}")
-
-    # api interaction
-    def monthly_electricity_price_per_kwh(
-        self, state: str, start_date: datetime.date, end_date: datetime.date
-    ) -> dict[str, Any]:
-        """Get a state's average monthly energy price.
-
-        Note:
-            Data is returned in cents/KWh.
-
-        Args:
-            state (str): the 2 character postal code of a state
-            start_date (datetime.date): the start date, inclusive
-            end_date (datetime.date): the end date, non inclusive
-
-        Returns:
-            dict: the dictionary in `year-month: price` form
-        """
-        url = f"{self.eia_base_url}/electricity/retail-sales/data/?frequency=monthly&data[0]=price&facets[stateid][]={state}&facets[sectorid][]=RES&start={start_date.year}-{start_date.month:02}&end={end_date.year}-{end_date.month:02}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}"
-
-        eia_request = req_get_wrapper(url)
-        eia_request.raise_for_status()
-
-        return eia_request.json()
+        return result_dict
+
+    def price_df_to_clean_dict(
+        self, eia_df: pl.DataFrame, energy_type: EnergyType, state: str
+    ) -> dict[str, str | EnergyType | float]:
+        """Clean DataFrame data consisting of EIA API data.
+
+        Args:
+            eia_df (pl.DataFrame): the DataFrame to clean
+            energy_type (EnergyType): the energy type
+            state (str): the state
+
+        Returns:
+            dict[str, str|EnergyType|float]: the dict
+        """
+        result_dict = {}
+        for row in eia_df.rows(named=True):
+            year_month = f"{row.get("year")}-{row.get("month"):02}"
+            if row.get("monthly_avg_price") is not None:
+                result_dict[year_month] = round(row.get("monthly_avg_price"), 3)  # type: ignore
+        result_dict["type"] = energy_type.value
+        result_dict["state"] = state
+        return result_dict
+
+    # api to dict handler
+    def price_to_clean_dict(
+        self, price_struct: dict | pl.DataFrame, energy_type: EnergyType, state: str
+    ) -> dict[str, str | EnergyType | float]:
+        """Handle the different data types that EIA data could be stored in.
+
+        Args:
+            price_struct (dict | pl.DataFrame): a data structure containing the year, month, and price info
+            energy_type (EnergyType): the energy type
+            state (str): the state
+
+        Raises:
+            TypeError: raised if the type of `price_struct` is not supported
+
+        Returns:
+            dict[str, str|EnergyType|float]: the normalized and structured data in dict form
+        """
+        match price_struct:
+            case dict():
+                return self.price_dict_to_clean_dict(price_struct, energy_type, state)
+            case pl.DataFrame():
+                return self.price_df_to_clean_dict(price_struct, energy_type, state)
+            case _:
+                raise TypeError(f"Type not supported: {type(energy_type)}")
+
+    # api interaction
+    def monthly_electricity_price_per_kwh(
+        self, state: str, start_date: datetime.date, end_date: datetime.date
+    ) -> dict[str, Any]:
+        """Get a state's average monthly energy price.
+
+        Note:
+            Data is returned in cents/KWh.
+
+        Args:
+            state (str): the 2 character postal code of a state
+            start_date (datetime.date): the start date, inclusive
+            end_date (datetime.date): the end date, non inclusive
+
+        Returns:
+            dict: the dictionary in `year-month: price` form
+        """
+        url = f"{self.eia_base_url}/electricity/retail-sales/data/?frequency=monthly&data[0]=price&facets[stateid][]={state}&facets[sectorid][]=RES&start={start_date.year}-{start_date.month:02}&end={end_date.year}-{end_date.month:02}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}"
+
+        eia_request = req_get_wrapper(url)
+        eia_request.raise_for_status()
 
-    def monthly_ng_price_per_mcf(
-        self, state: str, start_date: datetime.date, end_date: datetime.date
-    ) -> dict[str, Any]:
-        """Get a state's average natural gas price.
-
-        Note:
-            Data is returned in dollars per mega cubic feet.
-
-        Args:
-            state (str): the 2 character postal code of a state
-            start_date (datetime.date): the start date, inclusive
-            end_date (datetime.date): the end date, non inclusive
-
-        Returns:
-            dict: _description_
-        """
-        # $/mcf
-        url = f"https://api.eia.gov/v2/natural-gas/pri/sum/data/?frequency=monthly&data[0]=value&facets[duoarea][]=S{state}&facets[process][]=PRS&start={start_date.year}-{start_date.month:02}&end={end_date.year}-{end_date.month:02}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}"
-
-        eia_request = req_get_wrapper(url)
-        eia_request.raise_for_status()
-
-        return eia_request.json()
+        return eia_request.json()
+
+    def monthly_ng_price_per_mcf(
+        self, state: str, start_date: datetime.date, end_date: datetime.date
+    ) -> dict[str, Any]:
+        """Get a state's average natural gas price.
+
+        Note:
+            Data is returned in dollars per mega cubic feet.
+
+        Args:
+            state (str): the 2 character postal code of a state
+            start_date (datetime.date): the start date, inclusive
+            end_date (datetime.date): the end date, non inclusive
+
+        Returns:
+            dict: _description_
+        """
+        # $/mcf
+        url = f"https://api.eia.gov/v2/natural-gas/pri/sum/data/?frequency=monthly&data[0]=value&facets[duoarea][]=S{state}&facets[process][]=PRS&start={start_date.year}-{start_date.month:02}&end={end_date.year}-{end_date.month:02}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}"
+
+        eia_request = req_get_wrapper(url)
+        eia_request.raise_for_status()
 
-    def monthly_heating_season_heating_oil_price_per_gal(
-        self, state: str, start_date: datetime.date, end_date: datetime.date
-    ) -> pl.DataFrame:
-        """Get a state's average heating oil price.
-
-        Note:
-            Data returned is in dollars per gallon.
-
-            Only these states are tracked, and only for the months October through March:
-                * CT
-                * DC
-                * DE
-                * IA
-                * IL
-                * IN
-                * KS
-                * KY
-                * MA
-                * MD
-                * ME
-                * MI
-                * MN
-                * MO
-                * NC
-                * ND
-                * NE
-                * NH
-                * NJ
-                * NY
-                * OH
-                * PA
-                * RI
-                * SD
-                * VA
-                * VT
-                * WI
-        Args:
-            state (str): 2 char postal code
-            start_date (datetime.date): the start date, inclusive
-            end_date (datetime.date): the end date, non inclusive
-
-        Returns:
-            dict: _description_
-        """
-        # heating season is Oct - march, $/gal
-        url = f"https://api.eia.gov/v2/petroleum/pri/wfr/data/?frequency=weekly&data[0]=value&facets[duoarea][]=S{state}&facets[product][]=EPD2F&start={start_date}&end={end_date}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}"
-
-        eia_request = req_get_wrapper(url)
-        eia_request.raise_for_status()
-
-        json = eia_request.json()
-        df = pl.DataFrame(json["response"]["data"])
-        # becomes int, so months are sig figs
-        df = df.with_columns(pl.col("period").str.strptime(pl.Date))
-        df = df.with_columns(
-            pl.col("period").dt.year().alias("year"),
-            pl.col("period").dt.month().alias("month"),
-        )
-
-        monthly_avg_price = (
-            df.group_by(["year", "month"])
-            .agg(pl.col("value").mean().alias("monthly_avg_price"))
-            .sort("year", "month")
-        )
-
-        return monthly_avg_price
+        return eia_request.json()
+
+    def monthly_heating_season_heating_oil_price_per_gal(
+        self, state: str, start_date: datetime.date, end_date: datetime.date
+    ) -> pl.DataFrame:
+        """Get a state's average heating oil price.
+
+        Note:
+            Data returned is in dollars per gallon.
+
+            Only these states are tracked, and only for the months October through March:
+                * CT
+                * DC
+                * DE
+                * IA
+                * IL
+                * IN
+                * KS
+                * KY
+                * MA
+                * MD
+                * ME
+                * MI
+                * MN
+                * MO
+                * NC
+                * ND
+                * NE
+                * NH
+                * NJ
+                * NY
+                * OH
+                * PA
+                * RI
+                * SD
+                * VA
+                * VT
+                * WI
+        Args:
+            state (str): 2 char postal code
+            start_date (datetime.date): the start date, inclusive
+            end_date (datetime.date): the end date, non inclusive
+
+        Returns:
+            pl.DataFrame: one row per (year, month) with the mean weekly price in `monthly_avg_price`, sorted by year then month
+        """
+        # heating season is Oct - march, $/gal
+        url = f"https://api.eia.gov/v2/petroleum/pri/wfr/data/?frequency=weekly&data[0]=value&facets[duoarea][]=S{state}&facets[product][]=EPD2F&start={start_date}&end={end_date}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}"
+
+        eia_request = req_get_wrapper(url)
+        eia_request.raise_for_status()
+
+        json = eia_request.json()
+        df = pl.DataFrame(json["response"]["data"])
+        # becomes int, so months are sig figs
+        df = df.with_columns(pl.col("period").str.strptime(pl.Date))
+        df = df.with_columns(
+            pl.col("period").dt.year().alias("year"),
+            pl.col("period").dt.month().alias("month"),
+        )
+
+        monthly_avg_price = (
+            df.group_by(["year", "month"])
+            .agg(pl.col("value").mean().alias("monthly_avg_price"))
+            .sort("year", "month")
+        )
 
-    def monthly_heating_season_propane_price_per_gal(
-        self, state: str, start_date: datetime.date, end_date: datetime.date
-    ) -> pl.DataFrame:
-        """Get a state's average propane price in dollars per gal.
-
-        Note:
-            Only these states are tracked, and only for the months October through Marc:
-                * AL
-                * AR
-                * CO
-                * CT
-                * DE
-                * FL
-                * GA
-                * IL
-                * IN
-                * KS
-                * KY
-                * KY
-                * MA
-                * MD
-                * ME
-                * MI
-                * MN
-                * MO
-                * MS
-                * MT
-                * NC
-                * ND
-                * NE
-                * NH
-                * NJ
-                * NY
-                * OH
-                * OK
-                * PA
-                * RI
-                * SD
-                * TN
-                * TX
-                * UT
-                * VA
-                * VT
-                * WI
-
-        Args:
-            state (str): 2 character postal code
-            start_date (datetime.date): the start date, inclusive
-            end_date (datetime.date): the end date, non inclusive
-
-        Returns:
-            dict: _description_
-        """
-        # heating season is Oct - march, $/gal
-        url = f"https://api.eia.gov/v2/petroleum/pri/wfr/data/?frequency=weekly&data[0]=value&facets[process][]=PRS&facets[duoarea][]=S{state}&facets[product][]=EPLLPA&start={start_date}&end={end_date}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}"
-
-        eia_request = req_get_wrapper(url)
-        eia_request.raise_for_status()
-
-        json = eia_request.json()
-        # return self.price_json_to_dict(eia_request.json())
-        df = pl.DataFrame(json["response"]["data"])
-        # df = df.with_columns(pl.col("period").str.to_date().alias("period"))
-        df = df.with_columns(pl.col("period").str.strptime(pl.Date))
-        df = df.with_columns(
-            pl.col("period").dt.year().alias("year"),
-            pl.col("period").dt.month().alias("month"),
-        )
-
-        monthly_avg_price = (
-            df.group_by(["year", "month"])
-            .agg(pl.col("value").mean().alias("monthly_avg_price"))
-            .sort("year", "month")
-        )
-
-        return monthly_avg_price
+        return monthly_avg_price
+
+    def monthly_heating_season_propane_price_per_gal(
+        self, state: str, start_date: datetime.date, end_date: datetime.date
+    ) -> pl.DataFrame:
+        """Get a state's average propane price in dollars per gal.
+
+        Note:
+            Only these states are tracked, and only for the months October through March:
+                * AL
+                * AR
+                * CO
+                * CT
+                * DE
+                * FL
+                * GA
+                * IL
+                * IN
+                * KS
+                * KY
+                * KY
+                * MA
+                * MD
+                * ME
+                * MI
+                * MN
+                * MO
+                * MS
+                * MT
+                * NC
+                * ND
+                * NE
+                * NH
+                * NJ
+                * NY
+                * OH
+                * OK
+                * PA
+                * RI
+                * SD
+                * TN
+                * TX
+                * UT
+                * VA
+                * VT
+                * WI
+
+        Args:
+            state (str): 2 character postal code
+            start_date (datetime.date): the start date, inclusive
+            end_date (datetime.date): the end date, non inclusive
+
+        Returns:
+            pl.DataFrame: one row per (year, month) with the mean weekly price in `monthly_avg_price`, sorted by year then month
+        """
+        # heating season is Oct - march, $/gal
+        url = f"https://api.eia.gov/v2/petroleum/pri/wfr/data/?frequency=weekly&data[0]=value&facets[process][]=PRS&facets[duoarea][]=S{state}&facets[product][]=EPLLPA&start={start_date}&end={end_date}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}"
+
+        eia_request = req_get_wrapper(url)
+        eia_request.raise_for_status()
+
+        json = eia_request.json()
+        # return self.price_json_to_dict(eia_request.json())
+        df = pl.DataFrame(json["response"]["data"])
+        # df = df.with_columns(pl.col("period").str.to_date().alias("period"))
+        df = df.with_columns(pl.col("period").str.strptime(pl.Date))
+        df = df.with_columns(
+            pl.col("period").dt.year().alias("year"),
+            pl.col("period").dt.month().alias("month"),
+        )
+
+        monthly_avg_price = (
+            df.group_by(["year", "month"])
+            .agg(pl.col("value").mean().alias("monthly_avg_price"))
+            .sort("year", "month")
+        )
 
-    def monthly_price_per_mbtu_by_energy_type(
-        self,
-        energy_type: EnergyType,
-        state: str,
-        start_date: datetime.date,
-        end_date: datetime.date,
-    ) -> dict[str, str | EnergyType | float]:
-        """Get the cost per MBTU for the given energy type for the state, over the given period of time. Refer to EIA's documentation
-        for changes to data collection during certain years.
-
-        Args:
-            energy_type (EnergyType): The energy type
-            state (str): the 2 character postal abbreviation. Note that for heating oil, only certain states have this information collected
-            start_date (datetime.date): the date for which to start the search. Inclusive. Not that for heating oil, only heating months will be returned
-            end_date (datetime.date): the date for which to end the search. Non inclusive
-
-        Raises:
-            NotImplementedError: Invalid energy type
-
-        Returns:
-            dict: year-month: price in USD to BTU
-        """
-        if len(state) > 2:
-            state = sts.lookup(state).abbr  # type: ignore
-        match energy_type:
-            case self.EnergyType.PROPANE:
-                return self.price_per_mbtu_with_efficiency(
-                    self.price_to_clean_dict(
-                        self.monthly_heating_season_propane_price_per_gal(
-                            state, start_date, end_date
-                        ),
-                        energy_type,
-                        state,
-                    )
-                )
-            case self.EnergyType.NATURAL_GAS:
-                return self.price_per_mbtu_with_efficiency(
-                    self.price_to_clean_dict(
-                        self.monthly_ng_price_per_mcf(state, start_date, end_date),
-                        energy_type,
-                        state,
-                    )
-                )
-            case self.EnergyType.ELECTRICITY:
-                return self.price_per_mbtu_with_efficiency(
-                    self.price_to_clean_dict(
-                        self.monthly_electricity_price_per_kwh(
-                            state, start_date, end_date
-                        ),
-                        energy_type,
-                        state,
-                    )
-                )
-            case self.EnergyType.HEATING_OIL:
-                return self.price_per_mbtu_with_efficiency(
-                    self.price_to_clean_dict(
-                        self.monthly_heating_season_heating_oil_price_per_gal(
-                            state, start_date, end_date
-                        ),
-                        energy_type,
-                        state,
-                    )
-                )
-            case _:
-                raise NotImplementedError(f"Unsupported energy type: {energy_type}")
-
-    def monthly_price_per_mbtu_by_energy_type_by_state(
-        self, state: str, start_date: datetime.date, end_date: datetime.date
-    ) -> list[Any]:
-        """Get all available energy prices per MBTU, taking efficiency into account, for a state.
-
-        Note:
-            Please keep times to within a year. For the non oil and propane, you have to go a month past.
-
-        Args:
-            state (str): 2 character postal code
-            start_date (datetime.date): start date
-            end_date (datetime.date): end date
-
-        Returns:
-            list[Any]: list of price dicts for available energy types for a state
-        """
-        if len(state) > 2:
-            state = sts.lookup(state).abbr  # type: ignore
-
-        dicts_to_return = []
-        if state in self.HEATING_OIL_STATES_ABBR:
-            dicts_to_return.append(
-                self.monthly_price_per_mbtu_by_energy_type(
-                    self.EnergyType.HEATING_OIL, state, start_date, end_date
-                )
-            )
-        if state in self.PROPANE_STATES_ABBR:
-            dicts_to_return.append(
-                self.monthly_price_per_mbtu_by_energy_type(
-                    self.EnergyType.PROPANE, state, start_date, end_date
-                )
-            )
-        dicts_to_return.append(
-            self.monthly_price_per_mbtu_by_energy_type(
-                self.EnergyType.NATURAL_GAS, state, start_date, end_date
-            )
-        )
-        dicts_to_return.append(
-            self.monthly_price_per_mbtu_by_energy_type(
-                self.EnergyType.ELECTRICITY, state, start_date, end_date
-            )
-        )
-        log(f"{dicts_to_return = }", "debug")
-        return dicts_to_return
+        return monthly_avg_price
+
+    def monthly_price_per_mbtu_by_energy_type(
+        self,
+        energy_type: EnergyType,
+        state: str,
+        start_date: datetime.date,
+        end_date: datetime.date,
+    ) -> dict[str, str | EnergyType | float]:
+        """Get the cost per MBTU for the given energy type for the state, over the given period of time. Refer to EIA's documentation
+        for changes to data collection during certain years.
+
+        Args:
+            energy_type (EnergyType): The energy type
+            state (str): the 2 character postal abbreviation. Note that for heating oil, only certain states have this information collected
+            start_date (datetime.date): the date for which to start the search. Inclusive. Note that for heating oil, only heating months will be returned
+            end_date (datetime.date): the date for which to end the search. Non inclusive
+
+        Raises:
+            NotImplementedError: Invalid energy type
+
+        Returns:
+            dict: year-month: price in USD to BTU
+        """
+        if len(state) > 2:
+            state = sts.lookup(state).abbr  # type: ignore
+        match energy_type:
+            case self.EnergyType.PROPANE:
+                return self.price_per_mbtu_with_efficiency(
+                    self.price_to_clean_dict(
+                        self.monthly_heating_season_propane_price_per_gal(
+                            state, start_date, end_date
+                        ),
+                        energy_type,
+                        state,
+                    )
+                )
+            case self.EnergyType.NATURAL_GAS:
+                return self.price_per_mbtu_with_efficiency(
+                    self.price_to_clean_dict(
+                        self.monthly_ng_price_per_mcf(state, start_date, end_date),
+                        energy_type,
+                        state,
+                    )
+                )
+            case self.EnergyType.ELECTRICITY:
+                return self.price_per_mbtu_with_efficiency(
+                    self.price_to_clean_dict(
+                        self.monthly_electricity_price_per_kwh(
+                            state, start_date, end_date
+                        ),
+                        energy_type,
+                        state,
+                    )
+                )
+            case self.EnergyType.HEATING_OIL:
+                return self.price_per_mbtu_with_efficiency(
+                    self.price_to_clean_dict(
+                        self.monthly_heating_season_heating_oil_price_per_gal(
+                            state, start_date, end_date
+                        ),
+                        energy_type,
+                        state,
+                    )
+                )
+            case _:
+                raise NotImplementedError(f"Unsupported energy type: {energy_type}")
+
+    def monthly_price_per_mbtu_by_energy_type_by_state(
+        self, state: str, start_date: datetime.date, end_date: datetime.date
+    ) -> list[Any]:
+        """Get all available energy prices per MBTU, taking efficiency into account, for a state.
+
+        Note:
+            Please keep the date range within one year. For energy types other than heating oil and propane, you have to go a month past the intended end date.
+
+        Args:
+            state (str): 2 character postal code
+            start_date (datetime.date): start date
+            end_date (datetime.date): end date
+
+        Returns:
+            list[Any]: list of price dicts for available energy types for a state
+        """
+        if len(state) > 2:
+            state = sts.lookup(state).abbr  # type: ignore
+
+        dicts_to_return = []
+        if state in self.HEATING_OIL_STATES_ABBR:
+            dicts_to_return.append(
+                self.monthly_price_per_mbtu_by_energy_type(
+                    self.EnergyType.HEATING_OIL, state, start_date, end_date
+                )
+            )
+        if state in self.PROPANE_STATES_ABBR:
+            dicts_to_return.append(
+                self.monthly_price_per_mbtu_by_energy_type(
+                    self.EnergyType.PROPANE, state, start_date, end_date
+                )
+            )
+        dicts_to_return.append(
+            self.monthly_price_per_mbtu_by_energy_type(
+                self.EnergyType.NATURAL_GAS, state, start_date, end_date
+            )
+        )
+        dicts_to_return.append(
+            self.monthly_price_per_mbtu_by_energy_type(
+                self.EnergyType.ELECTRICITY, state, start_date, end_date
+            )
+        )
+        log(f"{dicts_to_return = }", "debug")
+        return dicts_to_return
 
@@ -4181,9 +3432,7 @@

Source code in src\backend\secondarydata.py -
127
-128
-129
+              
129
 130
 131
 132
@@ -4203,29 +3452,31 @@ 

146 147 148 -149

class HeaterEfficiencies(Enum):
-    """Combination of system efficiency and distribution efficiency.
-
-    Note:
-        Numbers taken from https://www.efficiencymaine.com/at-home/heating-cost-comparison/
-    """
-
-    HEAT_PUMP_GEOTHERMAL = 3.69
-    HEAT_PUMP_DUCTLESS = 2.7  # mini split
-    HEAT_PUMP_DUCTED = 2.16
-    BASEBOARD = 1
-    KEROSENE_ROOM_HEATER = 0.87
-    PROPANE_BOILER = 0.837
-    NG_BOILER = 0.828
-    NG_ROOM_HEATER = 0.81
-    PROPANE_ROOM_HEATER = 0.81
-    OIL_BOILER = 0.783
-    WOOD_STOVE = 0.75
-    PELLET_STOVE = 0.75
-    NG_FURNACE = 0.744  #! double check this value
-    PROPANE_FURNACE = 0.744
-    OIL_FURNACE = 0.704
-    PELLET_BOILER = 0.639
+149
+150
+151
class HeaterEfficiencies(Enum):
+    """Combination of system efficiency and distribution efficiency.
+
+    Note:
+        Numbers taken from https://www.efficiencymaine.com/at-home/heating-cost-comparison/
+    """
+
+    HEAT_PUMP_GEOTHERMAL = 3.69
+    HEAT_PUMP_DUCTLESS = 2.7  # mini split
+    HEAT_PUMP_DUCTED = 2.16
+    BASEBOARD = 1
+    KEROSENE_ROOM_HEATER = 0.87
+    PROPANE_BOILER = 0.837
+    NG_BOILER = 0.828
+    NG_ROOM_HEATER = 0.81
+    PROPANE_ROOM_HEATER = 0.81
+    OIL_BOILER = 0.783
+    WOOD_STOVE = 0.75
+    PELLET_STOVE = 0.75
+    NG_FURNACE = 0.744  #! double check this value
+    PROPANE_FURNACE = 0.744
+    OIL_FURNACE = 0.704
+    PELLET_BOILER = 0.639
 
@@ -4354,9 +3605,7 @@

Source code in src\backend\secondarydata.py -
351
-352
-353
+            
353
 354
 355
 356
@@ -4375,28 +3624,30 @@ 

369 370 371 -372

def monthly_electricity_price_per_kwh(
-    self, state: str, start_date: datetime.date, end_date: datetime.date
-) -> dict[str, Any]:
-    """Get a state's average monthly energy price.
-
-    Note:
-        Data is returned in cents/KWh.
-
-    Args:
-        state (str): the 2 character postal code of a state
-        start_date (datetime.date): the start date, inclusive
-        end_date (datetime.date): the end date, non inclusive
-
-    Returns:
-        dict: the dictionary in `year-month: price` form
-    """
-    url = f"{self.eia_base_url}/electricity/retail-sales/data/?frequency=monthly&data[0]=price&facets[stateid][]={state}&facets[sectorid][]=RES&start={start_date.year}-{start_date.month:02}&end={end_date.year}-{end_date.month:02}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}"
-
-    eia_request = req_get_wrapper(url)
-    eia_request.raise_for_status()
-
-    return eia_request.json()
+372
+373
+374
def monthly_electricity_price_per_kwh(
+    self, state: str, start_date: datetime.date, end_date: datetime.date
+) -> dict[str, Any]:
+    """Get a state's average monthly energy price.
+
+    Note:
+        Data is returned in cents/KWh.
+
+    Args:
+        state (str): the 2 character postal code of a state
+        start_date (datetime.date): the start date, inclusive
+        end_date (datetime.date): the end date, non inclusive
+
+    Returns:
+        dict: the decoded JSON body of the EIA API response, returned unmodified
+    """
+    url = f"{self.eia_base_url}/electricity/retail-sales/data/?frequency=monthly&data[0]=price&facets[stateid][]={state}&facets[sectorid][]=RES&start={start_date.year}-{start_date.month:02}&end={end_date.year}-{end_date.month:02}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}"
+
+    eia_request = req_get_wrapper(url)
+    eia_request.raise_for_status()
+
+    return eia_request.json()
 
@@ -4481,9 +3732,7 @@

Source code in src\backend\secondarydata.py -
398
-399
-400
+            
400
 401
 402
 403
@@ -4546,72 +3795,74 @@ 

460 461 462 -463

def monthly_heating_season_heating_oil_price_per_gal(
-    self, state: str, start_date: datetime.date, end_date: datetime.date
-) -> pl.DataFrame:
-    """Get a state's average heating oil price.
-
-    Note:
-        Data returned is in dollars per gallon.
-
-        Only these states are tracked, and only for the months October through March:
-            * CT
-            * DC
-            * DE
-            * IA
-            * IL
-            * IN
-            * KS
-            * KY
-            * MA
-            * MD
-            * ME
-            * MI
-            * MN
-            * MO
-            * NC
-            * ND
-            * NE
-            * NH
-            * NJ
-            * NY
-            * OH
-            * PA
-            * RI
-            * SD
-            * VA
-            * VT
-            * WI
-    Args:
-        state (str): 2 char postal code
-        start_date (datetime.date): the start date, inclusive
-        end_date (datetime.date): the end date, non inclusive
-
-    Returns:
-        dict: _description_
-    """
-    # heating season is Oct - march, $/gal
-    url = f"https://api.eia.gov/v2/petroleum/pri/wfr/data/?frequency=weekly&data[0]=value&facets[duoarea][]=S{state}&facets[product][]=EPD2F&start={start_date}&end={end_date}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}"
-
-    eia_request = req_get_wrapper(url)
-    eia_request.raise_for_status()
-
-    json = eia_request.json()
-    df = pl.DataFrame(json["response"]["data"])
-    # becomes int, so months are sig figs
-    df = df.with_columns(pl.col("period").str.strptime(pl.Date))
-    df = df.with_columns(
-        pl.col("period").dt.year().alias("year"),
-        pl.col("period").dt.month().alias("month"),
-    )
-
-    monthly_avg_price = (
-        df.group_by(["year", "month"])
-        .agg(pl.col("value").mean().alias("monthly_avg_price"))
-        .sort("year", "month")
-    )
-
-    return monthly_avg_price
+463
+464
+465
def monthly_heating_season_heating_oil_price_per_gal(
+    self, state: str, start_date: datetime.date, end_date: datetime.date
+) -> pl.DataFrame:
+    """Get a state's average heating oil price.
+
+    Note:
+        Data returned is in dollars per gallon.
+
+        Only these states are tracked, and only for the months October through March:
+            * CT
+            * DC
+            * DE
+            * IA
+            * IL
+            * IN
+            * KS
+            * KY
+            * MA
+            * MD
+            * ME
+            * MI
+            * MN
+            * MO
+            * NC
+            * ND
+            * NE
+            * NH
+            * NJ
+            * NY
+            * OH
+            * PA
+            * RI
+            * SD
+            * VA
+            * VT
+            * WI
+    Args:
+        state (str): 2 char postal code
+        start_date (datetime.date): the start date, inclusive
+        end_date (datetime.date): the end date, non inclusive
+
+    Returns:
+        pl.DataFrame: one row per (year, month) with the mean weekly price in `monthly_avg_price`, sorted by year then month
+    """
+    # heating season is Oct - march, $/gal
+    url = f"https://api.eia.gov/v2/petroleum/pri/wfr/data/?frequency=weekly&data[0]=value&facets[duoarea][]=S{state}&facets[product][]=EPD2F&start={start_date}&end={end_date}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}"
+
+    eia_request = req_get_wrapper(url)
+    eia_request.raise_for_status()
+
+    json = eia_request.json()
+    df = pl.DataFrame(json["response"]["data"])
+    # becomes int, so months are sig figs
+    df = df.with_columns(pl.col("period").str.strptime(pl.Date))
+    df = df.with_columns(
+        pl.col("period").dt.year().alias("year"),
+        pl.col("period").dt.month().alias("month"),
+    )
+
+    monthly_avg_price = (
+        df.group_by(["year", "month"])
+        .agg(pl.col("value").mean().alias("monthly_avg_price"))
+        .sort("year", "month")
+    )
+
+    return monthly_avg_price
 
@@ -4759,9 +4010,7 @@

Source code in src\backend\secondarydata.py -
465
-466
-467
+            
467
 468
 469
 470
@@ -4834,82 +4083,84 @@ 

537 538 539 -540

def monthly_heating_season_propane_price_per_gal(
-    self, state: str, start_date: datetime.date, end_date: datetime.date
-) -> pl.DataFrame:
-    """Get a state's average propane price in dollars per gal.
-
-    Note:
-        Only these states are tracked, and only for the months October through Marc:
-            * AL
-            * AR
-            * CO
-            * CT
-            * DE
-            * FL
-            * GA
-            * IL
-            * IN
-            * KS
-            * KY
-            * KY
-            * MA
-            * MD
-            * ME
-            * MI
-            * MN
-            * MO
-            * MS
-            * MT
-            * NC
-            * ND
-            * NE
-            * NH
-            * NJ
-            * NY
-            * OH
-            * OK
-            * PA
-            * RI
-            * SD
-            * TN
-            * TX
-            * UT
-            * VA
-            * VT
-            * WI
-
-    Args:
-        state (str): 2 character postal code
-        start_date (datetime.date): the start date, inclusive
-        end_date (datetime.date): the end date, non inclusive
-
-    Returns:
-        dict: _description_
-    """
-    # heating season is Oct - march, $/gal
-    url = f"https://api.eia.gov/v2/petroleum/pri/wfr/data/?frequency=weekly&data[0]=value&facets[process][]=PRS&facets[duoarea][]=S{state}&facets[product][]=EPLLPA&start={start_date}&end={end_date}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}"
-
-    eia_request = req_get_wrapper(url)
-    eia_request.raise_for_status()
-
-    json = eia_request.json()
-    # return self.price_json_to_dict(eia_request.json())
-    df = pl.DataFrame(json["response"]["data"])
-    # df = df.with_columns(pl.col("period").str.to_date().alias("period"))
-    df = df.with_columns(pl.col("period").str.strptime(pl.Date))
-    df = df.with_columns(
-        pl.col("period").dt.year().alias("year"),
-        pl.col("period").dt.month().alias("month"),
-    )
-
-    monthly_avg_price = (
-        df.group_by(["year", "month"])
-        .agg(pl.col("value").mean().alias("monthly_avg_price"))
-        .sort("year", "month")
-    )
-
-    return monthly_avg_price
+540
+541
+542
def monthly_heating_season_propane_price_per_gal(
+    self, state: str, start_date: datetime.date, end_date: datetime.date
+) -> pl.DataFrame:
+    """Get a state's average propane price in dollars per gal.
+
+    Note:
+        Only these states are tracked, and only for the months October through March:
+            * AL
+            * AR
+            * CO
+            * CT
+            * DE
+            * FL
+            * GA
+            * IL
+            * IN
+            * KS
+            * KY
+            * KY
+            * MA
+            * MD
+            * ME
+            * MI
+            * MN
+            * MO
+            * MS
+            * MT
+            * NC
+            * ND
+            * NE
+            * NH
+            * NJ
+            * NY
+            * OH
+            * OK
+            * PA
+            * RI
+            * SD
+            * TN
+            * TX
+            * UT
+            * VA
+            * VT
+            * WI
+
+    Args:
+        state (str): 2 character postal code
+        start_date (datetime.date): the start date, inclusive
+        end_date (datetime.date): the end date, non inclusive
+
+    Returns:
+        pl.DataFrame: one row per (year, month) with the mean weekly price in `monthly_avg_price`, sorted by year then month
+    """
+    # heating season is Oct - march, $/gal
+    url = f"https://api.eia.gov/v2/petroleum/pri/wfr/data/?frequency=weekly&data[0]=value&facets[process][]=PRS&facets[duoarea][]=S{state}&facets[product][]=EPLLPA&start={start_date}&end={end_date}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}"
+
+    eia_request = req_get_wrapper(url)
+    eia_request.raise_for_status()
+
+    json = eia_request.json()
+    # return self.price_json_to_dict(eia_request.json())
+    df = pl.DataFrame(json["response"]["data"])
+    # df = df.with_columns(pl.col("period").str.to_date().alias("period"))
+    df = df.with_columns(pl.col("period").str.strptime(pl.Date))
+    df = df.with_columns(
+        pl.col("period").dt.year().alias("year"),
+        pl.col("period").dt.month().alias("month"),
+    )
+
+    monthly_avg_price = (
+        df.group_by(["year", "month"])
+        .agg(pl.col("value").mean().alias("monthly_avg_price"))
+        .sort("year", "month")
+    )
+
+    return monthly_avg_price
 
@@ -5020,9 +4271,7 @@

Source code in src\backend\secondarydata.py -
374
-375
-376
+            
376
 377
 378
 379
@@ -5042,29 +4291,31 @@ 

393 394 395 -396

def monthly_ng_price_per_mcf(
-    self, state: str, start_date: datetime.date, end_date: datetime.date
-) -> dict[str, Any]:
-    """Get a state's average natural gas price.
-
-    Note:
-        Data is returned in dollars per mega cubic feet.
-
-    Args:
-        state (str): the 2 character postal code of a state
-        start_date (datetime.date): the start date, inclusive
-        end_date (datetime.date): the end date, non inclusive
-
-    Returns:
-        dict: _description_
-    """
-    # $/mcf
-    url = f"https://api.eia.gov/v2/natural-gas/pri/sum/data/?frequency=monthly&data[0]=value&facets[duoarea][]=S{state}&facets[process][]=PRS&start={start_date.year}-{start_date.month:02}&end={end_date.year}-{end_date.month:02}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}"
-
-    eia_request = req_get_wrapper(url)
-    eia_request.raise_for_status()
-
-    return eia_request.json()
+396
+397
+398
def monthly_ng_price_per_mcf(
+    self, state: str, start_date: datetime.date, end_date: datetime.date
+) -> dict[str, Any]:
+    """Get a state's average natural gas price.
+
+    Note:
+        Data is returned in dollars per thousand cubic feet (Mcf).
+
+    Args:
+        state (str): the 2 character postal code of a state
+        start_date (datetime.date): the start date, inclusive
+        end_date (datetime.date): the end date, non inclusive
+
+    Returns:
+        dict: the decoded JSON body of the EIA API response, returned unmodified
+    """
+    # $/mcf
+    url = f"https://api.eia.gov/v2/natural-gas/pri/sum/data/?frequency=monthly&data[0]=value&facets[duoarea][]=S{state}&facets[process][]=PRS&start={start_date.year}-{start_date.month:02}&end={end_date.year}-{end_date.month:02}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}"
+
+    eia_request = req_get_wrapper(url)
+    eia_request.raise_for_status()
+
+    return eia_request.json()
 
@@ -5210,9 +4461,7 @@

Source code in src\backend\secondarydata.py -
542
-543
-544
+            
544
 545
 546
 547
@@ -5274,71 +4523,73 @@ 

603 604 605 -606

def monthly_price_per_mbtu_by_energy_type(
-    self,
-    energy_type: EnergyType,
-    state: str,
-    start_date: datetime.date,
-    end_date: datetime.date,
-) -> dict[str, str | EnergyType | float]:
-    """Get the cost per MBTU for the given energy type for the state, over the given period of time. Refer to EIA's documentation
-    for changes to data collection during certain years.
-
-    Args:
-        energy_type (EnergyType): The energy type
-        state (str): the 2 character postal abbreviation. Note that for heating oil, only certain states have this information collected
-        start_date (datetime.date): the date for which to start the search. Inclusive. Not that for heating oil, only heating months will be returned
-        end_date (datetime.date): the date for which to end the search. Non inclusive
-
-    Raises:
-        NotImplementedError: Invalid energy type
-
-    Returns:
-        dict: year-month: price in USD to BTU
-    """
-    if len(state) > 2:
-        state = sts.lookup(state).abbr  # type: ignore
-    match energy_type:
-        case self.EnergyType.PROPANE:
-            return self.price_per_mbtu_with_efficiency(
-                self.price_to_clean_dict(
-                    self.monthly_heating_season_propane_price_per_gal(
-                        state, start_date, end_date
-                    ),
-                    energy_type,
-                    state,
-                )
-            )
-        case self.EnergyType.NATURAL_GAS:
-            return self.price_per_mbtu_with_efficiency(
-                self.price_to_clean_dict(
-                    self.monthly_ng_price_per_mcf(state, start_date, end_date),
-                    energy_type,
-                    state,
-                )
-            )
-        case self.EnergyType.ELECTRICITY:
-            return self.price_per_mbtu_with_efficiency(
-                self.price_to_clean_dict(
-                    self.monthly_electricity_price_per_kwh(
-                        state, start_date, end_date
-                    ),
-                    energy_type,
-                    state,
-                )
-            )
-        case self.EnergyType.HEATING_OIL:
-            return self.price_per_mbtu_with_efficiency(
-                self.price_to_clean_dict(
-                    self.monthly_heating_season_heating_oil_price_per_gal(
-                        state, start_date, end_date
-                    ),
-                    energy_type,
-                    state,
-                )
-            )
-        case _:
-            raise NotImplementedError(f"Unsupported energy type: {energy_type}")
+606
+607
+608
def monthly_price_per_mbtu_by_energy_type(
+    self,
+    energy_type: EnergyType,
+    state: str,
+    start_date: datetime.date,
+    end_date: datetime.date,
+) -> dict[str, str | EnergyType | float]:
+    """Get the cost per MBTU for the given energy type for the state, over the given period of time. Refer to EIA's documentation
+    for changes to data collection during certain years.
+
+    Args:
+        energy_type (EnergyType): The energy type
+        state (str): the 2 character postal abbreviation. Note that for heating oil, only certain states have this information collected
+        start_date (datetime.date): the date for which to start the search. Inclusive. Not that for heating oil, only heating months will be returned
+        end_date (datetime.date): the date for which to end the search. Non inclusive
+
+    Raises:
+        NotImplementedError: Invalid energy type
+
+    Returns:
+        dict: year-month: price in USD to BTU
+    """
+    if len(state) > 2:
+        state = sts.lookup(state).abbr  # type: ignore
+    match energy_type:
+        case self.EnergyType.PROPANE:
+            return self.price_per_mbtu_with_efficiency(
+                self.price_to_clean_dict(
+                    self.monthly_heating_season_propane_price_per_gal(
+                        state, start_date, end_date
+                    ),
+                    energy_type,
+                    state,
+                )
+            )
+        case self.EnergyType.NATURAL_GAS:
+            return self.price_per_mbtu_with_efficiency(
+                self.price_to_clean_dict(
+                    self.monthly_ng_price_per_mcf(state, start_date, end_date),
+                    energy_type,
+                    state,
+                )
+            )
+        case self.EnergyType.ELECTRICITY:
+            return self.price_per_mbtu_with_efficiency(
+                self.price_to_clean_dict(
+                    self.monthly_electricity_price_per_kwh(
+                        state, start_date, end_date
+                    ),
+                    energy_type,
+                    state,
+                )
+            )
+        case self.EnergyType.HEATING_OIL:
+            return self.price_per_mbtu_with_efficiency(
+                self.price_to_clean_dict(
+                    self.monthly_heating_season_heating_oil_price_per_gal(
+                        state, start_date, end_date
+                    ),
+                    energy_type,
+                    state,
+                )
+            )
+        case _:
+            raise NotImplementedError(f"Unsupported energy type: {energy_type}")
 
@@ -5449,9 +4700,7 @@

Source code in src\backend\secondarydata.py -
608
-609
-610
+            
610
 611
 612
 613
@@ -5492,50 +4741,52 @@ 

648 649 650 -651

def monthly_price_per_mbtu_by_energy_type_by_state(
-    self, state: str, start_date: datetime.date, end_date: datetime.date
-) -> list[Any]:
-    """Get all available energy prices per MBTU, taking efficiency into account, for a state.
-
-    Note:
-        Please keep times to within a year. For the non oil and propane, you have to go a month past.
-
-    Args:
-        state (str): 2 character postal code
-        start_date (datetime.date): start date
-        end_date (datetime.date): end date
-
-    Returns:
-        list[Any]: list of price dicts for available energy types for a state
-    """
-    if len(state) > 2:
-        state = sts.lookup(state).abbr  # type: ignore
-
-    dicts_to_return = []
-    if state in self.HEATING_OIL_STATES_ABBR:
-        dicts_to_return.append(
-            self.monthly_price_per_mbtu_by_energy_type(
-                self.EnergyType.HEATING_OIL, state, start_date, end_date
-            )
-        )
-    if state in self.PROPANE_STATES_ABBR:
-        dicts_to_return.append(
-            self.monthly_price_per_mbtu_by_energy_type(
-                self.EnergyType.PROPANE, state, start_date, end_date
-            )
-        )
-    dicts_to_return.append(
-        self.monthly_price_per_mbtu_by_energy_type(
-            self.EnergyType.NATURAL_GAS, state, start_date, end_date
-        )
-    )
-    dicts_to_return.append(
-        self.monthly_price_per_mbtu_by_energy_type(
-            self.EnergyType.ELECTRICITY, state, start_date, end_date
-        )
-    )
-    log(f"{dicts_to_return = }", "debug")
-    return dicts_to_return
+651
+652
+653
def monthly_price_per_mbtu_by_energy_type_by_state(
+    self, state: str, start_date: datetime.date, end_date: datetime.date
+) -> list[Any]:
+    """Get all available energy prices per MBTU, taking efficiency into account, for a state.
+
+    Note:
+        Please keep times to within a year. For the non oil and propane, you have to go a month past.
+
+    Args:
+        state (str): 2 character postal code
+        start_date (datetime.date): start date
+        end_date (datetime.date): end date
+
+    Returns:
+        list[Any]: list of price dicts for available energy types for a state
+    """
+    if len(state) > 2:
+        state = sts.lookup(state).abbr  # type: ignore
+
+    dicts_to_return = []
+    if state in self.HEATING_OIL_STATES_ABBR:
+        dicts_to_return.append(
+            self.monthly_price_per_mbtu_by_energy_type(
+                self.EnergyType.HEATING_OIL, state, start_date, end_date
+            )
+        )
+    if state in self.PROPANE_STATES_ABBR:
+        dicts_to_return.append(
+            self.monthly_price_per_mbtu_by_energy_type(
+                self.EnergyType.PROPANE, state, start_date, end_date
+            )
+        )
+    dicts_to_return.append(
+        self.monthly_price_per_mbtu_by_energy_type(
+            self.EnergyType.NATURAL_GAS, state, start_date, end_date
+        )
+    )
+    dicts_to_return.append(
+        self.monthly_price_per_mbtu_by_energy_type(
+            self.EnergyType.ELECTRICITY, state, start_date, end_date
+        )
+    )
+    log(f"{dicts_to_return = }", "debug")
+    return dicts_to_return
 
@@ -5642,9 +4893,7 @@

Source code in src\backend\secondarydata.py -
303
-304
-305
+            
305
 306
 307
 308
@@ -5662,27 +4911,29 @@ 

320 321 322 -323

def price_df_to_clean_dict(
-    self, eia_df: pl.DataFrame, energy_type: EnergyType, state: str
-) -> dict[str, str | EnergyType | float]:
-    """Clean DataFrame data consisting of EIA API data.
-
-    Args:
-        eia_df (pl.DataFrame): the DataFrame to clean
-        energy_type (EnergyType): the energy type
-        state (str): the state
-
-    Returns:
-        dict[str, str|EnergyType|float]: the dict
-    """
-    result_dict = {}
-    for row in eia_df.rows(named=True):
-        year_month = f"{row.get("year")}-{row.get("month"):02}"
-        if row.get("monthly_avg_price") is not None:
-            result_dict[year_month] = round(row.get("monthly_avg_price"), 3)  # type: ignore
-    result_dict["type"] = energy_type.value
-    result_dict["state"] = state
-    return result_dict
+323
+324
+325
def price_df_to_clean_dict(
+    self, eia_df: pl.DataFrame, energy_type: EnergyType, state: str
+) -> dict[str, str | EnergyType | float]:
+    """Clean DataFrame data consisting of EIA API data.
+
+    Args:
+        eia_df (pl.DataFrame): the DataFrame to clean
+        energy_type (EnergyType): the energy type
+        state (str): the state
+
+    Returns:
+        dict[str, str|EnergyType|float]: the dict
+    """
+    result_dict = {}
+    for row in eia_df.rows(named=True):
+        year_month = f"{row.get("year")}-{row.get("month"):02}"
+        if row.get("monthly_avg_price") is not None:
+            result_dict[year_month] = round(row.get("monthly_avg_price"), 3)  # type: ignore
+    result_dict["type"] = energy_type.value
+    result_dict["state"] = state
+    return result_dict
 
@@ -5789,9 +5040,7 @@

Source code in src\backend\secondarydata.py -
276
-277
-278
+            
278
 279
 280
 281
@@ -5814,32 +5063,34 @@ 

298 299 300 -301

def price_dict_to_clean_dict(
-    self, eia_json: dict, energy_type: EnergyType, state: str
-) -> dict[str, str | EnergyType | float]:
-    """Clean JSON data returned by EIA's API.
-
-    Args:
-        eia_json (dict): the response JSON
-        energy_type (EnergyType): the energy type
-        state (str): the state
-
-    Returns:
-        dict[str, str | EnergyType | float]: cleaned JSON
-    """
-    # price key is different for electricity
-    accessor = "value"
-    if "product" not in eia_json["response"]["data"][0]:
-        accessor = "price"
-
-    result_dict = {
-        entry["period"]: entry[f"{accessor}"]
-        for entry in eia_json["response"]["data"]
-    }
-    result_dict["type"] = energy_type.value
-    result_dict["state"] = state
-
-    return result_dict
+301
+302
+303
def price_dict_to_clean_dict(
+    self, eia_json: dict, energy_type: EnergyType, state: str
+) -> dict[str, str | EnergyType | float]:
+    """Clean JSON data returned by EIA's API.
+
+    Args:
+        eia_json (dict): the response JSON
+        energy_type (EnergyType): the energy type
+        state (str): the state
+
+    Returns:
+        dict[str, str | EnergyType | float]: cleaned JSON
+    """
+    # price key is different for electricity
+    accessor = "value"
+    if "product" not in eia_json["response"]["data"][0]:
+        accessor = "price"
+
+    result_dict = {
+        entry["period"]: entry[f"{accessor}"]
+        for entry in eia_json["response"]["data"]
+    }
+    result_dict["type"] = energy_type.value
+    result_dict["state"] = state
+
+    return result_dict
 
@@ -5926,9 +5177,7 @@

Source code in src\backend\secondarydata.py -
188
-189
-190
+            
190
 191
 192
 193
@@ -6011,92 +5260,94 @@ 

270 271 272 -273

def price_per_mbtu_with_efficiency(
-    self, energy_price_dict: dict
-) -> dict[str, str | EnergyType | float]:
-    """Convert an energy source's price per quantity into price per BTU with an efficiency.
-
-    Note:
-        Efficiency data taken from https://portfoliomanager.energystar.gov/pdf/reference/Thermal%20Conversions.pdf
-
-    See also:
-        `EIADataRetriever.HeaterEfficiencies`
-
-    Args:
-        energy_price_dict (dict): energy source json
-
-    Returns:
-        dict: new dictionary with btu centric pricing
-    """
-    #! make new function based on burner type/ end usage type
-    CENTS_IN_DOLLAR = 100
-    match energy_price_dict.get("type"):
-        case self.EnergyType.PROPANE.value:
-            # for loop is done for every case since i dont want to use `eval` or parse a string of division to keep PEMDAS. this is why i dont have an efficiency func yet
-            for key, value in energy_price_dict.items():
-                if (
-                    key in ["type", "state", None]
-                    or energy_price_dict.get(key) is None
-                ):
-                    continue
-                energy_price_dict[key] = (
-                    value
-                    / (
-                        self.FuelBTUConversion.PROPANE_BTU_PER_GAL.value
-                        * self.HeaterEfficiencies.PROPANE_FURNACE.value
-                    )
-                    * 1_000
-                )
-        case self.EnergyType.NATURAL_GAS.value:
-            for key, value in energy_price_dict.items():
-                if (
-                    key in ["type", "state", None]
-                    or energy_price_dict.get(key) is None
-                ):
-                    continue
-                energy_price_dict[key] = (
-                    value
-                    / (
-                        self.FuelBTUConversion.NG_BTU_PER_MCT.value
-                        * self.HeaterEfficiencies.NG_FURNACE.value
-                    )
-                    * 1_000
-                )
-        case self.EnergyType.ELECTRICITY.value:
-            for key, value in energy_price_dict.items():
-                if (
-                    key in ["type", "state", None]
-                    or energy_price_dict.get(key) is None
-                ):
-                    continue
-                energy_price_dict[key] = (
-                    value
-                    / CENTS_IN_DOLLAR
-                    / (
-                        self.FuelBTUConversion.ELECTRICITY_BTU_PER_KWH.value
-                        * self.HeaterEfficiencies.HEAT_PUMP_DUCTED.value
-                    )
-                    * 1_000
-                )
-        case self.EnergyType.HEATING_OIL.value:
-            for key, value in energy_price_dict.items():
-                if (
-                    key in ["type", "state", None]
-                    or energy_price_dict.get(key) is None
-                ):
-                    continue
-                energy_price_dict[key] = (
-                    value
-                    / (
-                        self.FuelBTUConversion.HEATING_OIL_BTU_PER_GAL.value
-                        * self.HeaterEfficiencies.OIL_BOILER.value
-                    )
-                    * 1_000
-                )
-        case _:
-            log("Could not translate dict to btu per price.", "warn")
-
-    return energy_price_dict
+273
+274
+275
def price_per_mbtu_with_efficiency(
+    self, energy_price_dict: dict
+) -> dict[str, str | EnergyType | float]:
+    """Convert an energy source's price per quantity into price per BTU with an efficiency.
+
+    Note:
+        Efficiency data taken from https://portfoliomanager.energystar.gov/pdf/reference/Thermal%20Conversions.pdf
+
+    See also:
+        `EIADataRetriever.HeaterEfficiencies`
+
+    Args:
+        energy_price_dict (dict): energy source json
+
+    Returns:
+        dict: new dictionary with btu centric pricing
+    """
+    #! make new function based on burner type/ end usage type
+    CENTS_IN_DOLLAR = 100
+    match energy_price_dict.get("type"):
+        case self.EnergyType.PROPANE.value:
+            # for loop is done for every case since i dont want to use `eval` or parse a string of division to keep PEMDAS. this is why i dont have an efficiency func yet
+            for key, value in energy_price_dict.items():
+                if (
+                    key in ["type", "state", None]
+                    or energy_price_dict.get(key) is None
+                ):
+                    continue
+                energy_price_dict[key] = (
+                    value
+                    / (
+                        self.FuelBTUConversion.PROPANE_BTU_PER_GAL.value
+                        * self.HeaterEfficiencies.PROPANE_FURNACE.value
+                    )
+                    * 1_000
+                )
+        case self.EnergyType.NATURAL_GAS.value:
+            for key, value in energy_price_dict.items():
+                if (
+                    key in ["type", "state", None]
+                    or energy_price_dict.get(key) is None
+                ):
+                    continue
+                energy_price_dict[key] = (
+                    value
+                    / (
+                        self.FuelBTUConversion.NG_BTU_PER_MCT.value
+                        * self.HeaterEfficiencies.NG_FURNACE.value
+                    )
+                    * 1_000
+                )
+        case self.EnergyType.ELECTRICITY.value:
+            for key, value in energy_price_dict.items():
+                if (
+                    key in ["type", "state", None]
+                    or energy_price_dict.get(key) is None
+                ):
+                    continue
+                energy_price_dict[key] = (
+                    value
+                    / CENTS_IN_DOLLAR
+                    / (
+                        self.FuelBTUConversion.ELECTRICITY_BTU_PER_KWH.value
+                        * self.HeaterEfficiencies.HEAT_PUMP_DUCTED.value
+                    )
+                    * 1_000
+                )
+        case self.EnergyType.HEATING_OIL.value:
+            for key, value in energy_price_dict.items():
+                if (
+                    key in ["type", "state", None]
+                    or energy_price_dict.get(key) is None
+                ):
+                    continue
+                energy_price_dict[key] = (
+                    value
+                    / (
+                        self.FuelBTUConversion.HEATING_OIL_BTU_PER_GAL.value
+                        * self.HeaterEfficiencies.OIL_BOILER.value
+                    )
+                    * 1_000
+                )
+        case _:
+            log("Could not translate dict to btu per price.", "warn")
+
+    return energy_price_dict
 
@@ -6227,9 +5478,7 @@

Source code in src\backend\secondarydata.py -
326
-327
-328
+            
328
 329
 330
 331
@@ -6249,29 +5498,31 @@ 

345 346 347 -348

def price_to_clean_dict(
-    self, price_struct: dict | pl.DataFrame, energy_type: EnergyType, state: str
-) -> dict[str, str | EnergyType | float]:
-    """Handle the different data types that EIA data could be stored in.
-
-    Args:
-        price_struct (dict | pl.DataFrame): a data structure containing the year, month, and price info
-        energy_type (EnergyType): the energy type
-        state (str): the state
-
-    Raises:
-        TypeError: raised if the type of `price_struct` is not supported
-
-    Returns:
-        dict[str, str|EnergyType|float]: the normalized and structured data in dict form
-    """
-    match price_struct:
-        case dict():
-            return self.price_dict_to_clean_dict(price_struct, energy_type, state)
-        case pl.DataFrame():
-            return self.price_df_to_clean_dict(price_struct, energy_type, state)
-        case _:
-            raise TypeError(f"Type not supported: {type(energy_type)}")
+348
+349
+350
def price_to_clean_dict(
+    self, price_struct: dict | pl.DataFrame, energy_type: EnergyType, state: str
+) -> dict[str, str | EnergyType | float]:
+    """Handle the different data types that EIA data could be stored in.
+
+    Args:
+        price_struct (dict | pl.DataFrame): a data structure containing the year, month, and price info
+        energy_type (EnergyType): the energy type
+        state (str): the state
+
+    Raises:
+        TypeError: raised if the type of `price_struct` is not supported
+
+    Returns:
+        dict[str, str|EnergyType|float]: the normalized and structured data in dict form
+    """
+    match price_struct:
+        case dict():
+            return self.price_dict_to_clean_dict(price_struct, energy_type, state)
+        case pl.DataFrame():
+            return self.price_df_to_clean_dict(price_struct, energy_type, state)
+        case _:
+            raise TypeError(f"Type not supported: {type(energy_type)}")
 
diff --git a/gen_ref_pages.py b/gen_ref_pages.py index 305e068..051a29b 100755 --- a/gen_ref_pages.py +++ b/gen_ref_pages.py @@ -4,7 +4,6 @@ from pathlib import Path import mkdocs_gen_files -# maybe take main out later when we have the gui, but for now its not needed. exclude_words = ["__init__", "csv_merge", "main"] src_path = Path(__file__).parent.parent / "src" @@ -22,64 +21,3 @@ mkdocs_gen_files.set_edit_path(doc_path, path) -# for path in sorted(src_path.rglob("*.py")): -# filename = path.with_suffix("") -# rel_filename = path.relative_to("src").with_suffix("") - -# filename_parts = filename.parts - -# if [elem for elem in exclude_words if elem in [part for part in filename_parts]]: -# continue -# print(".".join(filename_parts)) -# with mkdocs_gen_files.open(rel_filename.with_suffix(".md"), "w") as fd: -# print("::: " + ".".join(filename_parts), file=fd) - -# mkdocs_gen_files.set_edit_path(rel_filename.with_suffix(".md"), "gen_ref_pages.py") - - -# # src_folder = "src" -# # for path in sorted(Path("src_folder").rglob("*.py")): -# # md_filename = path.relative_to("src_folder").with_suffix(".md") -# # py_filename = path.relative_to("src_folder") - -# # parts = py_filename.parts -# # #dont document the __init__ file in modules -# # if parts[-1] == "__init__": -# # parts = parts[:-1] -# # elif parts[-1] == "__main__": -# # continue - -# # with mkdocs_gen_files.open(md_filename, "w") as fd: -# # # get last part and change to py -# # print("::: " + src_folder + ".".join(parts), file=fd) - -# # mkdocs_gen_files.set_edit_path(md_filename, "gen_ref_pages.py") - -# # for total in range(19, 100, 20): -# # filename = f"sample/{total}-bottles.md" - -# # with mkdocs_gen_files.open(filename, "w") as f: -# # for i in reversed(range(1, total + 1)): -# # print(f"{i} bottles of beer on the wall, {i} bottles of beer ", file=f) -# # print(f"Take one down and pass it around, **{i-1}** bottles of beer on the wall\n", file=f) - -# # mkdocs_gen_files.set_edit_path(filename, 
"gen_ref_pages.py") - - -# with mkdocs_gen_files.open("src/test/RedfinSearcher.md", "w") as fd: -# # get last part and change to py -# print("::: " + "src.test.RedfinSearcher", file=fd) -# # print("handler: python") -# # print("options:") -# # print("members:") -# # print("- set_filters_path") -# # print("- set_filters_path") -# # options: -# # members: -# # - set_filters_path -# # - generate_filters_path -# # show_root_heading: false -# # show_source: true -# # """) - -# mkdocs_gen_files.set_edit_path("src/test/RedfinSearcher.md", "gen_ref_pages.py") diff --git a/gui/datapage/index.html b/gui/datapage/index.html index 368211e..9af3777 100755 --- a/gui/datapage/index.html +++ b/gui/datapage/index.html @@ -756,8 +756,7 @@

Source code in src\gui\datapage.py -
 23
- 24
+              
 24
  25
  26
  27
@@ -1107,358 +1106,411 @@ 

371 372 373 -374

class DataPage(ctk.CTkFrame):
-    """Crate page for displaying energy data and links to censusreporter.org for census level data"""
-
-    def __init__(self, master, **kwargs):
-        super().__init__(master, **kwargs)
-        self.msa_name = None
-        self.income_df = None
-        self.demog_df = None
-        self.states_in_msa = None
-        self.state_demog_dfs = None
-        self.state_income_dfs = None
-        self.cur_year = datetime.datetime.now().year
-        self.years = [
-            str(self.cur_year),
-            str(self.cur_year - 1),
-            str(self.cur_year - 2),
-            str(self.cur_year - 3),
-            str(self.cur_year - 4),
-        ]
-        self.roboto_font = ctk.CTkFont(family="Roboto")
-        self.roboto_header_font = ctk.CTkFont(family="Roboto", size=28)
-        self.roboto_link_font = ctk.CTkFont(family="Roboto", underline=True, size=20)
-        self.create_widgets()
-
-    def create_widgets(self) -> None:
-        """Create widgets."""
-        # bug in sockets library wont allow you to raise keyboardinterrupt, so stopping
-        # Content frame will have 4 rows. first will be header, 2nd is energy graph, 3rd will contain a frame that has censusreport.org links, 4th will have progress bar frame
-        self.content_frame = ctk.CTkFrame(self, border_width=2)
-        self.content_banner_frame = ctk.CTkFrame(self.content_frame, border_width=2)
-        self.state_and_year_content_banner_dropdown_frame = ctk.CTkFrame(
-            self.content_banner_frame, border_width=2
-        )
-        self.census_reporter_frame = ctk.CTkFrame(self.content_frame, border_width=2)
-        self.log_frame = ctk.CTkFrame(self.content_frame, border_width=2)
-
-        self.content_banner_main_text = ctk.CTkLabel(
-            self.content_banner_frame,
-            text="Census and Energy Data:",
-            font=self.roboto_header_font,
-        )
-        self.content_banner_main_text.bind(
-            "<Configure>",
-            command=lambda x: self.content_banner_main_text.configure(
-                wraplength=self.content_banner_main_text._current_width
-                - 40  # random padding
-            ),
-        )
-        # nested frame for holding filters and text inside banner frame
-
-        self.select_state_label = ctk.CTkLabel(
-            self.state_and_year_content_banner_dropdown_frame,
-            text="Select State",
-            font=self.roboto_font,
-        )
-        self.select_state_dropdown = ctk.CTkOptionMenu(
-            self.state_and_year_content_banner_dropdown_frame,
-            values=None,
-            command=self.state_dropdown_callback,
-        )
-
-        self.select_year_label = ctk.CTkLabel(
-            self.state_and_year_content_banner_dropdown_frame,
-            text="Select Year",
-            font=self.roboto_font,
-        )
-        self.select_year_dropdown = ctk.CTkOptionMenu(
-            self.state_and_year_content_banner_dropdown_frame,
-            values=self.years,
-            command=self.year_dropdown_callback,
-        )
-
-        self.energy_graph_frame = ctk.CTkFrame(self.content_frame, border_width=2)
-
-        self.census_reporter_state_label = ctk.CTkLabel(
-            self.census_reporter_frame,
-            text="Census Reporter: State Report",
-            font=self.roboto_link_font,
-            cursor="hand2",
-            text_color="blue",
-        )
-
-        self.log_button = ctk.CTkButton(
-            self.log_frame, text="Open Log File", command=self.open_log_file
-        )
-        self.census_reporter_state_label.bind(
-            "<Button-1>", lambda x: self.open_census_reporter_state()
-        )
-        self.census_reporter_metro_label = ctk.CTkLabel(
-            self.census_reporter_frame,
-            text="Census Reporter: Metro Report",
-            font=self.roboto_link_font,
-            cursor="hand2",
-            text_color="blue",
-        )
-        self.census_reporter_metro_label.bind(
-            "<Button-1>", lambda x: self.open_census_reporter_metro()
-        )
-        # create grid
-        # col
-        self.columnconfigure(0, weight=1)
-        self.content_frame.columnconfigure(0, weight=1)
-        self.content_banner_frame.columnconfigure((0, 1), weight=1)
-        self.state_and_year_content_banner_dropdown_frame.columnconfigure(
-            (0, 1), weight=1
-        )
-        self.energy_graph_frame.columnconfigure(0, weight=1)
-        self.census_reporter_frame.columnconfigure(0, weight=1)
-        self.log_frame.columnconfigure(0, weight=1)
-
-        # row
-        self.rowconfigure(0, weight=1)
-
-        self.content_frame.rowconfigure(0, weight=1)  # banner
-        self.content_frame.rowconfigure(1, weight=5)  # energy graph
-        self.content_frame.rowconfigure(2, weight=2)  # census reporter frame
-        self.content_frame.rowconfigure(3, weight=1)
-
-        self.content_banner_frame.rowconfigure(0, weight=1)
-
-        self.state_and_year_content_banner_dropdown_frame.rowconfigure((0, 1), weight=1)
-
-        self.energy_graph_frame.rowconfigure(0, weight=1)
+374
+375
+376
+377
+378
+379
+380
+381
+382
+383
+384
+385
+386
+387
+388
+389
+390
+391
+392
+393
+394
+395
+396
+397
+398
+399
+400
+401
class DataPage(ctk.CTkFrame):
+    """Create page for displaying energy data and links to censusreporter.org for census-level data."""
+
+    def __init__(self, master, **kwargs):
+        super().__init__(master, **kwargs)
+        self.msa_name = None
+        self.income_df = None
+        self.demog_df = None
+        self.states_in_msa = None
+        self.state_demog_dfs = None
+        self.state_income_dfs = None
+        self.cur_year = datetime.datetime.now().year
+        self.years = [
+            str(self.cur_year),
+            str(self.cur_year - 1),
+            str(self.cur_year - 2),
+            str(self.cur_year - 3),
+            str(self.cur_year - 4),
+        ]
+        self.roboto_font = ctk.CTkFont(family="Roboto")
+        self.roboto_header_font = ctk.CTkFont(family="Roboto", size=28)
+        self.roboto_link_font = ctk.CTkFont(family="Roboto", underline=True, size=20)
+        self.create_widgets()
+
+    def create_widgets(self) -> None:
+        """Create widgets."""
+        # bug in sockets library won't allow you to raise KeyboardInterrupt, so stopping
+        # Content frame will have 4 rows: the 1st is the header, the 2nd is the energy graph, the 3rd contains a frame that has censusreporter.org links, and the 4th holds the log frame
+        self.content_frame = ctk.CTkFrame(self)
+        self.content_banner_frame = ctk.CTkFrame(self.content_frame)
+        self.state_and_year_content_banner_dropdown_frame = ctk.CTkFrame(
+            self.content_banner_frame
+        )
+        self.census_reporter_frame = ctk.CTkFrame(self.content_frame)
+        self.log_frame = ctk.CTkFrame(self.content_frame)
+
+        self.content_banner_main_text = ctk.CTkLabel(
+            self.content_banner_frame,
+            text="Census and Energy Data:",
+            font=self.roboto_header_font,
+        )
+        self.content_banner_main_text.bind(
+            "<Configure>",
+            command=lambda x: self.content_banner_main_text.configure(
+                wraplength=self.content_banner_main_text._current_width
+                - 40  # random padding
+            ),
+        )
+        # nested frame for holding filters and text inside banner frame
+
+        self.select_state_label = ctk.CTkLabel(
+            self.state_and_year_content_banner_dropdown_frame,
+            text="Select State",
+            font=self.roboto_font,
+        )
+        self.select_state_dropdown = ctk.CTkOptionMenu(
+            self.state_and_year_content_banner_dropdown_frame,
+            values=None,
+            command=self.state_dropdown_callback,
+        )
+
+        self.select_year_label = ctk.CTkLabel(
+            self.state_and_year_content_banner_dropdown_frame,
+            text="Select Year",
+            font=self.roboto_font,
+        )
+        self.select_year_dropdown = ctk.CTkOptionMenu(
+            self.state_and_year_content_banner_dropdown_frame,
+            values=self.years,
+            command=self.year_dropdown_callback,
+        )
+
+        self.energy_graph_frame = ctk.CTkFrame(self.content_frame)
+
+        self.census_reporter_state_label = ctk.CTkLabel(
+            self.census_reporter_frame,
+            text="Census Reporter: State Report",
+            font=self.roboto_link_font,
+            cursor="hand2",
+            text_color="blue",
+        )
+
+        self.log_button = ctk.CTkButton(
+            self.log_frame, text="Open Log File", command=self.open_log_file
+        )
+        self.census_button = ctk.CTkButton(
+            self.log_frame,
+            text="Generate Census data",
+            command=self.generate_census_reports,
+        )
+        self.census_reporter_state_label.bind(
+            "<Button-1>", lambda x: self.open_census_reporter_state()
+        )
+        self.census_reporter_metro_label = ctk.CTkLabel(
+            self.census_reporter_frame,
+            text="Census Reporter: Metro Report",
+            font=self.roboto_link_font,
+            cursor="hand2",
+            text_color="blue",
+        )
+        self.census_reporter_metro_label.bind(
+            "<Button-1>", lambda x: self.open_census_reporter_metro()
+        )
+        # create grid
+        # col
+        self.columnconfigure(0, weight=1)
+        self.content_frame.columnconfigure(0, weight=1)
+        self.content_banner_frame.columnconfigure((0, 1), weight=1)
+        self.state_and_year_content_banner_dropdown_frame.columnconfigure(
+            (0, 1), weight=1
+        )
+        self.energy_graph_frame.columnconfigure(0, weight=1)
+        self.census_reporter_frame.columnconfigure(0, weight=1)
+        self.log_frame.columnconfigure((0, 1), weight=1)
+
+        # row
+        self.rowconfigure(0, weight=1)
+
+        self.content_frame.rowconfigure(0, weight=1)  # banner
+        self.content_frame.rowconfigure(1, weight=5)  # energy graph
+        self.content_frame.rowconfigure(2, weight=2)  # census reporter frame
+        self.content_frame.rowconfigure(3, weight=1)
 
-        self.census_reporter_frame.rowconfigure((0, 1), weight=1)
+        self.content_banner_frame.rowconfigure(0, weight=1)
 
-        self.log_frame.rowconfigure(0, weight=1)
+        self.state_and_year_content_banner_dropdown_frame.rowconfigure((0, 1), weight=1)
 
-        # placement
-        self.content_frame.grid(column=0, row=0, sticky="news")
-
-        self.content_banner_frame.grid(column=0, row=0, sticky="news")
-
-        self.content_banner_main_text.grid(column=0, row=0, sticky="nsew")
-
-        self.state_and_year_content_banner_dropdown_frame.grid(
-            column=1, row=0, sticky="news"
-        )
+        self.energy_graph_frame.rowconfigure(0, weight=1)
+
+        self.census_reporter_frame.rowconfigure((0, 1), weight=1)
+
+        self.log_frame.rowconfigure(0, weight=1)
+
+        # placement
+        self.content_frame.grid(column=0, row=0, sticky="news")
+
+        self.content_banner_frame.grid(column=0, row=0, sticky="news")
 
-        self.select_state_label.grid(column=0, row=0, sticky="news")
-        self.select_year_label.grid(column=1, row=0, sticky="news")
-        self.select_state_dropdown.grid(column=0, row=1)
-        self.select_year_dropdown.grid(column=1, row=1)
-
-        self.energy_graph_frame.grid(column=0, row=1, sticky="news")
-
-        self.census_reporter_frame.grid(column=0, row=2, sticky="news")
-        self.census_reporter_state_label.grid(column=0, row=0)
-        self.census_reporter_metro_label.grid(column=0, row=1)
+        self.content_banner_main_text.grid(column=0, row=0, sticky="nsew")
+
+        self.state_and_year_content_banner_dropdown_frame.grid(
+            column=1, row=0, sticky="news"
+        )
+
+        self.select_state_label.grid(column=0, row=0, sticky="news")
+        self.select_year_label.grid(column=1, row=0, sticky="news")
+        self.select_state_dropdown.grid(column=0, row=1)
+        self.select_year_dropdown.grid(column=1, row=1)
 
-        self.log_frame.grid(column=0, row=3, sticky="news")
-        self.log_button.grid(column=0, row=0, pady=10)
-
-    def set_msa_name(self, msa_name: str) -> None:
-        """Set the msa name and update objects that rely on the msa name. Includes drop downs and and generating the energy plot.
+        self.energy_graph_frame.grid(column=0, row=1, sticky="news")
+
+        self.census_reporter_frame.grid(column=0, row=2, sticky="news")
+        self.census_reporter_state_label.grid(column=0, row=0)
+        self.census_reporter_metro_label.grid(column=0, row=1)
 
-        Args:
-            msa_name (str): Metropolitan Statistical Area name. This must be validated
-        """
-        self.msa_name = msa_name
-        self.states_in_msa = helper.get_states_in_msa(self.msa_name)
-
-        if len(self.states_in_msa) > 0:
-            self.select_state_dropdown.configure()
-            self.select_state_dropdown.set(self.states_in_msa[0])
-
-        self.select_state_dropdown.configure(values=self.states_in_msa)
-        self.content_banner_main_text.configure(
-            text=f"Census and Energy Data: {self.msa_name}"
-        )
-        self.zip_list = helper.metro_name_to_zip_code_list(msa_name)
-        self.zip_list = [str(zip) for zip in self.zip_list]
+        self.log_frame.grid(column=0, row=3, sticky="news")
+        self.census_button.grid(column=0, row=0, pady=10, padx=(0, 10))
+        self.log_button.grid(column=1, row=0, pady=10, padx=(10, 0))
+
+    def set_msa_name(self, msa_name: str) -> None:
+        """Set the msa name and update objects that rely on the msa name. Includes drop downs and generating the energy plot.
+
+        Args:
+            msa_name (str): Metropolitan Statistical Area name. This must be validated
+        """
+        self.msa_name = msa_name
+        self.states_in_msa = helper.get_states_in_msa(self.msa_name)
+
+        if len(self.states_in_msa) > 0:
+            self.select_state_dropdown.configure()
+            self.select_state_dropdown.set(self.states_in_msa[0])
 
-        threading.Thread(
-            target=self.generate_energy_plot,
-            args=(
-                int(self.select_year_dropdown.get()),
-                self.select_state_dropdown.get(),
-            ),
-            daemon=True,
-        ).start()
-
-    def generate_energy_plot(self, year: int, state: str) -> None:
-        """Call the EIA API and generate a plot with the received data.
-
-        Note:
-            Call this in a thread so that it doesn't freeze the GUI
-            Update: might want to just get the data and plot on the main thread
-        """
-        eia = EIADataRetriever()
-        energy_price_per_mbtu_by_type_for_state = (
-            eia.monthly_price_per_mbtu_by_energy_type_by_state(
-                state, datetime.date(year, 1, 1), datetime.date(year + 1, 1, 1)
-            )
-        )
-
-        fig = Figure(layout="compressed", facecolor="blue")
-        ax = fig.add_subplot()
-        ax.set_xlabel("Time (Months)")
-        ax.set_ylabel("Cost per Effective MBTU ($/MBTU)")
-        ax.set_title(
-            f"Avg. Energy Prices by Appliance for {state}, {year}",
-            loc="center",
-            wrap=True,
-        )
-        months = [i for i in range(1, 13)]
-        month_names = [
-            "Jan",
-            "Feb",
-            "Mar",
-            "Apr",
-            "May",
-            "Jun",
-            "Jul",
-            "Aug",
-            "Sep",
-            "Oct",
-            "Nov",
-            "Dec",
-        ]
-        ax.set_xticks(months)
-        labels = [item.get_text() for item in ax.get_xticklabels()]
-
-        # Modify specific labels, keeping offset
-        for i in range(0, 12):
-            labels[i] = month_names[i]
-        ax.set_xticklabels(labels)
-
-        for energy_dict in energy_price_per_mbtu_by_type_for_state:
-            if len(energy_dict) < 3:
-                log(
-                    f"Issue with energy type {energy_dict.get("type")} for state {energy_dict.get("state")}",
-                    "debug",
-                )
-                continue
-            match energy_dict.get("type"):
-                case EIADataRetriever.EnergyType.PROPANE.value:
-                    result_list = []
-                    for month in months:
-                        key = f"{year}-{month:02}"
-                        val = energy_dict.get(key, float("NaN"))
-                        if val is None:
-                            val = float("NaN")
-                        result_list.append(val)
-                    ax.plot(months, result_list, label="Propane Furnace")
-                case EIADataRetriever.EnergyType.HEATING_OIL.value:
-                    result_list = []
-                    for month in months:
-                        key = f"{year}-{month:02}"
-                        val = energy_dict.get(key, float("NaN"))
-                        if val is None:
-                            val = float("NaN")
-                        result_list.append(val)
-                    ax.plot(months, result_list, label="Heating Oil Boiler")
-                case EIADataRetriever.EnergyType.NATURAL_GAS.value:
-                    result_list = []
-                    for month in months:
-                        key = f"{year}-{month:02}"
-                        val = energy_dict.get(key, float("NaN"))
-                        if val is None:
-                            val = float("NaN")
-                        result_list.append(val)
-                    ax.plot(months, result_list, label="Natural Gas Furnace")
-                case EIADataRetriever.EnergyType.ELECTRICITY.value:
-                    result_list = []
-                    for month in months:
-                        key = f"{year}-{month:02}"
-                        val = energy_dict.get(key, float("NaN"))
-                        if val is None:
-                            val = float("NaN")
-                        result_list.append(val)
-                    ax.plot(months, result_list, label="Ducted Heat Pump")
-        ax.legend()
-        with threading.Lock():
-            canvas = FigureCanvasTkAgg(fig, master=self.energy_graph_frame)
-            canvas.draw()
-
-            # toolbar = NavigationToolbar2Tk(canvas, window=self.energy_graph_frame, pack_toolbar=False)
-            # toolbar.update()
-            # canvas.mpl_connect("key_press_event", key_press_handler)
-
-            # toolbar.grid(column=0, row=1, sticky="news")
-            canvas.get_tk_widget().grid(column=0, row=0)
+        self.select_state_dropdown.configure(values=self.states_in_msa)
+        self.content_banner_main_text.configure(
+            text=f"Census and Energy Data: {self.msa_name}"
+        )
+        self.zip_list = helper.metro_name_to_zip_code_list(msa_name)
+        self.zip_list = [str(zip) for zip in self.zip_list]
+
+        threading.Thread(
+            target=self.generate_energy_plot,
+            args=(
+                int(self.select_year_dropdown.get()),
+                self.select_state_dropdown.get(),
+            ),
+            daemon=True,
+        ).start()
+
+    def generate_energy_plot(self, year: int, state: str) -> None:
+        """Call the EIA API and generate a plot with the received data.
+
+        Note:
+            Call this in a thread so that it doesn't freeze the GUI
+            Update: might want to just get the data and plot on the main thread
+        """
+        eia = EIADataRetriever()
+        energy_price_per_mbtu_by_type_for_state = (
+            eia.monthly_price_per_mbtu_by_energy_type_by_state(
+                state, datetime.date(year, 1, 1), datetime.date(year + 1, 1, 1)
+            )
+        )
+
+        fig = Figure(layout="compressed", facecolor="#dbdbdb")
+        ax = fig.add_subplot()
+        ax.set_xlabel("Time (Months)")
+        ax.set_ylabel("Cost per Effective MBTU ($/MBTU)")
+        ax.set_title(
+            f"Avg. Energy Prices by Appliance for {state}, {year}",
+            loc="center",
+            wrap=True,
+        )
+        months = [i for i in range(1, 13)]
+        month_names = [
+            "Jan",
+            "Feb",
+            "Mar",
+            "Apr",
+            "May",
+            "Jun",
+            "Jul",
+            "Aug",
+            "Sep",
+            "Oct",
+            "Nov",
+            "Dec",
+        ]
+        ax.set_xticks(months)
+        labels = [item.get_text() for item in ax.get_xticklabels()]
+
+        # Modify specific labels, keeping offset
+        for i in range(0, 12):
+            labels[i] = month_names[i]
+        ax.set_xticklabels(labels)
+
+        for energy_dict in energy_price_per_mbtu_by_type_for_state:
+            if len(energy_dict) < 3:
+                log(
+                    f"Issue with energy type {energy_dict.get("type")} for state {energy_dict.get("state")}",
+                    "debug",
+                )
+                continue
+            match energy_dict.get("type"):
+                case EIADataRetriever.EnergyType.PROPANE.value:
+                    result_list = []
+                    for month in months:
+                        key = f"{year}-{month:02}"
+                        val = energy_dict.get(key, float("NaN"))
+                        if val is None:
+                            val = float("NaN")
+                        result_list.append(val)
+                    ax.plot(months, result_list, label="Propane Furnace")
+                case EIADataRetriever.EnergyType.HEATING_OIL.value:
+                    result_list = []
+                    for month in months:
+                        key = f"{year}-{month:02}"
+                        val = energy_dict.get(key, float("NaN"))
+                        if val is None:
+                            val = float("NaN")
+                        result_list.append(val)
+                    ax.plot(months, result_list, label="Heating Oil Boiler")
+                case EIADataRetriever.EnergyType.NATURAL_GAS.value:
+                    result_list = []
+                    for month in months:
+                        key = f"{year}-{month:02}"
+                        val = energy_dict.get(key, float("NaN"))
+                        if val is None:
+                            val = float("NaN")
+                        result_list.append(val)
+                    ax.plot(months, result_list, label="Natural Gas Furnace")
+                case EIADataRetriever.EnergyType.ELECTRICITY.value:
+                    result_list = []
+                    for month in months:
+                        key = f"{year}-{month:02}"
+                        val = energy_dict.get(key, float("NaN"))
+                        if val is None:
+                            val = float("NaN")
+                        result_list.append(val)
+                    ax.plot(months, result_list, label="Ducted Heat Pump")
+        ax.legend()
+        with threading.Lock():
+            canvas = FigureCanvasTkAgg(fig, master=self.energy_graph_frame)
+            canvas.draw()
 
-    def open_census_reporter_state(self) -> None:
-        """Census reporter state label callback"""
-        state_link = helper.get_census_report_url_page(
-            sts.lookup(self.select_state_dropdown.get()).name  # type: ignore
-        )
-        webbrowser.open_new_tab(state_link)
+            # toolbar = NavigationToolbar2Tk(canvas, window=self.energy_graph_frame, pack_toolbar=False)
+            # toolbar.update()
+            # canvas.mpl_connect("key_press_event", key_press_handler)
+
+            # toolbar.grid(column=0, row=1, sticky="news")
+            canvas.get_tk_widget().grid(column=0, row=0)
 
-    def open_census_reporter_metro(self) -> None:
-        """Census reporter metro label callback"""
-        metro_link = helper.get_census_report_url_page(f"{self.msa_name} metro area")  # type: ignore
-        webbrowser.open_new_tab(metro_link)
-
-    def state_dropdown_callback(self, state: str) -> None:
-        """Banner state callback.
-        TODO:
-            check if thread is running with given name, and if so join it and start the new thread
-
-        Args:
-            state (str): the state after the change
-        """
-
-        threading.Thread(
-            target=self.generate_energy_plot,
-            args=(
-                int(self.select_year_dropdown.get()),
-                state,
-            ),
-            name="energy_thread",
-            daemon=True,
-        ).start()
-
-    def year_dropdown_callback(self, year: str) -> None:
-        """Banner year callback.
-        TODO:
-            Check if thread is running with given name, and if so join it and start the new thread
-
-        Args:
-            year (str): the year after the change
-        """
-        threading.Thread(
-            target=self.generate_energy_plot,
-            args=(
-                int(year),
-                self.select_state_dropdown.get(),
-            ),
-            name="energy_thread",
-            daemon=True,
-        ).start()
-
-    def open_log_file(self) -> None:
-        """Open logging file.
-
-        Note:
-            Haven't tested this on mac/linux. "darwin" doesn't exist in `system.platform` on windows, so cant say for sure if this works
-        """
-        try:
-            if sys.platform == "win32":
-                startfile(helper.LOGGING_FILE_PATH)
-            else:
-                opener = "open" if sys.platform == "darwin" else "xdg-open"
-                subprocess.call([opener, helper.LOGGING_FILE_PATH])
-        except FileNotFoundError:
-            CTkMessagebox(
-                self,
-                title="Error",
-                message="Logging file doesn't exist! Try rerunning the program or creating a logger.log file in /output/logging/",
-                icon="warning",
-            )
+    def open_census_reporter_state(self) -> None:
+        """Census reporter state label callback"""
+        state_link = helper.get_census_report_url_page(
+            sts.lookup(self.select_state_dropdown.get()).name  # type: ignore
+        )
+        webbrowser.open_new_tab(state_link)
+
+    def open_census_reporter_metro(self) -> None:
+        """Census reporter metro label callback"""
+        metro_link = helper.get_census_report_url_page(f"{self.msa_name} metro area")  # type: ignore
+        webbrowser.open_new_tab(metro_link)
+
+    def state_dropdown_callback(self, state: str) -> None:
+        """Banner state callback.
+        TODO:
+            check if thread is running with given name, and if so join it and start the new thread
+
+        Args:
+            state (str): the state after the change
+        """
+
+        threading.Thread(
+            target=self.generate_energy_plot,
+            args=(
+                int(self.select_year_dropdown.get()),
+                state,
+            ),
+            name="energy_thread",
+            daemon=True,
+        ).start()
+
+    def year_dropdown_callback(self, year: str) -> None:
+        """Banner year callback.
+        TODO:
+            Check if thread is running with given name, and if so join it and start the new thread
+
+        Args:
+            year (str): the year after the change
+        """
+        threading.Thread(
+            target=self.generate_energy_plot,
+            args=(
+                int(year),
+                self.select_state_dropdown.get(),
+            ),
+            name="energy_thread",
+            daemon=True,
+        ).start()
+
+    def open_log_file(self) -> None:
+        """Open logging file.
+
+        Note:
+            Haven't tested this on mac/linux. "darwin" doesn't exist in `sys.platform` on windows, so can't say for sure if this works
+        """
+        try:
+            if sys.platform == "win32":
+                from os import startfile
+
+                startfile(helper.LOGGING_FILE_PATH)
+            else:
+                opener = "open" if sys.platform == "darwin" else "xdg-open"
+                subprocess.call([opener, helper.LOGGING_FILE_PATH])
+        except FileNotFoundError:
+            CTkMessagebox(
+                self,
+                title="Error",
+                message="Logging file doesn't exist! Try rerunning the program or creating a logger.log file in /output/logging/",
+                icon="warning",
+            )
+
+    def generate_census_reports(self) -> None:
+        log("Fetching census reports...", "info")
+        c = CensusDataRetriever()
+        threading.Thread(
+            target=c.generate_acs5_subject_table_group_for_zcta_by_year,
+            args=(
+                "S1901",
+                "2019",
+            ),
+        ).start()
+        threading.Thread(
+            target=c.generate_acs5_profile_table_group_for_zcta_by_year,
+            args=(
+                "DP05",
+                "2019",
+            ),
+        ).start()
 
@@ -1492,8 +1544,7 @@

Source code in src\gui\datapage.py -
 47
- 48
+            
 48
  49
  50
  51
@@ -1619,134 +1670,147 @@ 

171 172 173 -174

def create_widgets(self) -> None:
-    """Create widgets."""
-    # bug in sockets library wont allow you to raise keyboardinterrupt, so stopping
-    # Content frame will have 4 rows. first will be header, 2nd is energy graph, 3rd will contain a frame that has censusreport.org links, 4th will have progress bar frame
-    self.content_frame = ctk.CTkFrame(self, border_width=2)
-    self.content_banner_frame = ctk.CTkFrame(self.content_frame, border_width=2)
-    self.state_and_year_content_banner_dropdown_frame = ctk.CTkFrame(
-        self.content_banner_frame, border_width=2
-    )
-    self.census_reporter_frame = ctk.CTkFrame(self.content_frame, border_width=2)
-    self.log_frame = ctk.CTkFrame(self.content_frame, border_width=2)
-
-    self.content_banner_main_text = ctk.CTkLabel(
-        self.content_banner_frame,
-        text="Census and Energy Data:",
-        font=self.roboto_header_font,
-    )
-    self.content_banner_main_text.bind(
-        "<Configure>",
-        command=lambda x: self.content_banner_main_text.configure(
-            wraplength=self.content_banner_main_text._current_width
-            - 40  # random padding
-        ),
-    )
-    # nested frame for holding filters and text inside banner frame
-
-    self.select_state_label = ctk.CTkLabel(
-        self.state_and_year_content_banner_dropdown_frame,
-        text="Select State",
-        font=self.roboto_font,
-    )
-    self.select_state_dropdown = ctk.CTkOptionMenu(
-        self.state_and_year_content_banner_dropdown_frame,
-        values=None,
-        command=self.state_dropdown_callback,
-    )
-
-    self.select_year_label = ctk.CTkLabel(
-        self.state_and_year_content_banner_dropdown_frame,
-        text="Select Year",
-        font=self.roboto_font,
-    )
-    self.select_year_dropdown = ctk.CTkOptionMenu(
-        self.state_and_year_content_banner_dropdown_frame,
-        values=self.years,
-        command=self.year_dropdown_callback,
-    )
-
-    self.energy_graph_frame = ctk.CTkFrame(self.content_frame, border_width=2)
-
-    self.census_reporter_state_label = ctk.CTkLabel(
-        self.census_reporter_frame,
-        text="Census Reporter: State Report",
-        font=self.roboto_link_font,
-        cursor="hand2",
-        text_color="blue",
-    )
-
-    self.log_button = ctk.CTkButton(
-        self.log_frame, text="Open Log File", command=self.open_log_file
-    )
-    self.census_reporter_state_label.bind(
-        "<Button-1>", lambda x: self.open_census_reporter_state()
-    )
-    self.census_reporter_metro_label = ctk.CTkLabel(
-        self.census_reporter_frame,
-        text="Census Reporter: Metro Report",
-        font=self.roboto_link_font,
-        cursor="hand2",
-        text_color="blue",
-    )
-    self.census_reporter_metro_label.bind(
-        "<Button-1>", lambda x: self.open_census_reporter_metro()
-    )
-    # create grid
-    # col
-    self.columnconfigure(0, weight=1)
-    self.content_frame.columnconfigure(0, weight=1)
-    self.content_banner_frame.columnconfigure((0, 1), weight=1)
-    self.state_and_year_content_banner_dropdown_frame.columnconfigure(
-        (0, 1), weight=1
-    )
-    self.energy_graph_frame.columnconfigure(0, weight=1)
-    self.census_reporter_frame.columnconfigure(0, weight=1)
-    self.log_frame.columnconfigure(0, weight=1)
-
-    # row
-    self.rowconfigure(0, weight=1)
-
-    self.content_frame.rowconfigure(0, weight=1)  # banner
-    self.content_frame.rowconfigure(1, weight=5)  # energy graph
-    self.content_frame.rowconfigure(2, weight=2)  # census reporter frame
-    self.content_frame.rowconfigure(3, weight=1)
-
-    self.content_banner_frame.rowconfigure(0, weight=1)
-
-    self.state_and_year_content_banner_dropdown_frame.rowconfigure((0, 1), weight=1)
-
-    self.energy_graph_frame.rowconfigure(0, weight=1)
+174
+175
+176
+177
+178
+179
+180
+181
def create_widgets(self) -> None:
+    """Create widgets."""
+    # bug in sockets library won't allow you to raise KeyboardInterrupt, so stopping
+    # Content frame will have 4 rows: the 1st is the header, the 2nd is the energy graph, the 3rd contains a frame that has censusreporter.org links, and the 4th holds the log frame
+    self.content_frame = ctk.CTkFrame(self)
+    self.content_banner_frame = ctk.CTkFrame(self.content_frame)
+    self.state_and_year_content_banner_dropdown_frame = ctk.CTkFrame(
+        self.content_banner_frame
+    )
+    self.census_reporter_frame = ctk.CTkFrame(self.content_frame)
+    self.log_frame = ctk.CTkFrame(self.content_frame)
+
+    self.content_banner_main_text = ctk.CTkLabel(
+        self.content_banner_frame,
+        text="Census and Energy Data:",
+        font=self.roboto_header_font,
+    )
+    self.content_banner_main_text.bind(
+        "<Configure>",
+        command=lambda x: self.content_banner_main_text.configure(
+            wraplength=self.content_banner_main_text._current_width
+            - 40  # random padding
+        ),
+    )
+    # nested frame for holding filters and text inside banner frame
+
+    self.select_state_label = ctk.CTkLabel(
+        self.state_and_year_content_banner_dropdown_frame,
+        text="Select State",
+        font=self.roboto_font,
+    )
+    self.select_state_dropdown = ctk.CTkOptionMenu(
+        self.state_and_year_content_banner_dropdown_frame,
+        values=None,
+        command=self.state_dropdown_callback,
+    )
+
+    self.select_year_label = ctk.CTkLabel(
+        self.state_and_year_content_banner_dropdown_frame,
+        text="Select Year",
+        font=self.roboto_font,
+    )
+    self.select_year_dropdown = ctk.CTkOptionMenu(
+        self.state_and_year_content_banner_dropdown_frame,
+        values=self.years,
+        command=self.year_dropdown_callback,
+    )
+
+    self.energy_graph_frame = ctk.CTkFrame(self.content_frame)
+
+    self.census_reporter_state_label = ctk.CTkLabel(
+        self.census_reporter_frame,
+        text="Census Reporter: State Report",
+        font=self.roboto_link_font,
+        cursor="hand2",
+        text_color="blue",
+    )
+
+    self.log_button = ctk.CTkButton(
+        self.log_frame, text="Open Log File", command=self.open_log_file
+    )
+    self.census_button = ctk.CTkButton(
+        self.log_frame,
+        text="Generate Census data",
+        command=self.generate_census_reports,
+    )
+    self.census_reporter_state_label.bind(
+        "<Button-1>", lambda x: self.open_census_reporter_state()
+    )
+    self.census_reporter_metro_label = ctk.CTkLabel(
+        self.census_reporter_frame,
+        text="Census Reporter: Metro Report",
+        font=self.roboto_link_font,
+        cursor="hand2",
+        text_color="blue",
+    )
+    self.census_reporter_metro_label.bind(
+        "<Button-1>", lambda x: self.open_census_reporter_metro()
+    )
+    # create grid
+    # col
+    self.columnconfigure(0, weight=1)
+    self.content_frame.columnconfigure(0, weight=1)
+    self.content_banner_frame.columnconfigure((0, 1), weight=1)
+    self.state_and_year_content_banner_dropdown_frame.columnconfigure(
+        (0, 1), weight=1
+    )
+    self.energy_graph_frame.columnconfigure(0, weight=1)
+    self.census_reporter_frame.columnconfigure(0, weight=1)
+    self.log_frame.columnconfigure((0, 1), weight=1)
+
+    # row
+    self.rowconfigure(0, weight=1)
+
+    self.content_frame.rowconfigure(0, weight=1)  # banner
+    self.content_frame.rowconfigure(1, weight=5)  # energy graph
+    self.content_frame.rowconfigure(2, weight=2)  # census reporter frame
+    self.content_frame.rowconfigure(3, weight=1)
 
-    self.census_reporter_frame.rowconfigure((0, 1), weight=1)
+    self.content_banner_frame.rowconfigure(0, weight=1)
 
-    self.log_frame.rowconfigure(0, weight=1)
+    self.state_and_year_content_banner_dropdown_frame.rowconfigure((0, 1), weight=1)
 
-    # placement
-    self.content_frame.grid(column=0, row=0, sticky="news")
-
-    self.content_banner_frame.grid(column=0, row=0, sticky="news")
-
-    self.content_banner_main_text.grid(column=0, row=0, sticky="nsew")
-
-    self.state_and_year_content_banner_dropdown_frame.grid(
-        column=1, row=0, sticky="news"
-    )
+    self.energy_graph_frame.rowconfigure(0, weight=1)
+
+    self.census_reporter_frame.rowconfigure((0, 1), weight=1)
+
+    self.log_frame.rowconfigure(0, weight=1)
+
+    # placement
+    self.content_frame.grid(column=0, row=0, sticky="news")
+
+    self.content_banner_frame.grid(column=0, row=0, sticky="news")
 
-    self.select_state_label.grid(column=0, row=0, sticky="news")
-    self.select_year_label.grid(column=1, row=0, sticky="news")
-    self.select_state_dropdown.grid(column=0, row=1)
-    self.select_year_dropdown.grid(column=1, row=1)
-
-    self.energy_graph_frame.grid(column=0, row=1, sticky="news")
-
-    self.census_reporter_frame.grid(column=0, row=2, sticky="news")
-    self.census_reporter_state_label.grid(column=0, row=0)
-    self.census_reporter_metro_label.grid(column=0, row=1)
+    self.content_banner_main_text.grid(column=0, row=0, sticky="nsew")
+
+    self.state_and_year_content_banner_dropdown_frame.grid(
+        column=1, row=0, sticky="news"
+    )
+
+    self.select_state_label.grid(column=0, row=0, sticky="news")
+    self.select_year_label.grid(column=1, row=0, sticky="news")
+    self.select_state_dropdown.grid(column=0, row=1)
+    self.select_year_dropdown.grid(column=1, row=1)
 
-    self.log_frame.grid(column=0, row=3, sticky="news")
-    self.log_button.grid(column=0, row=0, pady=10)
+    self.energy_graph_frame.grid(column=0, row=1, sticky="news")
+
+    self.census_reporter_frame.grid(column=0, row=2, sticky="news")
+    self.census_reporter_state_label.grid(column=0, row=0)
+    self.census_reporter_metro_label.grid(column=0, row=1)
+
+    self.log_frame.grid(column=0, row=3, sticky="news")
+    self.census_button.grid(column=0, row=0, pady=10, padx=(0, 10))
+    self.log_button.grid(column=1, row=0, pady=10, padx=(10, 0))
 
@@ -1776,14 +1840,7 @@

Source code in src\gui\datapage.py -
205
-206
-207
-208
-209
-210
-211
-212
+            
212
 213
 214
 215
@@ -1876,107 +1933,114 @@ 

302 303 304 -305

def generate_energy_plot(self, year: int, state: str) -> None:
-    """Call the EIA API and generate a plot with the received data.
-
-    Note:
-        Call this in a thread so that it doesn't freeze the GUI
-        Update: might want to just get the data and plot on the main thread
-    """
-    eia = EIADataRetriever()
-    energy_price_per_mbtu_by_type_for_state = (
-        eia.monthly_price_per_mbtu_by_energy_type_by_state(
-            state, datetime.date(year, 1, 1), datetime.date(year + 1, 1, 1)
-        )
-    )
-
-    fig = Figure(layout="compressed", facecolor="blue")
-    ax = fig.add_subplot()
-    ax.set_xlabel("Time (Months)")
-    ax.set_ylabel("Cost per Effective MBTU ($/MBTU)")
-    ax.set_title(
-        f"Avg. Energy Prices by Appliance for {state}, {year}",
-        loc="center",
-        wrap=True,
-    )
-    months = [i for i in range(1, 13)]
-    month_names = [
-        "Jan",
-        "Feb",
-        "Mar",
-        "Apr",
-        "May",
-        "Jun",
-        "Jul",
-        "Aug",
-        "Sep",
-        "Oct",
-        "Nov",
-        "Dec",
-    ]
-    ax.set_xticks(months)
-    labels = [item.get_text() for item in ax.get_xticklabels()]
-
-    # Modify specific labels, keeping offset
-    for i in range(0, 12):
-        labels[i] = month_names[i]
-    ax.set_xticklabels(labels)
-
-    for energy_dict in energy_price_per_mbtu_by_type_for_state:
-        if len(energy_dict) < 3:
-            log(
-                f"Issue with energy type {energy_dict.get("type")} for state {energy_dict.get("state")}",
-                "debug",
-            )
-            continue
-        match energy_dict.get("type"):
-            case EIADataRetriever.EnergyType.PROPANE.value:
-                result_list = []
-                for month in months:
-                    key = f"{year}-{month:02}"
-                    val = energy_dict.get(key, float("NaN"))
-                    if val is None:
-                        val = float("NaN")
-                    result_list.append(val)
-                ax.plot(months, result_list, label="Propane Furnace")
-            case EIADataRetriever.EnergyType.HEATING_OIL.value:
-                result_list = []
-                for month in months:
-                    key = f"{year}-{month:02}"
-                    val = energy_dict.get(key, float("NaN"))
-                    if val is None:
-                        val = float("NaN")
-                    result_list.append(val)
-                ax.plot(months, result_list, label="Heating Oil Boiler")
-            case EIADataRetriever.EnergyType.NATURAL_GAS.value:
-                result_list = []
-                for month in months:
-                    key = f"{year}-{month:02}"
-                    val = energy_dict.get(key, float("NaN"))
-                    if val is None:
-                        val = float("NaN")
-                    result_list.append(val)
-                ax.plot(months, result_list, label="Natural Gas Furnace")
-            case EIADataRetriever.EnergyType.ELECTRICITY.value:
-                result_list = []
-                for month in months:
-                    key = f"{year}-{month:02}"
-                    val = energy_dict.get(key, float("NaN"))
-                    if val is None:
-                        val = float("NaN")
-                    result_list.append(val)
-                ax.plot(months, result_list, label="Ducted Heat Pump")
-    ax.legend()
-    with threading.Lock():
-        canvas = FigureCanvasTkAgg(fig, master=self.energy_graph_frame)
-        canvas.draw()
-
-        # toolbar = NavigationToolbar2Tk(canvas, window=self.energy_graph_frame, pack_toolbar=False)
-        # toolbar.update()
-        # canvas.mpl_connect("key_press_event", key_press_handler)
-
-        # toolbar.grid(column=0, row=1, sticky="news")
-        canvas.get_tk_widget().grid(column=0, row=0)
+305
+306
+307
+308
+309
+310
+311
+312
def generate_energy_plot(self, year: int, state: str) -> None:
+    """Call the EIA API and generate a plot with the received data.
+
+    Note:
+        Call this in a thread so that it doesn't freeze the GUI
+        Update: might want to just get the data and plot on the main thread
+    """
+    eia = EIADataRetriever()
+    energy_price_per_mbtu_by_type_for_state = (
+        eia.monthly_price_per_mbtu_by_energy_type_by_state(
+            state, datetime.date(year, 1, 1), datetime.date(year + 1, 1, 1)
+        )
+    )
+
+    fig = Figure(layout="compressed", facecolor="#dbdbdb")
+    ax = fig.add_subplot()
+    ax.set_xlabel("Time (Months)")
+    ax.set_ylabel("Cost per Effective MBTU ($/MBTU)")
+    ax.set_title(
+        f"Avg. Energy Prices by Appliance for {state}, {year}",
+        loc="center",
+        wrap=True,
+    )
+    months = [i for i in range(1, 13)]
+    month_names = [
+        "Jan",
+        "Feb",
+        "Mar",
+        "Apr",
+        "May",
+        "Jun",
+        "Jul",
+        "Aug",
+        "Sep",
+        "Oct",
+        "Nov",
+        "Dec",
+    ]
+    ax.set_xticks(months)
+    labels = [item.get_text() for item in ax.get_xticklabels()]
+
+    # Modify specific labels, keeping offset
+    for i in range(0, 12):
+        labels[i] = month_names[i]
+    ax.set_xticklabels(labels)
+
+    for energy_dict in energy_price_per_mbtu_by_type_for_state:
+        if len(energy_dict) < 3:
+            log(
+                f"Issue with energy type {energy_dict.get("type")} for state {energy_dict.get("state")}",
+                "debug",
+            )
+            continue
+        match energy_dict.get("type"):
+            case EIADataRetriever.EnergyType.PROPANE.value:
+                result_list = []
+                for month in months:
+                    key = f"{year}-{month:02}"
+                    val = energy_dict.get(key, float("NaN"))
+                    if val is None:
+                        val = float("NaN")
+                    result_list.append(val)
+                ax.plot(months, result_list, label="Propane Furnace")
+            case EIADataRetriever.EnergyType.HEATING_OIL.value:
+                result_list = []
+                for month in months:
+                    key = f"{year}-{month:02}"
+                    val = energy_dict.get(key, float("NaN"))
+                    if val is None:
+                        val = float("NaN")
+                    result_list.append(val)
+                ax.plot(months, result_list, label="Heating Oil Boiler")
+            case EIADataRetriever.EnergyType.NATURAL_GAS.value:
+                result_list = []
+                for month in months:
+                    key = f"{year}-{month:02}"
+                    val = energy_dict.get(key, float("NaN"))
+                    if val is None:
+                        val = float("NaN")
+                    result_list.append(val)
+                ax.plot(months, result_list, label="Natural Gas Furnace")
+            case EIADataRetriever.EnergyType.ELECTRICITY.value:
+                result_list = []
+                for month in months:
+                    key = f"{year}-{month:02}"
+                    val = energy_dict.get(key, float("NaN"))
+                    if val is None:
+                        val = float("NaN")
+                    result_list.append(val)
+                ax.plot(months, result_list, label="Ducted Heat Pump")
+    ax.legend()
+    with threading.Lock():
+        canvas = FigureCanvasTkAgg(fig, master=self.energy_graph_frame)
+        canvas.draw()
+
+        # toolbar = NavigationToolbar2Tk(canvas, window=self.energy_graph_frame, pack_toolbar=False)
+        # toolbar.update()
+        # canvas.mpl_connect("key_press_event", key_press_handler)
+
+        # toolbar.grid(column=0, row=1, sticky="news")
+        canvas.get_tk_widget().grid(column=0, row=0)
 
@@ -2001,13 +2065,13 @@

Source code in src\gui\datapage.py -
def open_census_reporter_metro(self) -> None:
-    """Census reporter metro label callback"""
-    metro_link = helper.get_census_report_url_page(f"{self.msa_name} metro area")  # type: ignore
-    webbrowser.open_new_tab(metro_link)
+            
def open_census_reporter_metro(self) -> None:
+    """Census reporter metro label callback"""
+    metro_link = helper.get_census_report_url_page(f"{self.msa_name} metro area")  # type: ignore
+    webbrowser.open_new_tab(metro_link)
 
@@ -2032,17 +2096,17 @@

Source code in src\gui\datapage.py -
def open_census_reporter_state(self) -> None:
-    """Census reporter state label callback"""
-    state_link = helper.get_census_report_url_page(
-        sts.lookup(self.select_state_dropdown.get()).name  # type: ignore
-    )
-    webbrowser.open_new_tab(state_link)
+            
def open_census_reporter_state(self) -> None:
+    """Census reporter state label callback"""
+    state_link = helper.get_census_report_url_page(
+        sts.lookup(self.select_state_dropdown.get()).name  # type: ignore
+    )
+    webbrowser.open_new_tab(state_link)
 
@@ -2071,14 +2135,7 @@

Source code in src\gui\datapage.py -
356
-357
-358
-359
-360
-361
-362
-363
+            
363
 364
 365
 366
@@ -2089,25 +2146,36 @@ 

371 372 373 -374

def open_log_file(self) -> None:
-    """Open logging file.
-
-    Note:
-        Haven't tested this on mac/linux. "darwin" doesn't exist in `system.platform` on windows, so cant say for sure if this works
-    """
-    try:
-        if sys.platform == "win32":
-            startfile(helper.LOGGING_FILE_PATH)
-        else:
-            opener = "open" if sys.platform == "darwin" else "xdg-open"
-            subprocess.call([opener, helper.LOGGING_FILE_PATH])
-    except FileNotFoundError:
-        CTkMessagebox(
-            self,
-            title="Error",
-            message="Logging file doesn't exist! Try rerunning the program or creating a logger.log file in /output/logging/",
-            icon="warning",
-        )
+374
+375
+376
+377
+378
+379
+380
+381
+382
+383
def open_log_file(self) -> None:
+    """Open logging file.
+
+    Note:
+        Haven't tested this on mac/linux. "darwin" doesn't exist in `sys.platform` on windows, so can't say for sure if this works
+    """
+    try:
+        if sys.platform == "win32":
+            from os import startfile
+
+            startfile(helper.LOGGING_FILE_PATH)
+        else:
+            opener = "open" if sys.platform == "darwin" else "xdg-open"
+            subprocess.call([opener, helper.LOGGING_FILE_PATH])
+    except FileNotFoundError:
+        CTkMessagebox(
+            self,
+            title="Error",
+            message="Logging file doesn't exist! Try rerunning the program or creating a logger.log file in /output/logging/",
+            icon="warning",
+        )
 
@@ -2162,14 +2230,7 @@

Source code in src\gui\datapage.py -
176
-177
-178
-179
-180
-181
-182
-183
+            
183
 184
 185
 186
@@ -2189,34 +2250,41 @@ 

200 201 202 -203

def set_msa_name(self, msa_name: str) -> None:
-    """Set the msa name and update objects that rely on the msa name. Includes drop downs and and generating the energy plot.
-
-    Args:
-        msa_name (str): Metropolitan Statistical Area name. This must be validated
-    """
-    self.msa_name = msa_name
-    self.states_in_msa = helper.get_states_in_msa(self.msa_name)
-
-    if len(self.states_in_msa) > 0:
-        self.select_state_dropdown.configure()
-        self.select_state_dropdown.set(self.states_in_msa[0])
-
-    self.select_state_dropdown.configure(values=self.states_in_msa)
-    self.content_banner_main_text.configure(
-        text=f"Census and Energy Data: {self.msa_name}"
-    )
-    self.zip_list = helper.metro_name_to_zip_code_list(msa_name)
-    self.zip_list = [str(zip) for zip in self.zip_list]
+203
+204
+205
+206
+207
+208
+209
+210
def set_msa_name(self, msa_name: str) -> None:
+    """Set the msa name and update objects that rely on the msa name. Includes drop downs and generating the energy plot.
+
+    Args:
+        msa_name (str): Metropolitan Statistical Area name. This must be validated
+    """
+    self.msa_name = msa_name
+    self.states_in_msa = helper.get_states_in_msa(self.msa_name)
+
+    if len(self.states_in_msa) > 0:
+        self.select_state_dropdown.configure()
+        self.select_state_dropdown.set(self.states_in_msa[0])
 
-    threading.Thread(
-        target=self.generate_energy_plot,
-        args=(
-            int(self.select_year_dropdown.get()),
-            self.select_state_dropdown.get(),
-        ),
-        daemon=True,
-    ).start()
+    self.select_state_dropdown.configure(values=self.states_in_msa)
+    self.content_banner_main_text.configure(
+        text=f"Census and Energy Data: {self.msa_name}"
+    )
+    self.zip_list = helper.metro_name_to_zip_code_list(msa_name)
+    self.zip_list = [str(zip) for zip in self.zip_list]
+
+    threading.Thread(
+        target=self.generate_energy_plot,
+        args=(
+            int(self.select_year_dropdown.get()),
+            self.select_state_dropdown.get(),
+        ),
+        daemon=True,
+    ).start()
 
@@ -2273,14 +2341,7 @@

Source code in src\gui\datapage.py -
319
-320
-321
-322
-323
-324
-325
-326
+            
326
 327
 328
 329
@@ -2290,24 +2351,31 @@ 

333 334 335 -336

def state_dropdown_callback(self, state: str) -> None:
-    """Banner state callback.
-    TODO:
-        check if thread is running with given name, and if so join it and start the new thread
-
-    Args:
-        state (str): the state after the change
-    """
-
-    threading.Thread(
-        target=self.generate_energy_plot,
-        args=(
-            int(self.select_year_dropdown.get()),
-            state,
-        ),
-        name="energy_thread",
-        daemon=True,
-    ).start()
+336
+337
+338
+339
+340
+341
+342
+343
def state_dropdown_callback(self, state: str) -> None:
+    """Banner state callback.
+    TODO:
+        check if thread is running with given name, and if so join it and start the new thread
+
+    Args:
+        state (str): the state after the change
+    """
+
+    threading.Thread(
+        target=self.generate_energy_plot,
+        args=(
+            int(self.select_year_dropdown.get()),
+            state,
+        ),
+        name="energy_thread",
+        daemon=True,
+    ).start()
 
@@ -2364,14 +2432,7 @@

Source code in src\gui\datapage.py -
338
-339
-340
-341
-342
-343
-344
-345
+            
345
 346
 347
 348
@@ -2380,23 +2441,30 @@ 

351 352 353 -354

def year_dropdown_callback(self, year: str) -> None:
-    """Banner year callback.
-    TODO:
-        Check if thread is running with given name, and if so join it and start the new thread
-
-    Args:
-        year (str): the year after the change
-    """
-    threading.Thread(
-        target=self.generate_energy_plot,
-        args=(
-            int(year),
-            self.select_state_dropdown.get(),
-        ),
-        name="energy_thread",
-        daemon=True,
-    ).start()
+354
+355
+356
+357
+358
+359
+360
+361
def year_dropdown_callback(self, year: str) -> None:
+    """Banner year callback.
+    TODO:
+        Check if thread is running with given name, and if so join it and start the new thread
+
+    Args:
+        year (str): the year after the change
+    """
+    threading.Thread(
+        target=self.generate_energy_plot,
+        args=(
+            int(year),
+            self.select_state_dropdown.get(),
+        ),
+        name="energy_thread",
+        daemon=True,
+    ).start()
 
diff --git a/gui/filterspage/index.html b/gui/filterspage/index.html index 6af93b2..224e8b4 100755 --- a/gui/filterspage/index.html +++ b/gui/filterspage/index.html @@ -761,8 +761,7 @@

Source code in src\gui\filterspage.py -
  7
-  8
+              
  8
   9
  10
  11
@@ -1116,362 +1115,383 @@ 

359 360 361 -362

class FiltersPage(ctk.CTkFrame):
-    def __init__(self, master: ctk.CTk, search_page: ctk.CTkFrame, **kwargs):
-        # main setup
-        super().__init__(master, **kwargs)
-        self.root = master
-        self.search_page = search_page
-        self.cur_year = datetime.datetime.now().year
-        self.year_list = [str(x) for x in range(2010, self.cur_year + 1)]
-        list.reverse(self.year_list)
-        self.sqft_list = [sqft.value for sqft in RedfinApi.Sqft]
-        list.reverse(self.sqft_list)
-        self.sold_within_list = [
-            "Last 1 week",
-            "Last 1 month",
-            "Last 3 months",
-            "Last 6 months",
-            "Last 1 year",
-            "Last 2 years",
-            "Last 3 years",
-            "Last 5 years",
-        ]
-        self.price_list = [price.value for price in RedfinApi.Price]
-        list.reverse(self.price_list)
-        self.create_widgets()
-        self.set_default_values()
-
-    def create_widgets(self) -> None:
-        """Create widgets."""
-        # frames
-        self.content_frame = ctk.CTkFrame(self)
-        self.for_sale_sold_frame = ctk.CTkFrame(
-            self.content_frame, width=300, height=100, fg_color="transparent"
-        )
-        self.stories_frame = ctk.CTkFrame(self.content_frame)
-        self.year_built_frame = ctk.CTkFrame(self.content_frame, fg_color="transparent")
-        self.home_type_frame = ctk.CTkFrame(self.content_frame)
-        self.square_feet_frame = ctk.CTkFrame(self.content_frame)
-        self.status_frame = ctk.CTkFrame(self.content_frame)
-        self.sold_within_frame = ctk.CTkFrame(self.content_frame)
-        self.price_range_frame = ctk.CTkFrame(self.content_frame)
-        self.reset_apply_frame = ctk.CTkFrame(self.content_frame)
-
-        # make more grid
-        self.columnconfigure((0, 2), weight=1)
-        self.columnconfigure(1, weight=30)
-        self.content_frame.columnconfigure((0), weight=1, uniform="a")  # uniform
-        self.for_sale_sold_frame.columnconfigure((0, 1), weight=1)
-        self.stories_frame.columnconfigure((0, 1), weight=1)
-        self.year_built_frame.columnconfigure((0, 1, 2, 3), weight=1)
-        self.home_type_frame.columnconfigure((0, 1, 2, 3), weight=1)
-        self.square_feet_frame.columnconfigure((0, 1, 2, 3), weight=1)
-        self.status_frame.columnconfigure((0, 1, 2), weight=1)
-        self.sold_within_frame.columnconfigure((0, 1), weight=1)
-        self.price_range_frame.columnconfigure((0, 1, 2, 3), weight=1)
-        self.reset_apply_frame.columnconfigure((0, 1), weight=1)
-
-        self.rowconfigure((0, 2), weight=1)
-        self.rowconfigure(1, weight=30)
-        self.content_frame.rowconfigure(
-            (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), weight=1, uniform="a"
-        )
-        self.for_sale_sold_frame.rowconfigure(0, weight=1)
-        self.stories_frame.rowconfigure(0, weight=1)
-        self.year_built_frame.rowconfigure((0, 1), weight=1)
-        self.home_type_frame.rowconfigure((0, 1, 2), weight=1)
-        self.square_feet_frame.rowconfigure((0, 1), weight=1)
-        self.status_frame.rowconfigure((0, 1), weight=1)
-        self.sold_within_frame.rowconfigure(0, weight=1)
-        self.price_range_frame.rowconfigure((0, 1), weight=1)
-        self.reset_apply_frame.rowconfigure(0, weight=1)
-
-        # placing the frames
-        self.content_frame.grid(row=1, column=1)
-        self.for_sale_sold_frame.grid(row=0, column=0, sticky="nsew")
-        self.stories_frame.grid(row=1, column=0, sticky="nesw")
-        self.year_built_frame.grid(row=2, column=0, sticky="nesw")
-        self.home_type_frame.grid(row=3, column=0, rowspan=2, sticky="nesw")
-        self.square_feet_frame.grid(row=5, column=0, sticky="nesw")
-        self.status_frame.grid(row=6, column=0)
-        self.sold_within_frame.grid(row=7, column=0, sticky="nesw")
-        self.price_range_frame.grid(row=8, column=0, rowspan=2, sticky="nesw")
-        self.reset_apply_frame.grid(row=10, column=0)
-
-        # Create the labels
-        self.for_sale_sold_label = ctk.CTkLabel(
-            self.for_sale_sold_frame, text="For Sale/Sold"
-        )
-        self.stories_label = ctk.CTkLabel(self.stories_frame, text="Stories")
-        self.year_built_label = ctk.CTkLabel(self.year_built_frame, text="Year Built")
-        self.home_type_label = ctk.CTkLabel(self.home_type_frame, text="Home Type")
-        self.sqft_label = ctk.CTkLabel(self.square_feet_frame, text="Square Feet")
-        self.sale_status_label = ctk.CTkLabel(self.status_frame, text="Status")
-        self.price_range_label = ctk.CTkLabel(
-            self.price_range_frame, text="Price Range"
+362
+363
+364
+365
+366
+367
+368
+369
+370
+371
+372
+373
class FiltersPage(ctk.CTkFrame):
+    def __init__(self, master: ctk.CTk, search_page: ctk.CTkFrame, **kwargs):
+        # main setup
+        super().__init__(master, **kwargs)
+        self.root = master
+        self.search_page = search_page
+        self.cur_year = datetime.datetime.now().year
+        self.year_list = [str(x) for x in range(2010, self.cur_year + 1)]
+        list.reverse(self.year_list)
+        self.sqft_list = [sqft.value for sqft in RedfinApi.Sqft]
+        list.reverse(self.sqft_list)
+        self.sold_within_list = [
+            "Last 1 week",
+            "Last 1 month",
+            "Last 3 months",
+            "Last 6 months",
+            "Last 1 year",
+            "Last 2 years",
+            "Last 3 years",
+            "Last 5 years",
+        ]
+        self.price_list = [price.value for price in RedfinApi.Price]
+        list.reverse(self.price_list)
+        self.create_widgets()
+        self.set_default_values()
+
+    def create_widgets(self) -> None:
+        """Create widgets."""
+        # frames
+        self.content_frame = ctk.CTkFrame(self)
+        self.for_sale_sold_frame = ctk.CTkFrame(
+            self.content_frame, width=300, height=100, fg_color="transparent"
+        )
+        self.stories_frame = ctk.CTkFrame(self.content_frame, corner_radius=0)
+        self.year_built_frame = ctk.CTkFrame(
+            self.content_frame, corner_radius=0, fg_color="transparent"
+        )
+        self.home_type_frame = ctk.CTkFrame(self.content_frame, corner_radius=0)
+        self.square_feet_frame = ctk.CTkFrame(
+            self.content_frame, corner_radius=0, fg_color="transparent"
+        )
+        self.status_frame = ctk.CTkFrame(self.content_frame, corner_radius=0)
+        self.sold_within_frame = ctk.CTkFrame(
+            self.content_frame, fg_color="transparent", corner_radius=0
+        )
+        self.price_range_frame = ctk.CTkFrame(self.content_frame, corner_radius=0)
+        self.reset_apply_frame = ctk.CTkFrame(
+            self.content_frame, fg_color="transparent", corner_radius=0
+        )
+
+        # make more grid
+        self.columnconfigure((0, 2), weight=1)
+        self.columnconfigure(1, weight=30)
+        self.content_frame.columnconfigure(0, weight=1, uniform="a")  # uniform
+        self.for_sale_sold_frame.columnconfigure((0, 1), weight=1)
+        self.stories_frame.columnconfigure((0, 1), weight=1)
+        self.year_built_frame.columnconfigure((0, 1, 2, 3), weight=1)
+        self.home_type_frame.columnconfigure((0, 1, 2, 3), weight=1)
+        self.square_feet_frame.columnconfigure((0, 1, 2, 3), weight=1)
+        self.status_frame.columnconfigure((0, 1, 2), weight=1)
+        self.sold_within_frame.columnconfigure((0, 1), weight=1)
+        self.price_range_frame.columnconfigure((0, 1, 2, 3), weight=1)
+        self.reset_apply_frame.columnconfigure((0, 1), weight=1)
+
+        self.rowconfigure(0, weight=1)
+        self.content_frame.rowconfigure(
+            (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), weight=1, uniform="a"
+        )
+        self.for_sale_sold_frame.rowconfigure(0, weight=1)
+        self.stories_frame.rowconfigure(0, weight=1)
+        self.year_built_frame.rowconfigure((0, 1), weight=1)
+        self.home_type_frame.rowconfigure((0, 1, 2), weight=1)
+        self.square_feet_frame.rowconfigure((0, 1), weight=1)
+        self.status_frame.rowconfigure((0, 1), weight=1)
+        self.sold_within_frame.rowconfigure(0, weight=1)
+        self.price_range_frame.rowconfigure((0, 1), weight=1)
+        self.reset_apply_frame.rowconfigure(0, weight=1)
+
+        # placing the frames
+        self.content_frame.grid(row=0, column=1, sticky="ns")
+        self.for_sale_sold_frame.grid(row=0, column=0, sticky="nsew")
+        self.stories_frame.grid(row=1, column=0, sticky="nesw")
+        self.year_built_frame.grid(row=2, column=0, sticky="nesw")
+        self.home_type_frame.grid(row=3, column=0, rowspan=2, sticky="nesw")
+        self.square_feet_frame.grid(row=5, column=0, sticky="nesw")
+        self.status_frame.grid(row=6, column=0)
+        self.sold_within_frame.grid(row=7, column=0, sticky="nesw")
+        self.price_range_frame.grid(row=8, column=0, rowspan=2, sticky="nesw")
+        self.reset_apply_frame.grid(row=10, column=0)
+
+        # Create the labels
+        self.for_sale_sold_label = ctk.CTkLabel(
+            self.for_sale_sold_frame, text="For Sale/Sold"
         )
-        self.price_range_from_label = ctk.CTkLabel(self.price_range_frame, text="From")
-        self.price_range_to_label = ctk.CTkLabel(self.price_range_frame, text="To")
-        self.year_built_from_label = ctk.CTkLabel(self.year_built_frame, text="From")
-        self.year_built_to_label = ctk.CTkLabel(self.year_built_frame, text="To")
-        self.sold_within_label = ctk.CTkLabel(
-            self.sold_within_frame, text="Sold Within"
-        )
-        self.sold_within_from_label = ctk.CTkLabel(self.square_feet_frame, text="From")
-        self.sold_within_to_label = ctk.CTkLabel(self.square_feet_frame, text="To")
-
-        # Create the Buttons
-        self.for_sale_sold_om = ctk.CTkOptionMenu(
-            master=self.for_sale_sold_frame,
-            values=[status.value for status in RedfinApi.SoldStatus],
-            command=lambda x: self.status_within_activate_deactivate(x),
-        )
-
-        self.min_stories_om = ctk.CTkOptionMenu(
-            self.stories_frame, values=[story.value for story in RedfinApi.Stories]
-        )
-
-        self.min_year_built_om = ctk.CTkOptionMenu(
-            self.year_built_frame,
-            values=self.year_list,
-            command=lambda x: self.year_validation(),
-        )
-
-        self.max_year_built_om = ctk.CTkOptionMenu(
-            self.year_built_frame,
-            values=self.year_list,
-            command=lambda x: self.year_validation(),
-        )
-
-        self.house_type_house_switch = ctk.CTkSwitch(
-            self.home_type_frame,
-            text="House",
-            command=self.house_type_validation,
-        )
-        self.house_type_townhouse_switch = ctk.CTkSwitch(
-            self.home_type_frame,
-            text="Townhouse",
-            command=self.house_type_validation,
-        )
-        self.house_type_condo_switch = ctk.CTkSwitch(
-            self.home_type_frame,
-            text="Condo",
-            command=self.house_type_validation,
-        )
-        self.house_type_mul_fam_switch = ctk.CTkSwitch(
-            self.home_type_frame,
-            text="Multi-Family",
-            command=self.house_type_validation,
-        )
-
-        self.min_sqft_om = ctk.CTkOptionMenu(
-            self.square_feet_frame,
-            values=self.sqft_list,
-            command=lambda x: self.sqft_validation(),
-        )
-        self.max_sqft_om = ctk.CTkOptionMenu(
-            self.square_feet_frame,
-            values=self.sqft_list,
-            command=lambda x: self.sqft_validation(),
-        )
-        self.status_coming_soon_chb = ctk.CTkCheckBox(
-            self.status_frame, text="Coming soon"
+        self.stories_label = ctk.CTkLabel(self.stories_frame, text="Stories")
+        self.year_built_label = ctk.CTkLabel(self.year_built_frame, text="Year Built")
+        self.home_type_label = ctk.CTkLabel(self.home_type_frame, text="Home Type")
+        self.sqft_label = ctk.CTkLabel(self.square_feet_frame, text="Square Feet")
+        self.sale_status_label = ctk.CTkLabel(self.status_frame, text="Status")
+        self.price_range_label = ctk.CTkLabel(
+            self.price_range_frame, text="Price Range"
+        )
+        self.price_range_from_label = ctk.CTkLabel(self.price_range_frame, text="From")
+        self.price_range_to_label = ctk.CTkLabel(self.price_range_frame, text="To")
+        self.year_built_from_label = ctk.CTkLabel(self.year_built_frame, text="From")
+        self.year_built_to_label = ctk.CTkLabel(self.year_built_frame, text="To")
+        self.sold_within_label = ctk.CTkLabel(
+            self.sold_within_frame, text="Sold Within"
+        )
+        self.sold_within_from_label = ctk.CTkLabel(self.square_feet_frame, text="From")
+        self.sold_within_to_label = ctk.CTkLabel(self.square_feet_frame, text="To")
+
+        # Create the Buttons
+        self.for_sale_sold_om = ctk.CTkOptionMenu(
+            master=self.for_sale_sold_frame,
+            values=[status.value for status in RedfinApi.SoldStatus],
+            command=lambda x: self.status_within_activate_deactivate(x),
+        )
+
+        self.min_stories_om = ctk.CTkOptionMenu(
+            self.stories_frame, values=[story.value for story in RedfinApi.Stories]
+        )
+
+        self.min_year_built_om = ctk.CTkOptionMenu(
+            self.year_built_frame,
+            values=self.year_list,
+            command=lambda x: self.year_validation(),
+        )
+
+        self.max_year_built_om = ctk.CTkOptionMenu(
+            self.year_built_frame,
+            values=self.year_list,
+            command=lambda x: self.year_validation(),
+        )
+
+        self.house_type_house_switch = ctk.CTkSwitch(
+            self.home_type_frame,
+            text="House",
+            command=self.house_type_validation,
+        )
+        self.house_type_townhouse_switch = ctk.CTkSwitch(
+            self.home_type_frame,
+            text="Townhouse",
+            command=self.house_type_validation,
+        )
+        self.house_type_condo_switch = ctk.CTkSwitch(
+            self.home_type_frame,
+            text="Condo",
+            command=self.house_type_validation,
+        )
+        self.house_type_mul_fam_switch = ctk.CTkSwitch(
+            self.home_type_frame,
+            text="Multi-Family",
+            command=self.house_type_validation,
+        )
+
+        self.min_sqft_om = ctk.CTkOptionMenu(
+            self.square_feet_frame,
+            values=self.sqft_list,
+            command=lambda x: self.sqft_validation(),
         )
-        self.status_active_chb = ctk.CTkCheckBox(self.status_frame, text="Active")
-        self.status_pending_chb = ctk.CTkCheckBox(
-            self.status_frame, text="Under contract/Pending"
-        )  # missing one i think
-        self.sold_within_om = ctk.CTkOptionMenu(
-            self.sold_within_frame, values=self.sold_within_list
-        )
-
-        self.min_price_om = ctk.CTkOptionMenu(
-            self.price_range_frame,
-            values=self.price_list,
-            command=lambda x: self.price_validation(),
-        )
-        self.max_price_om = ctk.CTkOptionMenu(
-            self.price_range_frame,
-            values=self.price_list,
-            command=lambda x: self.price_validation(),
-        )
-
-        self.reset_filters_button = ctk.CTkButton(
-            self.reset_apply_frame,
-            text="Reset Filters",
-            command=self.set_default_values,
-        )
-        self.apply_filters_button = ctk.CTkButton(
-            self.reset_apply_frame,
-            text="Apply Filters",
-            command=self.change_to_search_page,
-        )
-
-        # Placing the widgets
-        self.for_sale_sold_label.grid(row=0, column=0)
-        self.stories_label.grid(row=0, column=0)
-        self.year_built_label.grid(row=0, column=0)
-        self.home_type_label.grid(row=0, column=0)
-        self.sqft_label.grid(row=0, column=0)
-        self.sale_status_label.grid(row=0, column=0)
-        self.price_range_label.grid(row=0, column=0)
-        self.year_built_from_label.grid(row=1, column=0)
-        self.year_built_to_label.grid(row=1, column=2)
-        self.price_range_from_label.grid(row=1, column=0)
-        self.price_range_to_label.grid(row=1, column=2)
-        self.sold_within_label.grid(row=0, column=0)
-        self.sold_within_from_label.grid(row=1, column=0)
-        self.sold_within_to_label.grid(row=1, column=2)
-
-        self.for_sale_sold_om.grid(row=0, column=1)
-        self.min_stories_om.grid(row=0, column=1)
-        self.min_year_built_om.grid(row=1, column=1)
-        self.max_year_built_om.grid(row=1, column=3)
-        self.min_sqft_om.grid(row=1, column=1)
-        self.max_sqft_om.grid(row=1, column=3)
-        self.sold_within_om.grid(row=0, column=1)
-        self.min_price_om.grid(row=1, column=1)
-        self.max_price_om.grid(row=1, column=3)
-        self.house_type_house_switch.grid(row=1, column=0)
-        self.house_type_townhouse_switch.grid(row=1, column=1)
-        self.house_type_condo_switch.grid(row=2, column=0)
-        self.house_type_mul_fam_switch.grid(row=2, column=1)
-        self.status_coming_soon_chb.grid(row=1, column=0)
-        self.status_active_chb.grid(row=1, column=1)
-        self.status_pending_chb.grid(row=1, column=2)
-        self.reset_filters_button.grid(row=0, column=0, sticky="nesw")
-        self.apply_filters_button.grid(row=0, column=1, sticky="nesw")
-
-    def set_default_values(self) -> None:
-        """Set the default values for all widgets.
-        Note:
-            Should be called after init and when clicking reset button.
-        """
-        self.for_sale_sold_om.set(RedfinApi.SoldStatus.SOLD.value)
-        self.min_stories_om.set(RedfinApi.Stories.ONE.value)
-        self.min_year_built_om.set(str(self.cur_year - 1))
-        self.max_year_built_om.set(str(self.cur_year - 1))
-        self.sold_within_om.set(self.sold_within_list[-1])
-        self.max_price_om.set(RedfinApi.Price.NONE.value)
-        self.min_price_om.set(RedfinApi.Price.NONE.value)
-        self.max_sqft_om.set(RedfinApi.Sqft.NONE.value)
-        self.min_sqft_om.set(RedfinApi.Sqft.NONE.value)
-        self.status_active_chb.deselect()
-        self.status_pending_chb.deselect()
-        self.status_coming_soon_chb.deselect()
-        self.house_type_house_switch.select()
-        self.house_type_condo_switch.deselect()
-        self.house_type_townhouse_switch.deselect()
-        self.house_type_mul_fam_switch.deselect()
-        self.status_within_activate_deactivate(self.for_sale_sold_om.get())
-
-    def status_within_activate_deactivate(self, status) -> None:
-        """Deactivate or activate the status and sold within sections, since they depend on what type of sale a house is being searched with.
-
-        Args:
-            status (Event): ignored
-        """
-        match self.for_sale_sold_om.get():
-            case RedfinApi.SoldStatus.FOR_SALE.value:
-                self.sale_status_label.configure(state="normal")
-                self.status_active_chb.configure(state="normal")
-                self.status_coming_soon_chb.configure(state="normal")
-                self.status_pending_chb.configure(state="normal")
-                self.sold_within_label.configure(state="disabled")
-                self.sold_within_om.configure(state="disabled")
-            case RedfinApi.SoldStatus.SOLD.value:
-                self.sale_status_label.configure(state="disabled")
-                self.status_active_chb.configure(state="disabled")
-                self.status_coming_soon_chb.configure(state="disabled")
-                self.status_pending_chb.configure(state="disabled")
-                self.sold_within_label.configure(state="normal")
-                self.sold_within_om.configure(state="normal")
-
-    def change_to_search_page(self) -> None:
-        """Change to search page."""
-        self.grid_remove()
-        self.search_page.grid()
-
-    def price_validation(self):
-        """Called when price range min om gets changed"""
-        if (
-            self.max_price_om.get() == RedfinApi.Price.NONE.value
-            or self.min_price_om.get() == RedfinApi.Price.NONE.value
-        ):
-            return
-        if int(self.max_price_om.get()) < int(self.min_price_om.get()):
-            self.max_price_om.set(self.min_price_om.get())
-
-    def year_validation(self) -> None:
-        """Year drop down callback"""
-        if int(self.max_year_built_om.get()) < int(self.min_year_built_om.get()):
-            self.max_year_built_om.set(self.min_year_built_om.get())
-
-    def sqft_validation(self) -> None:
-        """Sqft dropdown callback"""
-        if (
-            self.max_sqft_om.get() == RedfinApi.Sqft.NONE.value
-            or self.min_sqft_om.get() == RedfinApi.Sqft.NONE.value
-        ):
-            return
-        if int(self.max_sqft_om.get()) < int(self.min_sqft_om.get()):
-            self.max_sqft_om.set(self.min_sqft_om.get())
-
-    def house_type_validation(self) -> None:
-        """House type switch validation to make sure at lest house is selected."""
-        if not any(
-            [
-                self.house_type_house_switch.get(),
-                self.house_type_condo_switch.get(),
-                self.house_type_mul_fam_switch.get(),
-                self.house_type_townhouse_switch.get(),
-            ]
-        ):
-            self.house_type_house_switch.select()
-
-    def get_values(self) -> dict[str, Any]:
-        """Get the values of all widgets on this page.
-
-        Returns:
-            dict[str, Any]: dict of values
-        """
-        match self.sold_within_om.get():
-            case "Last 1 week":
-                sold_within_days = RedfinApi.SoldWithinDays.ONE_WEEK
-            case "Last 1 month":
-                sold_within_days = RedfinApi.SoldWithinDays.ONE_MONTH
-            case "Last 3 months":
-                sold_within_days = RedfinApi.SoldWithinDays.THREE_MONTHS
-            case "Last 6 months":
-                sold_within_days = RedfinApi.SoldWithinDays.SIX_MONTHS
-            case "Last 1 year":
-                sold_within_days = RedfinApi.SoldWithinDays.ONE_YEAR
-            case "Last 2 years":
-                sold_within_days = RedfinApi.SoldWithinDays.TWO_YEARS
-            case "Last 3 years":
-                sold_within_days = RedfinApi.SoldWithinDays.THREE_YEARS
-            case _:
-                sold_within_days = RedfinApi.SoldWithinDays.FIVE_YEARS
-
-        return {
-            "for sale sold": self.for_sale_sold_om.get(),
-            "min stories": self.min_stories_om.get(),
-            "max year built": self.max_year_built_om.get(),  # do validation here
-            "min year built": self.min_year_built_om.get(),
-            "sold within": sold_within_days.value,
-            "status active": bool(self.status_active_chb.get()),
-            "status coming soon": bool(self.status_coming_soon_chb.get()),
-            "status pending": bool(self.status_pending_chb.get()),
-            "house type house": bool(self.house_type_house_switch.get()),
-            "house type townhouse": bool(self.house_type_townhouse_switch.get()),
-            "house type mul fam": bool(self.house_type_mul_fam_switch.get()),
-            "house type condo": bool(self.house_type_condo_switch.get()),
-            "max sqft": self.max_sqft_om.get(),
-            "min sqft": self.min_sqft_om.get(),
-            "max price": self.max_price_om.get(),
-            "min price": self.min_price_om.get(),
-        }
+        self.max_sqft_om = ctk.CTkOptionMenu(
+            self.square_feet_frame,
+            values=self.sqft_list,
+            command=lambda x: self.sqft_validation(),
+        )
+        self.status_coming_soon_chb = ctk.CTkCheckBox(
+            self.status_frame, text="Coming soon"
+        )
+        self.status_active_chb = ctk.CTkCheckBox(self.status_frame, text="Active")
+        self.status_pending_chb = ctk.CTkCheckBox(
+            self.status_frame, text="Under contract/Pending"
+        )  # missing one i think
+        self.sold_within_om = ctk.CTkOptionMenu(
+            self.sold_within_frame, values=self.sold_within_list
+        )
+
+        self.min_price_om = ctk.CTkOptionMenu(
+            self.price_range_frame,
+            values=self.price_list,
+            command=lambda x: self.price_validation(),
+        )
+        self.max_price_om = ctk.CTkOptionMenu(
+            self.price_range_frame,
+            values=self.price_list,
+            command=lambda x: self.price_validation(),
+        )
+
+        self.reset_filters_button = ctk.CTkButton(
+            self.reset_apply_frame,
+            text="Reset Filters",
+            command=self.set_default_values,
+        )
+        self.apply_filters_button = ctk.CTkButton(
+            self.reset_apply_frame,
+            text="Apply Filters",
+            command=self.change_to_search_page,
+        )
+
+        # Placing the widgets
+        self.for_sale_sold_label.grid(row=0, column=0)
+        self.stories_label.grid(row=0, column=0)
+        self.year_built_label.grid(row=0, column=0)
+        self.home_type_label.grid(row=0, column=0)
+        self.sqft_label.grid(row=0, column=0)
+        self.sale_status_label.grid(row=0, column=0)
+        self.price_range_label.grid(row=0, column=0)
+        self.year_built_from_label.grid(row=1, column=0)
+        self.year_built_to_label.grid(row=1, column=2)
+        self.price_range_from_label.grid(row=1, column=0)
+        self.price_range_to_label.grid(row=1, column=2)
+        self.sold_within_label.grid(row=0, column=0)
+        self.sold_within_from_label.grid(row=1, column=0)
+        self.sold_within_to_label.grid(row=1, column=2)
+
+        self.for_sale_sold_om.grid(row=0, column=1)
+        self.min_stories_om.grid(row=0, column=1)
+        self.min_year_built_om.grid(row=1, column=1)
+        self.max_year_built_om.grid(row=1, column=3)
+        self.min_sqft_om.grid(row=1, column=1)
+        self.max_sqft_om.grid(row=1, column=3)
+        self.sold_within_om.grid(row=0, column=1)
+        self.min_price_om.grid(row=1, column=1)
+        self.max_price_om.grid(row=1, column=3)
+        self.house_type_house_switch.grid(row=1, column=0)
+        self.house_type_townhouse_switch.grid(row=1, column=1)
+        self.house_type_condo_switch.grid(row=2, column=0)
+        self.house_type_mul_fam_switch.grid(row=2, column=1)
+        self.status_coming_soon_chb.grid(row=1, column=0)
+        self.status_active_chb.grid(row=1, column=1)
+        self.status_pending_chb.grid(row=1, column=2)
+        self.reset_filters_button.grid(row=0, column=0, sticky="w")
+        self.apply_filters_button.grid(row=0, column=1, sticky="e")
+
+    def set_default_values(self) -> None:
+        """Set the default values for all widgets.
+        Note:
+            Should be called after init and when clicking reset button.
+        """
+        self.for_sale_sold_om.set(RedfinApi.SoldStatus.SOLD.value)
+        self.min_stories_om.set(RedfinApi.Stories.ONE.value)
+        self.min_year_built_om.set(str(self.cur_year - 1))
+        self.max_year_built_om.set(str(self.cur_year - 1))
+        self.sold_within_om.set(self.sold_within_list[-1])
+        self.max_price_om.set(RedfinApi.Price.NONE.value)
+        self.min_price_om.set(RedfinApi.Price.NONE.value)
+        self.max_sqft_om.set(RedfinApi.Sqft.NONE.value)
+        self.min_sqft_om.set(RedfinApi.Sqft.NONE.value)
+        self.status_active_chb.deselect()
+        self.status_pending_chb.deselect()
+        self.status_coming_soon_chb.deselect()
+        self.house_type_house_switch.select()
+        self.house_type_condo_switch.deselect()
+        self.house_type_townhouse_switch.deselect()
+        self.house_type_mul_fam_switch.deselect()
+        self.status_within_activate_deactivate(self.for_sale_sold_om.get())
+
+    def status_within_activate_deactivate(self, status) -> None:
+        """Deactivate or activate the status and sold within sections, since they depend on what type of sale a house is being searched with.
+
+        Args:
+            status (Event): ignored
+        """
+        match self.for_sale_sold_om.get():
+            case RedfinApi.SoldStatus.FOR_SALE.value:
+                self.sale_status_label.configure(state="normal")
+                self.status_active_chb.configure(state="normal")
+                self.status_coming_soon_chb.configure(state="normal")
+                self.status_pending_chb.configure(state="normal")
+                self.sold_within_label.configure(state="disabled")
+                self.sold_within_om.configure(state="disabled")
+            case RedfinApi.SoldStatus.SOLD.value:
+                self.sale_status_label.configure(state="disabled")
+                self.status_active_chb.configure(state="disabled")
+                self.status_coming_soon_chb.configure(state="disabled")
+                self.status_pending_chb.configure(state="disabled")
+                self.sold_within_label.configure(state="normal")
+                self.sold_within_om.configure(state="normal")
+                self.status_active_chb.deselect()
+                self.status_pending_chb.deselect()
+                self.status_coming_soon_chb.deselect()
+
+    def change_to_search_page(self) -> None:
+        """Change to search page."""
+        self.grid_remove()
+        self.search_page.grid()
+
+    def price_validation(self):
+        """Called when price range min om gets changed"""
+        if (
+            self.max_price_om.get() == RedfinApi.Price.NONE.value
+            or self.min_price_om.get() == RedfinApi.Price.NONE.value
+        ):
+            return
+        if int(self.max_price_om.get()) < int(self.min_price_om.get()):
+            self.max_price_om.set(self.min_price_om.get())
+
+    def year_validation(self) -> None:
+        """Year drop down callback"""
+        if int(self.max_year_built_om.get()) < int(self.min_year_built_om.get()):
+            self.max_year_built_om.set(self.min_year_built_om.get())
+
+    def sqft_validation(self) -> None:
+        """Sqft dropdown callback"""
+        if (
+            self.max_sqft_om.get() == RedfinApi.Sqft.NONE.value
+            or self.min_sqft_om.get() == RedfinApi.Sqft.NONE.value
+        ):
+            return
+        if int(self.max_sqft_om.get()) < int(self.min_sqft_om.get()):
+            self.max_sqft_om.set(self.min_sqft_om.get())
+
+    def house_type_validation(self) -> None:
+        """House type switch validation to make sure at lest house is selected."""
+        if not any(
+            [
+                self.house_type_house_switch.get(),
+                self.house_type_condo_switch.get(),
+                self.house_type_mul_fam_switch.get(),
+                self.house_type_townhouse_switch.get(),
+            ]
+        ):
+            self.house_type_house_switch.select()
+
+    def get_values(self) -> dict[str, Any]:
+        """Get the values of all widgets on this page.
+
+        Returns:
+            dict[str, Any]: dict of values
+        """
+        match self.sold_within_om.get():
+            case "Last 1 week":
+                sold_within_days = RedfinApi.SoldWithinDays.ONE_WEEK
+            case "Last 1 month":
+                sold_within_days = RedfinApi.SoldWithinDays.ONE_MONTH
+            case "Last 3 months":
+                sold_within_days = RedfinApi.SoldWithinDays.THREE_MONTHS
+            case "Last 6 months":
+                sold_within_days = RedfinApi.SoldWithinDays.SIX_MONTHS
+            case "Last 1 year":
+                sold_within_days = RedfinApi.SoldWithinDays.ONE_YEAR
+            case "Last 2 years":
+                sold_within_days = RedfinApi.SoldWithinDays.TWO_YEARS
+            case "Last 3 years":
+                sold_within_days = RedfinApi.SoldWithinDays.THREE_YEARS
+            case _:
+                sold_within_days = RedfinApi.SoldWithinDays.FIVE_YEARS
+
+        return {
+            "for sale sold": self.for_sale_sold_om.get(),
+            "min stories": self.min_stories_om.get(),
+            "max year built": self.max_year_built_om.get(),  # do validation here
+            "min year built": self.min_year_built_om.get(),
+            "sold within": sold_within_days.value,
+            "status active": bool(self.status_active_chb.get()),
+            "status coming soon": bool(self.status_coming_soon_chb.get()),
+            "status pending": bool(self.status_pending_chb.get()),
+            "house type house": bool(self.house_type_house_switch.get()),
+            "house type townhouse": bool(self.house_type_townhouse_switch.get()),
+            "house type mul fam": bool(self.house_type_mul_fam_switch.get()),
+            "house type condo": bool(self.house_type_condo_switch.get()),
+            "max sqft": self.max_sqft_om.get(),
+            "min sqft": self.min_sqft_om.get(),
+            "max price": self.max_price_om.get(),
+            "min price": self.min_price_om.get(),
+        }
 
@@ -1505,13 +1525,13 @@

Source code in src\gui\filterspage.py -
def change_to_search_page(self) -> None:
-    """Change to search page."""
-    self.grid_remove()
-    self.search_page.grid()
+            
def change_to_search_page(self) -> None:
+    """Change to search page."""
+    self.grid_remove()
+    self.search_page.grid()
 
@@ -1536,8 +1556,7 @@

Source code in src\gui\filterspage.py -
 33
- 34
+            
 34
  35
  36
  37
@@ -1735,206 +1754,221 @@ 

229 230 231 -232

def create_widgets(self) -> None:
-    """Create widgets."""
-    # frames
-    self.content_frame = ctk.CTkFrame(self)
-    self.for_sale_sold_frame = ctk.CTkFrame(
-        self.content_frame, width=300, height=100, fg_color="transparent"
-    )
-    self.stories_frame = ctk.CTkFrame(self.content_frame)
-    self.year_built_frame = ctk.CTkFrame(self.content_frame, fg_color="transparent")
-    self.home_type_frame = ctk.CTkFrame(self.content_frame)
-    self.square_feet_frame = ctk.CTkFrame(self.content_frame)
-    self.status_frame = ctk.CTkFrame(self.content_frame)
-    self.sold_within_frame = ctk.CTkFrame(self.content_frame)
-    self.price_range_frame = ctk.CTkFrame(self.content_frame)
-    self.reset_apply_frame = ctk.CTkFrame(self.content_frame)
-
-    # make more grid
-    self.columnconfigure((0, 2), weight=1)
-    self.columnconfigure(1, weight=30)
-    self.content_frame.columnconfigure((0), weight=1, uniform="a")  # uniform
-    self.for_sale_sold_frame.columnconfigure((0, 1), weight=1)
-    self.stories_frame.columnconfigure((0, 1), weight=1)
-    self.year_built_frame.columnconfigure((0, 1, 2, 3), weight=1)
-    self.home_type_frame.columnconfigure((0, 1, 2, 3), weight=1)
-    self.square_feet_frame.columnconfigure((0, 1, 2, 3), weight=1)
-    self.status_frame.columnconfigure((0, 1, 2), weight=1)
-    self.sold_within_frame.columnconfigure((0, 1), weight=1)
-    self.price_range_frame.columnconfigure((0, 1, 2, 3), weight=1)
-    self.reset_apply_frame.columnconfigure((0, 1), weight=1)
-
-    self.rowconfigure((0, 2), weight=1)
-    self.rowconfigure(1, weight=30)
-    self.content_frame.rowconfigure(
-        (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), weight=1, uniform="a"
-    )
-    self.for_sale_sold_frame.rowconfigure(0, weight=1)
-    self.stories_frame.rowconfigure(0, weight=1)
-    self.year_built_frame.rowconfigure((0, 1), weight=1)
-    self.home_type_frame.rowconfigure((0, 1, 2), weight=1)
-    self.square_feet_frame.rowconfigure((0, 1), weight=1)
-    self.status_frame.rowconfigure((0, 1), weight=1)
-    self.sold_within_frame.rowconfigure(0, weight=1)
-    self.price_range_frame.rowconfigure((0, 1), weight=1)
-    self.reset_apply_frame.rowconfigure(0, weight=1)
-
-    # placing the frames
-    self.content_frame.grid(row=1, column=1)
-    self.for_sale_sold_frame.grid(row=0, column=0, sticky="nsew")
-    self.stories_frame.grid(row=1, column=0, sticky="nesw")
-    self.year_built_frame.grid(row=2, column=0, sticky="nesw")
-    self.home_type_frame.grid(row=3, column=0, rowspan=2, sticky="nesw")
-    self.square_feet_frame.grid(row=5, column=0, sticky="nesw")
-    self.status_frame.grid(row=6, column=0)
-    self.sold_within_frame.grid(row=7, column=0, sticky="nesw")
-    self.price_range_frame.grid(row=8, column=0, rowspan=2, sticky="nesw")
-    self.reset_apply_frame.grid(row=10, column=0)
-
-    # Create the labels
-    self.for_sale_sold_label = ctk.CTkLabel(
-        self.for_sale_sold_frame, text="For Sale/Sold"
-    )
-    self.stories_label = ctk.CTkLabel(self.stories_frame, text="Stories")
-    self.year_built_label = ctk.CTkLabel(self.year_built_frame, text="Year Built")
-    self.home_type_label = ctk.CTkLabel(self.home_type_frame, text="Home Type")
-    self.sqft_label = ctk.CTkLabel(self.square_feet_frame, text="Square Feet")
-    self.sale_status_label = ctk.CTkLabel(self.status_frame, text="Status")
-    self.price_range_label = ctk.CTkLabel(
-        self.price_range_frame, text="Price Range"
+232
+233
+234
+235
+236
+237
+238
+239
+240
def create_widgets(self) -> None:
+    """Create widgets."""
+    # frames
+    self.content_frame = ctk.CTkFrame(self)
+    self.for_sale_sold_frame = ctk.CTkFrame(
+        self.content_frame, width=300, height=100, fg_color="transparent"
+    )
+    self.stories_frame = ctk.CTkFrame(self.content_frame, corner_radius=0)
+    self.year_built_frame = ctk.CTkFrame(
+        self.content_frame, corner_radius=0, fg_color="transparent"
+    )
+    self.home_type_frame = ctk.CTkFrame(self.content_frame, corner_radius=0)
+    self.square_feet_frame = ctk.CTkFrame(
+        self.content_frame, corner_radius=0, fg_color="transparent"
+    )
+    self.status_frame = ctk.CTkFrame(self.content_frame, corner_radius=0)
+    self.sold_within_frame = ctk.CTkFrame(
+        self.content_frame, fg_color="transparent", corner_radius=0
+    )
+    self.price_range_frame = ctk.CTkFrame(self.content_frame, corner_radius=0)
+    self.reset_apply_frame = ctk.CTkFrame(
+        self.content_frame, fg_color="transparent", corner_radius=0
+    )
+
+    # make more grid
+    self.columnconfigure((0, 2), weight=1)
+    self.columnconfigure(1, weight=30)
+    self.content_frame.columnconfigure(0, weight=1, uniform="a")  # uniform
+    self.for_sale_sold_frame.columnconfigure((0, 1), weight=1)
+    self.stories_frame.columnconfigure((0, 1), weight=1)
+    self.year_built_frame.columnconfigure((0, 1, 2, 3), weight=1)
+    self.home_type_frame.columnconfigure((0, 1, 2, 3), weight=1)
+    self.square_feet_frame.columnconfigure((0, 1, 2, 3), weight=1)
+    self.status_frame.columnconfigure((0, 1, 2), weight=1)
+    self.sold_within_frame.columnconfigure((0, 1), weight=1)
+    self.price_range_frame.columnconfigure((0, 1, 2, 3), weight=1)
+    self.reset_apply_frame.columnconfigure((0, 1), weight=1)
+
+    self.rowconfigure(0, weight=1)
+    self.content_frame.rowconfigure(
+        (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), weight=1, uniform="a"
+    )
+    self.for_sale_sold_frame.rowconfigure(0, weight=1)
+    self.stories_frame.rowconfigure(0, weight=1)
+    self.year_built_frame.rowconfigure((0, 1), weight=1)
+    self.home_type_frame.rowconfigure((0, 1, 2), weight=1)
+    self.square_feet_frame.rowconfigure((0, 1), weight=1)
+    self.status_frame.rowconfigure((0, 1), weight=1)
+    self.sold_within_frame.rowconfigure(0, weight=1)
+    self.price_range_frame.rowconfigure((0, 1), weight=1)
+    self.reset_apply_frame.rowconfigure(0, weight=1)
+
+    # placing the frames
+    self.content_frame.grid(row=0, column=1, sticky="ns")
+    self.for_sale_sold_frame.grid(row=0, column=0, sticky="nsew")
+    self.stories_frame.grid(row=1, column=0, sticky="nesw")
+    self.year_built_frame.grid(row=2, column=0, sticky="nesw")
+    self.home_type_frame.grid(row=3, column=0, rowspan=2, sticky="nesw")
+    self.square_feet_frame.grid(row=5, column=0, sticky="nesw")
+    self.status_frame.grid(row=6, column=0)
+    self.sold_within_frame.grid(row=7, column=0, sticky="nesw")
+    self.price_range_frame.grid(row=8, column=0, rowspan=2, sticky="nesw")
+    self.reset_apply_frame.grid(row=10, column=0)
+
+    # Create the labels
+    self.for_sale_sold_label = ctk.CTkLabel(
+        self.for_sale_sold_frame, text="For Sale/Sold"
     )
-    self.price_range_from_label = ctk.CTkLabel(self.price_range_frame, text="From")
-    self.price_range_to_label = ctk.CTkLabel(self.price_range_frame, text="To")
-    self.year_built_from_label = ctk.CTkLabel(self.year_built_frame, text="From")
-    self.year_built_to_label = ctk.CTkLabel(self.year_built_frame, text="To")
-    self.sold_within_label = ctk.CTkLabel(
-        self.sold_within_frame, text="Sold Within"
-    )
-    self.sold_within_from_label = ctk.CTkLabel(self.square_feet_frame, text="From")
-    self.sold_within_to_label = ctk.CTkLabel(self.square_feet_frame, text="To")
-
-    # Create the Buttons
-    self.for_sale_sold_om = ctk.CTkOptionMenu(
-        master=self.for_sale_sold_frame,
-        values=[status.value for status in RedfinApi.SoldStatus],
-        command=lambda x: self.status_within_activate_deactivate(x),
-    )
-
-    self.min_stories_om = ctk.CTkOptionMenu(
-        self.stories_frame, values=[story.value for story in RedfinApi.Stories]
-    )
-
-    self.min_year_built_om = ctk.CTkOptionMenu(
-        self.year_built_frame,
-        values=self.year_list,
-        command=lambda x: self.year_validation(),
-    )
-
-    self.max_year_built_om = ctk.CTkOptionMenu(
-        self.year_built_frame,
-        values=self.year_list,
-        command=lambda x: self.year_validation(),
-    )
-
-    self.house_type_house_switch = ctk.CTkSwitch(
-        self.home_type_frame,
-        text="House",
-        command=self.house_type_validation,
-    )
-    self.house_type_townhouse_switch = ctk.CTkSwitch(
-        self.home_type_frame,
-        text="Townhouse",
-        command=self.house_type_validation,
-    )
-    self.house_type_condo_switch = ctk.CTkSwitch(
-        self.home_type_frame,
-        text="Condo",
-        command=self.house_type_validation,
-    )
-    self.house_type_mul_fam_switch = ctk.CTkSwitch(
-        self.home_type_frame,
-        text="Multi-Family",
-        command=self.house_type_validation,
-    )
-
-    self.min_sqft_om = ctk.CTkOptionMenu(
-        self.square_feet_frame,
-        values=self.sqft_list,
-        command=lambda x: self.sqft_validation(),
-    )
-    self.max_sqft_om = ctk.CTkOptionMenu(
-        self.square_feet_frame,
-        values=self.sqft_list,
-        command=lambda x: self.sqft_validation(),
-    )
-    self.status_coming_soon_chb = ctk.CTkCheckBox(
-        self.status_frame, text="Coming soon"
+    self.stories_label = ctk.CTkLabel(self.stories_frame, text="Stories")
+    self.year_built_label = ctk.CTkLabel(self.year_built_frame, text="Year Built")
+    self.home_type_label = ctk.CTkLabel(self.home_type_frame, text="Home Type")
+    self.sqft_label = ctk.CTkLabel(self.square_feet_frame, text="Square Feet")
+    self.sale_status_label = ctk.CTkLabel(self.status_frame, text="Status")
+    self.price_range_label = ctk.CTkLabel(
+        self.price_range_frame, text="Price Range"
+    )
+    self.price_range_from_label = ctk.CTkLabel(self.price_range_frame, text="From")
+    self.price_range_to_label = ctk.CTkLabel(self.price_range_frame, text="To")
+    self.year_built_from_label = ctk.CTkLabel(self.year_built_frame, text="From")
+    self.year_built_to_label = ctk.CTkLabel(self.year_built_frame, text="To")
+    self.sold_within_label = ctk.CTkLabel(
+        self.sold_within_frame, text="Sold Within"
+    )
+    self.sold_within_from_label = ctk.CTkLabel(self.square_feet_frame, text="From")
+    self.sold_within_to_label = ctk.CTkLabel(self.square_feet_frame, text="To")
+
+    # Create the Buttons
+    self.for_sale_sold_om = ctk.CTkOptionMenu(
+        master=self.for_sale_sold_frame,
+        values=[status.value for status in RedfinApi.SoldStatus],
+        command=lambda x: self.status_within_activate_deactivate(x),
+    )
+
+    self.min_stories_om = ctk.CTkOptionMenu(
+        self.stories_frame, values=[story.value for story in RedfinApi.Stories]
+    )
+
+    self.min_year_built_om = ctk.CTkOptionMenu(
+        self.year_built_frame,
+        values=self.year_list,
+        command=lambda x: self.year_validation(),
+    )
+
+    self.max_year_built_om = ctk.CTkOptionMenu(
+        self.year_built_frame,
+        values=self.year_list,
+        command=lambda x: self.year_validation(),
+    )
+
+    self.house_type_house_switch = ctk.CTkSwitch(
+        self.home_type_frame,
+        text="House",
+        command=self.house_type_validation,
+    )
+    self.house_type_townhouse_switch = ctk.CTkSwitch(
+        self.home_type_frame,
+        text="Townhouse",
+        command=self.house_type_validation,
+    )
+    self.house_type_condo_switch = ctk.CTkSwitch(
+        self.home_type_frame,
+        text="Condo",
+        command=self.house_type_validation,
+    )
+    self.house_type_mul_fam_switch = ctk.CTkSwitch(
+        self.home_type_frame,
+        text="Multi-Family",
+        command=self.house_type_validation,
+    )
+
+    self.min_sqft_om = ctk.CTkOptionMenu(
+        self.square_feet_frame,
+        values=self.sqft_list,
+        command=lambda x: self.sqft_validation(),
     )
-    self.status_active_chb = ctk.CTkCheckBox(self.status_frame, text="Active")
-    self.status_pending_chb = ctk.CTkCheckBox(
-        self.status_frame, text="Under contract/Pending"
-    )  # missing one i think
-    self.sold_within_om = ctk.CTkOptionMenu(
-        self.sold_within_frame, values=self.sold_within_list
-    )
-
-    self.min_price_om = ctk.CTkOptionMenu(
-        self.price_range_frame,
-        values=self.price_list,
-        command=lambda x: self.price_validation(),
-    )
-    self.max_price_om = ctk.CTkOptionMenu(
-        self.price_range_frame,
-        values=self.price_list,
-        command=lambda x: self.price_validation(),
-    )
-
-    self.reset_filters_button = ctk.CTkButton(
-        self.reset_apply_frame,
-        text="Reset Filters",
-        command=self.set_default_values,
-    )
-    self.apply_filters_button = ctk.CTkButton(
-        self.reset_apply_frame,
-        text="Apply Filters",
-        command=self.change_to_search_page,
-    )
-
-    # Placing the widgets
-    self.for_sale_sold_label.grid(row=0, column=0)
-    self.stories_label.grid(row=0, column=0)
-    self.year_built_label.grid(row=0, column=0)
-    self.home_type_label.grid(row=0, column=0)
-    self.sqft_label.grid(row=0, column=0)
-    self.sale_status_label.grid(row=0, column=0)
-    self.price_range_label.grid(row=0, column=0)
-    self.year_built_from_label.grid(row=1, column=0)
-    self.year_built_to_label.grid(row=1, column=2)
-    self.price_range_from_label.grid(row=1, column=0)
-    self.price_range_to_label.grid(row=1, column=2)
-    self.sold_within_label.grid(row=0, column=0)
-    self.sold_within_from_label.grid(row=1, column=0)
-    self.sold_within_to_label.grid(row=1, column=2)
-
-    self.for_sale_sold_om.grid(row=0, column=1)
-    self.min_stories_om.grid(row=0, column=1)
-    self.min_year_built_om.grid(row=1, column=1)
-    self.max_year_built_om.grid(row=1, column=3)
-    self.min_sqft_om.grid(row=1, column=1)
-    self.max_sqft_om.grid(row=1, column=3)
-    self.sold_within_om.grid(row=0, column=1)
-    self.min_price_om.grid(row=1, column=1)
-    self.max_price_om.grid(row=1, column=3)
-    self.house_type_house_switch.grid(row=1, column=0)
-    self.house_type_townhouse_switch.grid(row=1, column=1)
-    self.house_type_condo_switch.grid(row=2, column=0)
-    self.house_type_mul_fam_switch.grid(row=2, column=1)
-    self.status_coming_soon_chb.grid(row=1, column=0)
-    self.status_active_chb.grid(row=1, column=1)
-    self.status_pending_chb.grid(row=1, column=2)
-    self.reset_filters_button.grid(row=0, column=0, sticky="nesw")
-    self.apply_filters_button.grid(row=0, column=1, sticky="nesw")
+    self.max_sqft_om = ctk.CTkOptionMenu(
+        self.square_feet_frame,
+        values=self.sqft_list,
+        command=lambda x: self.sqft_validation(),
+    )
+    self.status_coming_soon_chb = ctk.CTkCheckBox(
+        self.status_frame, text="Coming soon"
+    )
+    self.status_active_chb = ctk.CTkCheckBox(self.status_frame, text="Active")
+    self.status_pending_chb = ctk.CTkCheckBox(
+        self.status_frame, text="Under contract/Pending"
+    )  # missing one i think
+    self.sold_within_om = ctk.CTkOptionMenu(
+        self.sold_within_frame, values=self.sold_within_list
+    )
+
+    self.min_price_om = ctk.CTkOptionMenu(
+        self.price_range_frame,
+        values=self.price_list,
+        command=lambda x: self.price_validation(),
+    )
+    self.max_price_om = ctk.CTkOptionMenu(
+        self.price_range_frame,
+        values=self.price_list,
+        command=lambda x: self.price_validation(),
+    )
+
+    self.reset_filters_button = ctk.CTkButton(
+        self.reset_apply_frame,
+        text="Reset Filters",
+        command=self.set_default_values,
+    )
+    self.apply_filters_button = ctk.CTkButton(
+        self.reset_apply_frame,
+        text="Apply Filters",
+        command=self.change_to_search_page,
+    )
+
+    # Placing the widgets
+    self.for_sale_sold_label.grid(row=0, column=0)
+    self.stories_label.grid(row=0, column=0)
+    self.year_built_label.grid(row=0, column=0)
+    self.home_type_label.grid(row=0, column=0)
+    self.sqft_label.grid(row=0, column=0)
+    self.sale_status_label.grid(row=0, column=0)
+    self.price_range_label.grid(row=0, column=0)
+    self.year_built_from_label.grid(row=1, column=0)
+    self.year_built_to_label.grid(row=1, column=2)
+    self.price_range_from_label.grid(row=1, column=0)
+    self.price_range_to_label.grid(row=1, column=2)
+    self.sold_within_label.grid(row=0, column=0)
+    self.sold_within_from_label.grid(row=1, column=0)
+    self.sold_within_to_label.grid(row=1, column=2)
+
+    self.for_sale_sold_om.grid(row=0, column=1)
+    self.min_stories_om.grid(row=0, column=1)
+    self.min_year_built_om.grid(row=1, column=1)
+    self.max_year_built_om.grid(row=1, column=3)
+    self.min_sqft_om.grid(row=1, column=1)
+    self.max_sqft_om.grid(row=1, column=3)
+    self.sold_within_om.grid(row=0, column=1)
+    self.min_price_om.grid(row=1, column=1)
+    self.max_price_om.grid(row=1, column=3)
+    self.house_type_house_switch.grid(row=1, column=0)
+    self.house_type_townhouse_switch.grid(row=1, column=1)
+    self.house_type_condo_switch.grid(row=2, column=0)
+    self.house_type_mul_fam_switch.grid(row=2, column=1)
+    self.status_coming_soon_chb.grid(row=1, column=0)
+    self.status_active_chb.grid(row=1, column=1)
+    self.status_pending_chb.grid(row=1, column=2)
+    self.reset_filters_button.grid(row=0, column=0, sticky="w")
+    self.apply_filters_button.grid(row=0, column=1, sticky="e")
 
@@ -1983,18 +2017,7 @@

Source code in src\gui\filterspage.py -
321
-322
-323
-324
-325
-326
-327
-328
-329
-330
-331
-332
+            
332
 333
 334
 335
@@ -2024,48 +2047,59 @@ 

359 360 361 -362

def get_values(self) -> dict[str, Any]:
-    """Get the values of all widgets on this page.
-
-    Returns:
-        dict[str, Any]: dict of values
-    """
-    match self.sold_within_om.get():
-        case "Last 1 week":
-            sold_within_days = RedfinApi.SoldWithinDays.ONE_WEEK
-        case "Last 1 month":
-            sold_within_days = RedfinApi.SoldWithinDays.ONE_MONTH
-        case "Last 3 months":
-            sold_within_days = RedfinApi.SoldWithinDays.THREE_MONTHS
-        case "Last 6 months":
-            sold_within_days = RedfinApi.SoldWithinDays.SIX_MONTHS
-        case "Last 1 year":
-            sold_within_days = RedfinApi.SoldWithinDays.ONE_YEAR
-        case "Last 2 years":
-            sold_within_days = RedfinApi.SoldWithinDays.TWO_YEARS
-        case "Last 3 years":
-            sold_within_days = RedfinApi.SoldWithinDays.THREE_YEARS
-        case _:
-            sold_within_days = RedfinApi.SoldWithinDays.FIVE_YEARS
-
-    return {
-        "for sale sold": self.for_sale_sold_om.get(),
-        "min stories": self.min_stories_om.get(),
-        "max year built": self.max_year_built_om.get(),  # do validation here
-        "min year built": self.min_year_built_om.get(),
-        "sold within": sold_within_days.value,
-        "status active": bool(self.status_active_chb.get()),
-        "status coming soon": bool(self.status_coming_soon_chb.get()),
-        "status pending": bool(self.status_pending_chb.get()),
-        "house type house": bool(self.house_type_house_switch.get()),
-        "house type townhouse": bool(self.house_type_townhouse_switch.get()),
-        "house type mul fam": bool(self.house_type_mul_fam_switch.get()),
-        "house type condo": bool(self.house_type_condo_switch.get()),
-        "max sqft": self.max_sqft_om.get(),
-        "min sqft": self.min_sqft_om.get(),
-        "max price": self.max_price_om.get(),
-        "min price": self.min_price_om.get(),
-    }
+362
+363
+364
+365
+366
+367
+368
+369
+370
+371
+372
+373
def get_values(self) -> dict[str, Any]:
+    """Get the values of all widgets on this page.
+
+    Returns:
+        dict[str, Any]: dict of values
+    """
+    match self.sold_within_om.get():
+        case "Last 1 week":
+            sold_within_days = RedfinApi.SoldWithinDays.ONE_WEEK
+        case "Last 1 month":
+            sold_within_days = RedfinApi.SoldWithinDays.ONE_MONTH
+        case "Last 3 months":
+            sold_within_days = RedfinApi.SoldWithinDays.THREE_MONTHS
+        case "Last 6 months":
+            sold_within_days = RedfinApi.SoldWithinDays.SIX_MONTHS
+        case "Last 1 year":
+            sold_within_days = RedfinApi.SoldWithinDays.ONE_YEAR
+        case "Last 2 years":
+            sold_within_days = RedfinApi.SoldWithinDays.TWO_YEARS
+        case "Last 3 years":
+            sold_within_days = RedfinApi.SoldWithinDays.THREE_YEARS
+        case _:
+            sold_within_days = RedfinApi.SoldWithinDays.FIVE_YEARS
+
+    return {
+        "for sale sold": self.for_sale_sold_om.get(),
+        "min stories": self.min_stories_om.get(),
+        "max year built": self.max_year_built_om.get(),  # do validation here
+        "min year built": self.min_year_built_om.get(),
+        "sold within": sold_within_days.value,
+        "status active": bool(self.status_active_chb.get()),
+        "status coming soon": bool(self.status_coming_soon_chb.get()),
+        "status pending": bool(self.status_pending_chb.get()),
+        "house type house": bool(self.house_type_house_switch.get()),
+        "house type townhouse": bool(self.house_type_townhouse_switch.get()),
+        "house type mul fam": bool(self.house_type_mul_fam_switch.get()),
+        "house type condo": bool(self.house_type_condo_switch.get()),
+        "max sqft": self.max_sqft_om.get(),
+        "min sqft": self.min_sqft_om.get(),
+        "max price": self.max_price_om.get(),
+        "min price": self.min_price_om.get(),
+    }
 
@@ -2090,27 +2124,27 @@

Source code in src\gui\filterspage.py -
def house_type_validation(self) -> None:
-    """House type switch validation to make sure at lest house is selected."""
-    if not any(
-        [
-            self.house_type_house_switch.get(),
-            self.house_type_condo_switch.get(),
-            self.house_type_mul_fam_switch.get(),
-            self.house_type_townhouse_switch.get(),
-        ]
-    ):
-        self.house_type_house_switch.select()
+            
def house_type_validation(self) -> None:
+    """House type switch validation to make sure at lest house is selected."""
+    if not any(
+        [
+            self.house_type_house_switch.get(),
+            self.house_type_condo_switch.get(),
+            self.house_type_mul_fam_switch.get(),
+            self.house_type_townhouse_switch.get(),
+        ]
+    ):
+        self.house_type_house_switch.select()
 
@@ -2135,23 +2169,23 @@

Source code in src\gui\filterspage.py -
def price_validation(self):
-    """Called when price range min om gets changed"""
-    if (
-        self.max_price_om.get() == RedfinApi.Price.NONE.value
-        or self.min_price_om.get() == RedfinApi.Price.NONE.value
-    ):
-        return
-    if int(self.max_price_om.get()) < int(self.min_price_om.get()):
-        self.max_price_om.set(self.min_price_om.get())
+            
def price_validation(self):
+    """Called when price range min om gets changed"""
+    if (
+        self.max_price_om.get() == RedfinApi.Price.NONE.value
+        or self.min_price_om.get() == RedfinApi.Price.NONE.value
+    ):
+        return
+    if int(self.max_price_om.get()) < int(self.min_price_om.get()):
+        self.max_price_om.set(self.min_price_om.get())
 
@@ -2178,15 +2212,7 @@

Source code in src\gui\filterspage.py -
234
-235
-236
-237
-238
-239
-240
-241
-242
+            
242
 243
 244
 245
@@ -2199,28 +2225,36 @@ 

252 253 254 -255

def set_default_values(self) -> None:
-    """Set the default values for all widgets.
-    Note:
-        Should be called after init and when clicking reset button.
-    """
-    self.for_sale_sold_om.set(RedfinApi.SoldStatus.SOLD.value)
-    self.min_stories_om.set(RedfinApi.Stories.ONE.value)
-    self.min_year_built_om.set(str(self.cur_year - 1))
-    self.max_year_built_om.set(str(self.cur_year - 1))
-    self.sold_within_om.set(self.sold_within_list[-1])
-    self.max_price_om.set(RedfinApi.Price.NONE.value)
-    self.min_price_om.set(RedfinApi.Price.NONE.value)
-    self.max_sqft_om.set(RedfinApi.Sqft.NONE.value)
-    self.min_sqft_om.set(RedfinApi.Sqft.NONE.value)
-    self.status_active_chb.deselect()
-    self.status_pending_chb.deselect()
-    self.status_coming_soon_chb.deselect()
-    self.house_type_house_switch.select()
-    self.house_type_condo_switch.deselect()
-    self.house_type_townhouse_switch.deselect()
-    self.house_type_mul_fam_switch.deselect()
-    self.status_within_activate_deactivate(self.for_sale_sold_om.get())
+255
+256
+257
+258
+259
+260
+261
+262
+263
def set_default_values(self) -> None:
+    """Set the default values for all widgets.
+    Note:
+        Should be called after init and when clicking reset button.
+    """
+    self.for_sale_sold_om.set(RedfinApi.SoldStatus.SOLD.value)
+    self.min_stories_om.set(RedfinApi.Stories.ONE.value)
+    self.min_year_built_om.set(str(self.cur_year - 1))
+    self.max_year_built_om.set(str(self.cur_year - 1))
+    self.sold_within_om.set(self.sold_within_list[-1])
+    self.max_price_om.set(RedfinApi.Price.NONE.value)
+    self.min_price_om.set(RedfinApi.Price.NONE.value)
+    self.max_sqft_om.set(RedfinApi.Sqft.NONE.value)
+    self.min_sqft_om.set(RedfinApi.Sqft.NONE.value)
+    self.status_active_chb.deselect()
+    self.status_pending_chb.deselect()
+    self.status_coming_soon_chb.deselect()
+    self.house_type_house_switch.select()
+    self.house_type_condo_switch.deselect()
+    self.house_type_townhouse_switch.deselect()
+    self.house_type_mul_fam_switch.deselect()
+    self.status_within_activate_deactivate(self.for_sale_sold_om.get())
 
@@ -2245,23 +2279,23 @@

Source code in src\gui\filterspage.py -
def sqft_validation(self) -> None:
-    """Sqft dropdown callback"""
-    if (
-        self.max_sqft_om.get() == RedfinApi.Sqft.NONE.value
-        or self.min_sqft_om.get() == RedfinApi.Sqft.NONE.value
-    ):
-        return
-    if int(self.max_sqft_om.get()) < int(self.min_sqft_om.get()):
-        self.max_sqft_om.set(self.min_sqft_om.get())
+            
def sqft_validation(self) -> None:
+    """Sqft dropdown callback"""
+    if (
+        self.max_sqft_om.get() == RedfinApi.Sqft.NONE.value
+        or self.min_sqft_om.get() == RedfinApi.Sqft.NONE.value
+    ):
+        return
+    if int(self.max_sqft_om.get()) < int(self.min_sqft_om.get()):
+        self.max_sqft_om.set(self.min_sqft_om.get())
 
@@ -2316,15 +2350,7 @@

Source code in src\gui\filterspage.py -
257
-258
-259
-260
-261
-262
-263
-264
-265
+            
265
 266
 267
 268
@@ -2336,27 +2362,41 @@ 

274 275 276 -277

def status_within_activate_deactivate(self, status) -> None:
-    """Deactivate or activate the status and sold within sections, since they depend on what type of sale a house is being searched with.
-
-    Args:
-        status (Event): ignored
-    """
-    match self.for_sale_sold_om.get():
-        case RedfinApi.SoldStatus.FOR_SALE.value:
-            self.sale_status_label.configure(state="normal")
-            self.status_active_chb.configure(state="normal")
-            self.status_coming_soon_chb.configure(state="normal")
-            self.status_pending_chb.configure(state="normal")
-            self.sold_within_label.configure(state="disabled")
-            self.sold_within_om.configure(state="disabled")
-        case RedfinApi.SoldStatus.SOLD.value:
-            self.sale_status_label.configure(state="disabled")
-            self.status_active_chb.configure(state="disabled")
-            self.status_coming_soon_chb.configure(state="disabled")
-            self.status_pending_chb.configure(state="disabled")
-            self.sold_within_label.configure(state="normal")
-            self.sold_within_om.configure(state="normal")
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
def status_within_activate_deactivate(self, status) -> None:
+    """Deactivate or activate the status and sold within sections, since they depend on what type of sale a house is being searched with.
+
+    Args:
+        status (Event): ignored
+    """
+    match self.for_sale_sold_om.get():
+        case RedfinApi.SoldStatus.FOR_SALE.value:
+            self.sale_status_label.configure(state="normal")
+            self.status_active_chb.configure(state="normal")
+            self.status_coming_soon_chb.configure(state="normal")
+            self.status_pending_chb.configure(state="normal")
+            self.sold_within_label.configure(state="disabled")
+            self.sold_within_om.configure(state="disabled")
+        case RedfinApi.SoldStatus.SOLD.value:
+            self.sale_status_label.configure(state="disabled")
+            self.status_active_chb.configure(state="disabled")
+            self.status_coming_soon_chb.configure(state="disabled")
+            self.status_pending_chb.configure(state="disabled")
+            self.sold_within_label.configure(state="normal")
+            self.sold_within_om.configure(state="normal")
+            self.status_active_chb.deselect()
+            self.status_pending_chb.deselect()
+            self.status_coming_soon_chb.deselect()
 
@@ -2381,13 +2421,13 @@

Source code in src\gui\filterspage.py -
def year_validation(self) -> None:
-    """Year drop down callback"""
-    if int(self.max_year_built_om.get()) < int(self.min_year_built_om.get()):
-        self.max_year_built_om.set(self.min_year_built_om.get())
+            
def year_validation(self) -> None:
+    """Year drop down callback"""
+    if int(self.max_year_built_om.get()) < int(self.min_year_built_om.get()):
+        self.max_year_built_om.set(self.min_year_built_om.get())
 
diff --git a/gui/searchpage/index.html b/gui/searchpage/index.html index 6483894..2608a97 100755 --- a/gui/searchpage/index.html +++ b/gui/searchpage/index.html @@ -918,7 +918,22 @@

187 188 189 -190

class SearchPage(ctk.CTkFrame):
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
class SearchPage(ctk.CTkFrame):
     def __init__(self, master: ctk.CTk, **kwargs):
         super().__init__(master, **kwargs)
         self.master = master
@@ -965,133 +980,148 @@ 

border_width=2, command=lambda x: self.update_entry_on_autocomplete_select(x), ) - self.search_button = ctk.CTkButton( - self, - text="Search", - fg_color="transparent", - height=35, - corner_radius=10, - border_width=2, - text_color=("gray10", "#DCE4EE"), - command=self.validate_entry_box_and_search, - ) - - self.columnconfigure((0, 2), weight=1) - self.columnconfigure(1, weight=4) - self.rowconfigure(0, weight=10) - self.rowconfigure(1, weight=4) - self.rowconfigure(2, weight=10) - - self.top_text.grid(column=0, row=0, columnspan=3) + self.search_frame = ctk.CTkFrame(self, fg_color="transparent") + self.search_button = ctk.CTkButton( + self.search_frame, + text="Search", + fg_color="transparent", + height=35, + corner_radius=10, + border_width=2, + text_color=("gray10", "#DCE4EE"), + command=self.validate_entry_box_and_search, + ) + self.cache_chb = ctk.CTkCheckBox(self.search_frame, text="Use cache") + + self.columnconfigure((0, 2), weight=1) + self.columnconfigure(1, weight=4) + self.rowconfigure(0, weight=10) + self.rowconfigure(1, weight=4) + self.rowconfigure(2, weight=10) - self.redfin_filters_button.grid(column=0, row=1, padx=(0, 40), sticky="e") + self.top_text.grid(column=0, row=0, columnspan=3) - self.search_bar.grid(column=1, row=1, sticky="ew") + self.redfin_filters_button.grid(column=0, row=1, padx=(0, 40), sticky="e") - self.suggestion_list_box.grid(column=1, row=2, sticky="new", pady=(10, 0)) + self.search_bar.grid(column=1, row=1, sticky="ew") - self.search_button.grid(column=2, row=1, padx=(40, 0), sticky="w") + self.suggestion_list_box.grid(column=1, row=2, sticky="new", pady=(10, 0)) - self.suggestion_list_box.grid_remove() - self.search_bar.bind( - "<KeyRelease>", command=lambda x: self.update_suggestions_listbox(x) - ) - - def update_suggestions_listbox(self, x: Event | None) -> None: - """Update the suggestions box based on the contents of 'self.search_bar'. 
- - Args: - x (Event | None): ignored - """ - cur_text = re.escape(self.search_bar.get()) - if cur_text == "": - # only gets called when all text has been deleted - self.current_auto_complete_series = self.auto_complete_series - self.suggestion_list_box.grid_remove() - else: - self.suggestion_list_box.delete("all") - if ( - self.current_auto_complete_series is None - or len(cur_text) < self.prev_search_bar_len - ): - self.current_auto_complete_series = self.auto_complete_series.filter( - self.auto_complete_series.str.contains(rf"(?i)^{cur_text}") - ) - else: - self.current_auto_complete_series = ( - self.current_auto_complete_series.filter( - self.current_auto_complete_series.str.contains( - rf"(?i)^{cur_text}" - ) - ) - ) - self.suggestion_list_box.grid() - self.current_auto_complete_series.head( - self.MATCHES_TO_DISPLAY - ).map_elements( - lambda msa: self.suggestion_list_box.insert( - "end", msa, border_width=2, border_color="gray" - ), - return_dtype=pl.Utf8, - ) - self.prev_search_bar_len = len(cur_text) - - def update_entry_on_autocomplete_select(self, x: Event) -> None: - """Suggestions list box callback for when a button in the list box is selected.""" - self.search_bar.delete(0, ctk.END) - self.search_bar.insert(0, x) - self.update_suggestions_listbox(None) - - def validate_entry_box_and_search(self) -> None: - """Validate `self.search_bar` contents and search if the contents are an MSA name.""" - cur_text = self.search_bar.get() - if len(cur_text) == 0: - cur_text = r"!^" - if any(self.auto_complete_series.str.contains(rf"{cur_text}$")): - self.data_page = DataPage(self.master) - self.data_page.grid(row=0, column=0, sticky="news") - self.go_to_data_page(cur_text) - self.search_metros_threaded(cur_text) - else: - CTkMessagebox( - self, - title="Error", - message="Inputted name is not in MSA name list!", - icon="warning", - ) - - def go_to_data_page(self, msa_name: str) -> None: - """Switch to data page. 
- - Args: - msa_name (str): Metropolitan Statistical Area name - """ - if self.data_page is not None: - self.grid_remove() - self.data_page.grid() - self.data_page.set_msa_name(msa_name) + self.search_frame.columnconfigure(0, weight=1) + self.search_frame.rowconfigure((0, 1), weight=1) + # pady is hacky but whatever + self.search_frame.grid(column=2, row=1, padx=(40, 0), pady=(46, 0)) + self.search_button.grid(column=0, row=0, sticky="w") + self.cache_chb.grid(column=0, row=1, pady=(20, 0), sticky="w") + + self.suggestion_list_box.grid_remove() + self.search_bar.bind( + "<KeyRelease>", command=lambda x: self.update_suggestions_listbox(x) + ) + + def update_suggestions_listbox(self, x: Event | None) -> None: + """Update the suggestions box based on the contents of 'self.search_bar'. + + Args: + x (Event | None): ignored + """ + cur_text = re.escape(self.search_bar.get()) + if cur_text == "": + # only gets called when all text has been deleted + self.current_auto_complete_series = self.auto_complete_series + self.suggestion_list_box.grid_remove() + else: + self.suggestion_list_box.delete("all") + if ( + self.current_auto_complete_series is None + or len(cur_text) < self.prev_search_bar_len + ): + self.current_auto_complete_series = self.auto_complete_series.filter( + self.auto_complete_series.str.contains(rf"(?i)^{cur_text}") + ) + else: + self.current_auto_complete_series = ( + self.current_auto_complete_series.filter( + self.current_auto_complete_series.str.contains( + rf"(?i)^{cur_text}" + ) + ) + ) + self.suggestion_list_box.grid() + try: + self.current_auto_complete_series.head( + self.MATCHES_TO_DISPLAY + ).map_elements( + lambda msa: self.suggestion_list_box.insert( + "end", msa, border_width=2, border_color="gray" + ), + return_dtype=pl.Utf8, + ) + except KeyError: + # always throws a key error, doesnt matter to us, just pollutes logs + pass + self.prev_search_bar_len = len(cur_text) + + def update_entry_on_autocomplete_select(self, x: Event) -> None: + 
"""Suggestions list box callback for when a button in the list box is selected.""" + self.search_bar.delete(0, ctk.END) + self.search_bar.insert(0, x) + self.update_suggestions_listbox(None) + + def validate_entry_box_and_search(self) -> None: + """Validate `self.search_bar` contents and search if the contents are an MSA name.""" + cur_text = self.search_bar.get() + if len(cur_text) == 0: + cur_text = r"!^" + if any(self.auto_complete_series.str.contains(rf"{cur_text}$")): + self.data_page = DataPage(self.master) + self.data_page.grid(row=0, column=0, sticky="news") + self.go_to_data_page(cur_text) + self.search_metros_threaded(cur_text) + else: + CTkMessagebox( + self, + title="Error", + message="Inputted name is not in MSA name list!", + icon="warning", + ) - def search_metros_threaded(self, msa_name: str) -> None: - """Search the given Metropolitan Statistical Area name for housing attributes. + def go_to_data_page(self, msa_name: str) -> None: + """Switch to data page. Args: msa_name (str): Metropolitan Statistical Area name """ - redfin_searcher = RedfinApi() - lock = threading.Lock() - with lock: - threading.Thread( - target=redfin_searcher.get_house_attributes_from_metro, - args=(msa_name, self.filters_page.get_values()), - daemon=True, - ).start() - - def change_to_filters_page(self) -> None: - """Change to filters page.""" - if self.filters_page is not None: - self.filters_page.grid(row=0, column=0, sticky="news") - self.grid_remove() - self.filters_page.grid() + if self.data_page is not None: + self.grid_remove() + self.data_page.grid() + self.data_page.set_msa_name(msa_name) + + def search_metros_threaded(self, msa_name: str) -> None: + """Search the given Metropolitan Statistical Area name for housing attributes. 
+ + Args: + msa_name (str): Metropolitan Statistical Area name + """ + redfin_searcher = RedfinApi() + lock = threading.Lock() + with lock: + threading.Thread( + target=redfin_searcher.get_house_attributes_from_metro, + args=( + msa_name, + self.filters_page.get_values(), + bool(self.cache_chb.get()), + ), + daemon=True, + ).start() + + def change_to_filters_page(self) -> None: + """Change to filters page.""" + if self.filters_page is not None: + self.filters_page.grid(row=0, column=0, sticky="news") + self.grid_remove() + self.filters_page.grid()

@@ -1125,17 +1155,17 @@

Source code in src\gui\searchpage.py -
def change_to_filters_page(self) -> None:
-    """Change to filters page."""
-    if self.filters_page is not None:
-        self.filters_page.grid(row=0, column=0, sticky="news")
-        self.grid_remove()
-        self.filters_page.grid()
+            
def change_to_filters_page(self) -> None:
+    """Change to filters page."""
+    if self.filters_page is not None:
+        self.filters_page.grid(row=0, column=0, sticky="news")
+        self.grid_remove()
+        self.filters_page.grid()
 
@@ -1160,71 +1190,78 @@

Source code in src\gui\searchpage.py -
30
-31
-32
-33
-34
-35
-36
-37
-38
-39
-40
-41
-42
-43
-44
-45
-46
-47
-48
-49
-50
-51
-52
-53
-54
-55
-56
-57
-58
-59
-60
-61
-62
-63
-64
-65
-66
-67
-68
-69
-70
-71
-72
-73
-74
-75
-76
-77
-78
-79
-80
-81
-82
-83
-84
-85
-86
-87
-88
-89
-90
-91
-92
-93
-94
def create_widgets(self) -> None:
+            
def create_widgets(self) -> None:
     """Create widgets."""
     self.top_text = ctk.CTkLabel(
         self,
@@ -1258,37 +1295,44 @@ 

border_width=2, command=lambda x: self.update_entry_on_autocomplete_select(x), ) - self.search_button = ctk.CTkButton( - self, - text="Search", - fg_color="transparent", - height=35, - corner_radius=10, - border_width=2, - text_color=("gray10", "#DCE4EE"), - command=self.validate_entry_box_and_search, - ) - - self.columnconfigure((0, 2), weight=1) - self.columnconfigure(1, weight=4) - self.rowconfigure(0, weight=10) - self.rowconfigure(1, weight=4) - self.rowconfigure(2, weight=10) - - self.top_text.grid(column=0, row=0, columnspan=3) + self.search_frame = ctk.CTkFrame(self, fg_color="transparent") + self.search_button = ctk.CTkButton( + self.search_frame, + text="Search", + fg_color="transparent", + height=35, + corner_radius=10, + border_width=2, + text_color=("gray10", "#DCE4EE"), + command=self.validate_entry_box_and_search, + ) + self.cache_chb = ctk.CTkCheckBox(self.search_frame, text="Use cache") + + self.columnconfigure((0, 2), weight=1) + self.columnconfigure(1, weight=4) + self.rowconfigure(0, weight=10) + self.rowconfigure(1, weight=4) + self.rowconfigure(2, weight=10) - self.redfin_filters_button.grid(column=0, row=1, padx=(0, 40), sticky="e") + self.top_text.grid(column=0, row=0, columnspan=3) - self.search_bar.grid(column=1, row=1, sticky="ew") + self.redfin_filters_button.grid(column=0, row=1, padx=(0, 40), sticky="e") - self.suggestion_list_box.grid(column=1, row=2, sticky="new", pady=(10, 0)) + self.search_bar.grid(column=1, row=1, sticky="ew") - self.search_button.grid(column=2, row=1, padx=(40, 0), sticky="w") + self.suggestion_list_box.grid(column=1, row=2, sticky="new", pady=(10, 0)) - self.suggestion_list_box.grid_remove() - self.search_bar.bind( - "<KeyRelease>", command=lambda x: self.update_suggestions_listbox(x) - ) + self.search_frame.columnconfigure(0, weight=1) + self.search_frame.rowconfigure((0, 1), weight=1) + # pady is hacky but whatever + self.search_frame.grid(column=2, row=1, padx=(40, 0), pady=(46, 0)) + 
self.search_button.grid(column=0, row=0, sticky="w") + self.cache_chb.grid(column=0, row=1, pady=(20, 0), sticky="w") + + self.suggestion_list_box.grid_remove() + self.search_bar.bind( + "<KeyRelease>", command=lambda x: self.update_suggestions_listbox(x) + )

@@ -1343,25 +1387,25 @@

Source code in src\gui\searchpage.py -
def go_to_data_page(self, msa_name: str) -> None:
-    """Switch to data page.
-
-    Args:
-        msa_name (str): Metropolitan Statistical Area name
-    """
-    if self.data_page is not None:
-        self.grid_remove()
-        self.data_page.grid()
-        self.data_page.set_msa_name(msa_name)
+            
def go_to_data_page(self, msa_name: str) -> None:
+    """Switch to data page.
+
+    Args:
+        msa_name (str): Metropolitan Statistical Area name
+    """
+    if self.data_page is not None:
+        self.grid_remove()
+        self.data_page.grid()
+        self.data_page.set_msa_name(msa_name)
 
@@ -1416,33 +1460,41 @@

Source code in src\gui\searchpage.py -
170
-171
-172
-173
-174
-175
-176
-177
-178
-179
-180
-181
+            
def search_metros_threaded(self, msa_name: str) -> None:
-    """Search the given Metropolitan Statistical Area name for housing attributes.
-
-    Args:
-        msa_name (str): Metropolitan Statistical Area name
-    """
-    redfin_searcher = RedfinApi()
-    lock = threading.Lock()
-    with lock:
-        threading.Thread(
-            target=redfin_searcher.get_house_attributes_from_metro,
-            args=(msa_name, self.filters_page.get_values()),
-            daemon=True,
-        ).start()
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
def search_metros_threaded(self, msa_name: str) -> None:
+    """Search the given Metropolitan Statistical Area name for housing attributes.
+
+    Args:
+        msa_name (str): Metropolitan Statistical Area name
+    """
+    redfin_searcher = RedfinApi()
+    lock = threading.Lock()
+    with lock:
+        threading.Thread(
+            target=redfin_searcher.get_house_attributes_from_metro,
+            args=(
+                msa_name,
+                self.filters_page.get_values(),
+                bool(self.cache_chb.get()),
+            ),
+            daemon=True,
+        ).start()
 
@@ -1467,15 +1519,15 @@

Source code in src\gui\searchpage.py -
def update_entry_on_autocomplete_select(self, x: Event) -> None:
-    """Suggestions list box callback for when a button in the list box is selected."""
-    self.search_bar.delete(0, ctk.END)
-    self.search_bar.insert(0, x)
-    self.update_suggestions_listbox(None)
+            
def update_entry_on_autocomplete_select(self, x: Event) -> None:
+    """Suggestions list box callback for when a button in the list box is selected."""
+    self.search_bar.delete(0, ctk.END)
+    self.search_bar.insert(0, x)
+    self.update_suggestions_listbox(None)
 
@@ -1530,14 +1582,7 @@

Source code in src\gui\searchpage.py -
 96
- 97
- 98
- 99
-100
-101
-102
-103
+            
103
 104
 105
 106
@@ -1567,44 +1612,59 @@ 

130 131 132 -133

def update_suggestions_listbox(self, x: Event | None) -> None:
-    """Update the suggestions box based on the contents of 'self.search_bar'.
-
-    Args:
-        x (Event | None): ignored
-    """
-    cur_text = re.escape(self.search_bar.get())
-    if cur_text == "":
-        # only gets called when all text has been deleted
-        self.current_auto_complete_series = self.auto_complete_series
-        self.suggestion_list_box.grid_remove()
-    else:
-        self.suggestion_list_box.delete("all")
-        if (
-            self.current_auto_complete_series is None
-            or len(cur_text) < self.prev_search_bar_len
-        ):
-            self.current_auto_complete_series = self.auto_complete_series.filter(
-                self.auto_complete_series.str.contains(rf"(?i)^{cur_text}")
-            )
-        else:
-            self.current_auto_complete_series = (
-                self.current_auto_complete_series.filter(
-                    self.current_auto_complete_series.str.contains(
-                        rf"(?i)^{cur_text}"
-                    )
-                )
-            )
-        self.suggestion_list_box.grid()
-        self.current_auto_complete_series.head(
-            self.MATCHES_TO_DISPLAY
-        ).map_elements(
-            lambda msa: self.suggestion_list_box.insert(
-                "end", msa, border_width=2, border_color="gray"
-            ),
-            return_dtype=pl.Utf8,
-        )
-    self.prev_search_bar_len = len(cur_text)
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
def update_suggestions_listbox(self, x: Event | None) -> None:
+    """Update the suggestions box based on the contents of 'self.search_bar'.
+
+    Args:
+        x (Event | None): ignored
+    """
+    cur_text = re.escape(self.search_bar.get())
+    if cur_text == "":
+        # only gets called when all text has been deleted
+        self.current_auto_complete_series = self.auto_complete_series
+        self.suggestion_list_box.grid_remove()
+    else:
+        self.suggestion_list_box.delete("all")
+        if (
+            self.current_auto_complete_series is None
+            or len(cur_text) < self.prev_search_bar_len
+        ):
+            self.current_auto_complete_series = self.auto_complete_series.filter(
+                self.auto_complete_series.str.contains(rf"(?i)^{cur_text}")
+            )
+        else:
+            self.current_auto_complete_series = (
+                self.current_auto_complete_series.filter(
+                    self.current_auto_complete_series.str.contains(
+                        rf"(?i)^{cur_text}"
+                    )
+                )
+            )
+        self.suggestion_list_box.grid()
+        try:
+            self.current_auto_complete_series.head(
+                self.MATCHES_TO_DISPLAY
+            ).map_elements(
+                lambda msa: self.suggestion_list_box.insert(
+                    "end", msa, border_width=2, border_color="gray"
+                ),
+                return_dtype=pl.Utf8,
+            )
+        except KeyError:
+            # always throws a key error, doesnt matter to us, just pollutes logs
+            pass
+    self.prev_search_bar_len = len(cur_text)
 
@@ -1629,39 +1689,39 @@

Source code in src\gui\searchpage.py -
141
-142
-143
-144
-145
-146
-147
-148
-149
-150
-151
-152
+            
def validate_entry_box_and_search(self) -> None:
-    """Validate `self.search_bar` contents and search if the contents are an MSA name."""
-    cur_text = self.search_bar.get()
-    if len(cur_text) == 0:
-        cur_text = r"!^"
-    if any(self.auto_complete_series.str.contains(rf"{cur_text}$")):
-        self.data_page = DataPage(self.master)
-        self.data_page.grid(row=0, column=0, sticky="news")
-        self.go_to_data_page(cur_text)
-        self.search_metros_threaded(cur_text)
-    else:
-        CTkMessagebox(
-            self,
-            title="Error",
-            message="Inputted name is not in MSA name list!",
-            icon="warning",
-        )
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
def validate_entry_box_and_search(self) -> None:
+    """Validate `self.search_bar` contents and search if the contents are an MSA name."""
+    cur_text = self.search_bar.get()
+    if len(cur_text) == 0:
+        cur_text = r"!^"
+    if any(self.auto_complete_series.str.contains(rf"{cur_text}$")):
+        self.data_page = DataPage(self.master)
+        self.data_page.grid(row=0, column=0, sticky="news")
+        self.go_to_data_page(cur_text)
+        self.search_metros_threaded(cur_text)
+    else:
+        CTkMessagebox(
+            self,
+            title="Error",
+            message="Inputted name is not in MSA name list!",
+            icon="warning",
+        )
 
diff --git a/index.html b/index.html index 0494ae5..8aac008 100755 --- a/index.html +++ b/index.html @@ -304,8 +304,15 @@
  • - - Atlas IQP B23 Residential Heating Code Documentation + + Code Documentation + + +
  • + +
  • + + Program usage
  • @@ -647,8 +654,10 @@

    Home

    -

    Atlas IQP B23 Residential Heating Code Documentation

    -

    Public functions are documented on the left. A video series on YouTube will be uploaded soon to assist in learning about the usage of this program.

    +

    Code Documentation

    +

    Public functions are documented on the left. Additional comments for private functions can be found in the source code. The main.py file is not part of the scraping library; it only serves as the launch point for the GUI application.

    +

    Program usage

    +

    A YouTube playlist that walks through how to use the scraping tool is being created. An additional set of videos on how to combine the collected data and then import and analyze it in QGIS is being created as well.

    diff --git a/search/search_index.json b/search/search_index.json index 87da9a5..671fb01 100755 --- a/search/search_index.json +++ b/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Home","text":""},{"location":"#atlas-iqp-b23-residential-heating-code-documentation","title":"Atlas IQP B23 Residential Heating Code Documentation","text":"

    Public functions are documented on the left. A video series on YouTube will be uploaded soon to assist in learning about the usage of this program.

    "},{"location":"backend/helper/","title":"Helper","text":""},{"location":"backend/helper/#backend.helper.ASCIIColors","title":"ASCIIColors","text":"

    Bases: StrEnum

    ASCII colors for use in printing colored text to the terminal.

    Source code in src\\backend\\helper.py
    class ASCIIColors(StrEnum):\n    \"\"\"ASCII colors for use in printing colored text to the terminal.\"\"\"\n\n    GREY = \"\\x1b[38;20m\"\n    YELLOW = \"\\x1b[33;20m\"\n    RED = \"\\x1b[31;20m\"\n    BOLD_RED = \"\\x1b[31;1m\"\n    RESET = \"\\x1b[0m\"\n
    "},{"location":"backend/helper/#backend.helper.df_to_file","title":"df_to_file(df)","text":"

    Write a DataFrame to a unique file.

    Parameters:

    Name Type Description Default df DataFrame

    the DataFrame to write

    required Source code in src\\backend\\helper.py
    def df_to_file(df: pl.DataFrame):\n    \"\"\"Write a DataFrame to a unique file.\n\n    Args:\n        df (pl.DataFrame): the DataFrame to write\n    \"\"\"\n    file_path = OUTPUT_DIR / f\"{time.time()}_data_frame.csv\"\n    print(f\"Dataframe saved to {file_path.resolve()}\")\n    df.write_csv(file_path, include_header=True)\n
    "},{"location":"backend/helper/#backend.helper.is_valid_zipcode","title":"is_valid_zipcode(zip)","text":"

    Check if the given ZIP code is valid based on a local file.

    Parameters:

    Name Type Description Default zip int

    the ZIP code to check

    required

    Returns:

    Name Type Description bool bool

    if ZIP code is valid

    Source code in src\\backend\\helper.py
    def is_valid_zipcode(zip: int) -> bool:\n    \"\"\"Check if the given ZIP code is valid based on a local file.\n\n    Args:\n        zip (int): the ZIP code to check\n\n    Returns:\n        bool: if ZIP code is valid\n    \"\"\"\n    if isinstance(zip, str):\n        zip = int(zip)\n    return zip in master_df[\"ZIP\"]\n
    "},{"location":"backend/helper/#backend.helper.metro_name_to_zip_code_list","title":"metro_name_to_zip_code_list(msa_name)","text":"

    Return the constituent ZIP codes for the given Metropolitan Statistical Area.

    Parameters:

    Name Type Description Default msa_name str

    name of the Metropolitan Statistical Area

    required

    Returns:

    Type Description list[int]

    list[int]: list of ZIP codes found. Is empty if MSA name is invalid

    Source code in src\\backend\\helper.py
    def metro_name_to_zip_code_list(msa_name: str) -> list[int]:\n    \"\"\"Return the constituent ZIP codes for the given Metropolitan Statistical Area.\n\n    Args:\n        msa_name (str): name of the Metropolitan Statistical Area\n\n    Returns:\n        list[int]: list of ZIP codes found. Is empty if MSA name is invalid\n    \"\"\"\n    if msa_name == \"TEST\":\n        # return [20814]  # good and small\n        # return [22067, 55424]  # nulls in sqft\n        return [20015, 20018, 20017]  # nulls in sqft and large\n\n    df = master_df.select(\"ZIP\", \"METRO_NAME\", \"LSAD\")\n\n    return (\n        df.filter(\n            (pl.col(\"METRO_NAME\").eq(msa_name))\n            & (pl.col(\"LSAD\").eq(\"Metropolitan Statistical Area\"))\n        )\n        .unique()[\"ZIP\"]\n        .to_list()\n    )\n
    "},{"location":"backend/helper/#backend.helper.req_get_to_file","title":"req_get_to_file(request)","text":"

    Write the contents of a request response to a unique file.

    Parameters:

    Name Type Description Default request Response

    the request

    required

    Returns:

    Name Type Description int int

    the status code of the request

    Source code in src\\backend\\helper.py
    def req_get_to_file(request: requests.Response) -> int:\n    \"\"\"Write the contents of a request response to a unique file.\n\n    Args:\n        request (requests.Response): the request\n\n    Returns:\n        int: the status code of the request\n    \"\"\"\n    with open(OUTPUT_DIR / f\"{time.time()}_request.html\", \"w+\", encoding=\"utf-8\") as f:\n        f.write(request.text)\n    return request.status_code\n
    "},{"location":"backend/helper/#backend.helper.state_city_to_zip_df","title":"state_city_to_zip_df(state, city)","text":"

    Take in a state and city and return the ZIP code constituents of that city.

    Parameters:

    Name Type Description Default state str

    the state

    required city str

    the city

    required

    Returns:

    Type Description DataFrame

    pl.DataFrame: DataFrame of ZIP codes

    Source code in src\\backend\\helper.py
    def state_city_to_zip_df(state: str, city: str) -> pl.DataFrame:\n    \"\"\"Take in a state and city and return the ZIP code constituents of that city.\n\n    Args:\n        state (str): the state\n        city (str): the city\n\n    Returns:\n        pl.DataFrame: DataFrame of ZIP codes\n    \"\"\"\n    return (\n        pl.read_csv(\"zip_registry.csv\")\n        .filter((pl.col(\"state\") == state) & (pl.col(\"city\") == city))\n        .select(\"zipcode\")\n    )\n
    "},{"location":"backend/helper/#backend.helper.state_county_to_zip_df","title":"state_county_to_zip_df(state, county)","text":"

    Take in a state and county and return the ZIP code constituents of that county.

    Parameters:

    Name Type Description Default state str

    the state

    required county str

    the county

    required

    Returns:

    Type Description DataFrame

    pl.DataFrame: DataFrame of ZIP codes

    Source code in src\\backend\\helper.py
    def state_county_to_zip_df(state: str, county: str) -> pl.DataFrame:\n    \"\"\"Take in a state and county and return the ZIP code constituents of that county.\n\n    Args:\n        state (str): the state\n        county (str): the county\n\n    Returns:\n        pl.DataFrame: DataFrame of ZIP codes\n    \"\"\"\n    return (\n        pl.read_csv(\"zip_registry.csv\")\n        .filter((pl.col(\"state\") == state) & (pl.col(\"county\") == county))\n        .select(\"zipcode\")\n    )\n
    "},{"location":"backend/helper/#backend.helper.zip_to_metro","title":"zip_to_metro(zip)","text":"

    Find the Metropolitan Statistical Area name for the specified ZIP code.

    Parameters:

    Name Type Description Default zip int

    the ZIP code to look up

    required

    Returns:

    Name Type Description str str

    the Metropolitan name. Is empty if the ZIP code is not a part of a Metropolitan Statistical Area

    Source code in src\\backend\\helper.py
    def zip_to_metro(zip: int) -> str:\n    \"\"\"Find the Metropolitan Statistical Area name for the specified ZIP code.\n\n    Args:\n        zip (int): the ZIP code to look up\n\n    Returns:\n        str: the Metropolitan name. Is empty if the ZIP code is not a part of a Metropolitan Statistical Area\n    \"\"\"\n    result = master_df.filter(master_df[\"ZIP\"] == zip)[\"METRO_NAME\"]\n\n    if len(result) > 0:\n        log(\"Zip has multiple codes. Only giving first one\", \"debug\")\n        return result[0]\n    else:\n        return \"\"  # should this be none?\n
    "},{"location":"backend/redfinscraper/","title":"Redfinscraper","text":""},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi","title":"RedfinApi","text":"

    Scrape Redfin using their stingray API. Use this class for getting and then iterating over ZIP code level data, creating an object for each new ZIP code.

    Source code in src\\backend\\redfinscraper.py
    class RedfinApi:\n    \"\"\"Scrape redfin using their stingray api. Use this class for getting and the iterating over ZIP code level data, creating an object for each new zip code.\"\"\"\n\n    class SoldStatus(StrEnum):\n        FOR_SALE = \"For Sale\"\n        SOLD = \"Sold\"\n\n    class HouseType(StrEnum):\n        HOUSE = \"1\"\n        CONDO = \"2\"\n        TOWNHOUSE = \"3\"\n        MULTI_FAMILY = \"4\"\n        LAND = \"5\"\n        OTHER = \"6\"\n\n    class Price(StrEnum):\n        NONE = \"None\"\n        FIFTY_THOU = \"50000\"\n        SEVENTY_FIVE_THOU = \"75000\"\n        ONE_HUN_THOU = \"100000\"\n        ONE_HUN_25_THOU = \"125000\"\n        ONE_HUN_5_THOU = \"150000\"\n        ONE_HUN_75_THOU = \"175000\"\n        TWO_HUN_THOU = \"200000\"\n        TWO_HUN_25_THOU = \"225000\"\n        TWO_HUN_5_THOU = \"250000\"\n        TWO_HUN_75_THOU = \"275000\"\n        THREE_HUN_THOU = \"300000\"\n        THREE_HUN_25_THOU = \"325000\"\n        THREE_HUN_5_THOU = \"350000\"\n        THREE_HUN_75_THOU = \"375000\"\n        FOUR_HUN_THOU = \"400000\"\n        FOUR_HUN_25_THOU = \"425000\"\n        FOUR_HUN_5_THOU = \"450000\"\n        FOUR_HUN_75_THOU = \"475000\"\n        FIVE_HUN_THOU = \"500000\"\n        FIVE_HUN_5_THOU = \"550000\"\n        SIX_HUN_THOU = \"600000\"\n        SIX_HUN_5_THOU = \"650000\"\n        SEVEN_HUN_THOU = \"700000\"\n        SEVEN_HUN_5_THOU = \"750000\"\n        EIGHT_HUN_THOU = \"800000\"\n        EIGHT_HUN_5_THOU = \"850000\"\n        NINE_HUN_THOU = \"900000\"\n        NINE_HUN_5_THOU = \"950000\"\n        ONE_MIL = \"1000000\"\n        ONE_MIL_25_THOU = \"1250000\"\n        ONE_MIL_5_THOU = \"1500000\"\n        ONE_MIL_75_THOU = \"1750000\"\n        TWO_MIL = \"2000000\"\n        TWO_MIL_25_THOU = \"2250000\"\n        TWO_MIL_5_THOU = \"2500000\"\n        TWO_MIL_75_THOU = \"2750000\"\n        THREE_MIL = \"3000000\"\n        THREE_MIL_25_THOU = \"3250000\"\n        THREE_MIL_5_THOU = \"3500000\"\n        THREE_MIL_75_THOU 
= \"3750000\"\n        FOUR_MIL = \"4000000\"\n        FOUR_MIL_25_THOU = \"4250000\"\n        FOUR_MIL_5_THOU = \"4500000\"\n        FOUR_MIL_75_THOU = \"4750000\"\n        FIVE_MIL = \"5000000\"\n        SIX_MIL = \"6000000\"\n        SEVEN_MIL = \"7000000\"\n        EIGHT_MIL = \"8000000\"\n        NINE_MIL = \"9000000\"\n        TEN_MIL = \"10000000\"\n\n    class SortOrder(StrEnum):\n        RECOMMENDED = \"redfin-recommended-asc\"\n        NEWEST = \"days-on-redfin-asc\"\n        MOST_RECENTLY_SOLD = \"last-sale-date-desc\"\n        LOW_HI = \"price-asc\"\n        HI_LOW = \"price-desc\"\n        SQFT = \"square-footage-desc\"\n        LOT_SIZE = \"lot-sq-ft-desc\"\n        SQFT_PRICE = \"dollars-per-sq-ft-asc\"\n\n    class SoldWithinDays(StrEnum):\n        ONE_WEEK = \"7\"\n        ONE_MONTH = \"30\"\n        THREE_MONTHS = \"90\"\n        SIX_MONTHS = \"180\"\n        ONE_YEAR = \"365\"\n        TWO_YEARS = \"730\"\n        THREE_YEARS = \"1095\"\n        FIVE_YEARS = \"1825\"\n\n    class Stories(StrEnum):\n        ONE = \"1\"\n        TWO = \"2\"\n        THREE = \"3\"\n        FOUR = \"4\"\n        FIVE = \"5\"\n        TEN = \"10\"\n        FIFTEEN = \"15\"\n        TWENTY = \"20\"\n\n    class Sqft(StrEnum):\n        NONE = \"None\"\n        SEVEN_FIFTY = \"750\"\n        THOU = \"1000\"\n        THOU_1 = \"1100\"\n        THOU_2 = \"1200\"\n        THOU_3 = \"1300\"\n        THOU_4 = \"1400\"\n        THOU_5 = \"1500\"\n        THOU_6 = \"1600\"\n        THOU_7 = \"1700\"\n        THOU_8 = \"1800\"\n        THOU_9 = \"1900\"\n        TWO_THOU = \"2000\"\n        TWO_THOU_250 = \"2250\"\n        TWO_THOU_500 = \"2500\"\n        TWO_THOU_750 = \"2750\"\n        THREE_THOU = \"3000\"\n        FOUR_THOU = \"4000\"\n        FIVE_THOU = \"5000\"\n        SEVEN_THOU_500 = \"7500\"\n        TEN_THOU = \"10000\"\n\n    def __init__(self) -> None:\n        self.rf = redfin.Redfin()\n        self.DESIRED_CSV_SCHEMA = {\n            \"ADDRESS\": str,\n           
 \"CITY\": str,\n            \"PROPERTY TYPE\": str,\n            \"STATE OR PROVINCE\": str,\n            \"YEAR BUILT\": pl.UInt16,\n            \"ZIP OR POSTAL CODE\": pl.UInt32,\n            \"PRICE\": pl.UInt32,\n            \"SQUARE FEET\": pl.UInt32,\n            \"URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)\": str,\n            \"LATITUDE\": pl.Float32,\n            \"LONGITUDE\": pl.Float32,\n        }\n        self.search_params = None\n        self.column_dict = {key: False for key in CATEGORY_PATTERNS.keys()}\n\n    def set_search_params(self, zip: str, search_filters: dict[str, Any]) -> None:\n        \"\"\"Set the parameters for searching by ZIP code.\n\n        Args:\n            zip (str): the ZIP code\n            search_filters (dict[str, Any]): search filters for appending to a gis-csv path\n        \"\"\"\n        try:\n            region_info = self.get_region_info_from_zipcode(zip)\n        except json.JSONDecodeError:\n            log(f\"Could not decode region info for {zip}.\", \"warn\")\n            return None\n        except HTTPError:\n            log(f\"Could not retrieve region info for {zip}.\", \"warn\")\n            return None\n\n        if search_filters.get(\"for sale sold\") == \"Sold\":\n            sort_order = self.SortOrder.MOST_RECENTLY_SOLD.value\n        else:\n            sort_order = self.SortOrder.NEWEST.value\n        # TODO make sure to fix filtering so that its not just \"single family homes\"\n\n        try:\n            market = region_info[\"payload\"][\"rootDefaults\"][\"market\"]\n            region_id = region_info[\"payload\"][\"rootDefaults\"][\"region_id\"]\n            status = str(region_info[\"payload\"][\"rootDefaults\"][\"status\"])\n        except KeyError:\n            log(\"Market, region, or status could not be identified \", \"warn\")\n            return None\n\n        self.search_params = {\n            \"al\": 1,\n            \"has_deal\": 
\"false\",\n            \"has_dishwasher\": \"false\",\n            \"has_laundry_facility\": \"false\",\n            \"has_laundry_hookups\": \"false\",\n            \"has_parking\": \"false\",\n            \"has_pool\": \"false\",\n            \"has_short_term_lease\": \"false\",\n            \"include_pending_homes\": \"false\",  # probably an \"include\" option\n            \"isRentals\": \"false\",\n            \"is_furnished\": \"false\",\n            \"is_income_restricted\": \"false\",\n            \"is_senior_living\": \"false\",\n            \"max_year_built\": search_filters.get(\"max year built\"),\n            \"min_year_built\": search_filters.get(\"min year built\"),\n            \"market\": market,\n            \"min_stories\": search_filters.get(\"min stories\"),\n            \"num_homes\": 350,\n            \"ord\": sort_order,\n            \"page_number\": \"1\",\n            \"pool\": \"false\",\n            \"region_id\": region_id,\n            \"region_type\": \"2\",\n            \"status\": status,\n            \"travel_with_traffic\": \"false\",\n            \"travel_within_region\": \"false\",\n            \"utilities_included\": \"false\",\n            \"v\": \"8\",\n        }\n        if search_filters.get(\"for sale sold\") == \"Sold\":\n            self.search_params[\"sold_within_days\"] = search_filters.get(\"sold within\")\n            self.search_params[\"status\"] = 9\n        else:\n            self.search_params[\"sf\"] = \"1, 2, 3, 4, 5, 6, 7\"\n            match [\n                search_filters.get(\"status coming soon\"),\n                search_filters.get(\"status active\"),\n                search_filters.get(\"status pending\"),\n            ]:\n                case [True, False, False]:\n                    status = \"8\"\n                case [False, True, False]:\n                    status = \"1\"\n                case [False, False, True]:\n                    status = \"130\"\n                case [True, True, 
False]:\n                    status = \"9\"\n                case [False, True, True]:\n                    status = \"139\"\n                case [True, False, True]:\n                    status = \"138\"\n                case [True, True, True]:\n                    status = \"139\"\n\n            self.search_params[\"status\"] = status\n\n        if (max_sqft := search_filters.get(\"max sqft\")) != \"None\":\n            self.search_params[\"max_sqft\"] = max_sqft\n        if (min_sqft := search_filters.get(\"min sqft\")) != \"None\":\n            self.search_params[\"min_sqft\"] = min_sqft\n\n        if (max_price := search_filters.get(\"max price\")) != \"None\":\n            self.search_params[\"max_price\"] = max_price\n        if (min_price := search_filters.get(\"min price\")) != \"None\":\n            self.search_params[\"min_price\"] = min_price\n\n        houses = \"\"  # figure out how to join into comma string\n        if search_filters.get(\"house type house\") is True:\n            houses = houses + \"1\"\n        if search_filters.get(\"house type condo\") is True:\n            houses = houses + \"2\"\n        if search_filters.get(\"house type townhouse\") is True:\n            houses = houses + \"3\"\n        if search_filters.get(\"house type mul fam\") is True:\n            houses = houses + \"4\"\n\n        self.search_params[\"uipt\"] = \",\".join(list(houses))\n\n    # redfin setup\n    def meta_request_download(self, url: str, search_params) -> str:\n        \"\"\"Method for downloading objects from Redfin.\n\n        Args:\n            url (str): the Redfin URL\n\n        Returns:\n            str: the unicode text response\n        \"\"\"\n        response = requests.get(\n            self.rf.base + url, params=search_params, headers=self.rf.user_agent_header\n        )\n        log(response.request.url, \"debug\")\n        response.raise_for_status()\n        return response.text\n\n    def working_below_the_fold(self, property_id: str, 
listing_id: str = \"\") -> Any:\n        \"\"\"A below_the_fold method that accepts a listing ID.\n        Note:\n            If you can get the listing ID, make sure to pass it to this function. You will possibly get incorrect data if you do not pass it\n\n        Args:\n            property_id (str): the property ID\n            listing_id (str): The listing ID. Defaults to False.\n\n        Returns:\n            Any: response\n        \"\"\"\n        if listing_id:\n            params = {\n                \"accessLevel\": 1,\n                \"propertyId\": property_id,\n                \"listingId\": listing_id,\n                \"pageType\": 1,\n            }\n        else:\n            params = {\n                \"accessLevel\": 1,\n                \"propertyId\": property_id,\n                \"pageType\": 1,\n            }\n        return self.rf.meta_request(\"/api/home/details/belowTheFold\", params)\n\n    def get_region_info_from_zipcode(self, zip_code: str) -> Any:\n        \"\"\"Get the region ifo from a ZIP code.\n\n        Args:\n            zip_code (str): the ZIP code\n\n        Returns:\n            Any: response\n        \"\"\"\n        return self.rf.meta_request(\n            \"api/region\", {\"region_id\": zip_code, \"region_type\": 2, \"tz\": True, \"v\": 8}\n        )\n\n    def get_gis_csv(self, params: dict[str, Any]) -> str:\n        \"\"\"Get the gis-csv of an area based on the contents of `params`\n\n        Args:\n            params (dict[str, Any]): the parameters\n\n        Returns:\n            str: the CSV file as a unicode string\n        \"\"\"\n        return self.meta_request_download(\"api/gis-csv\", search_params=params)\n\n    # calls stuff\n    def get_heating_info_from_super_group(self, super_group: dict) -> list[str]:\n        \"\"\"Extract heating information from a super group\n\n        :\n            Must supply a probable heating group for accurate information\n\n            Format of super group in JSON:\n         
   {\n                types: []\n                amenityGroups: [\n                    {\n                        groupTitle: \"\"\n                        referenceName : \"\"\n                        amenityEntries : [\n                            {\n                                amenityName : \"\"\n                                referenceName: \"\"\n                                accessLevel : 1\n                                displayLevel : 1\n                                amenityValues : []\n                            },...\n                        ]\n                    }\n                ]\n                titleString: \"\"\n            }\n\n            Format of groupTitle/propertyDetailsHeader on website:\n                Interior -> titleString\n                ...\n                    Heating & Cooling -> groupTitle\n                        Electric -> no amenityName\n                        Ceiling Fan(s), Programmable Thermostat, Refrigeration -> no amenityName\n                        Heating/Cooling Updated In: 2022 -> amenityName = Heating/Cooling Updated In\n\n        Args:\n            super_group (dict): the super group to extract terms from\n\n        Returns:\n            list[str]: list of heating terms\n        \"\"\"\n        amenity_values = []\n        for amenity in super_group.get(\"amenityGroups\", \"\"):  #\n            if not any(\n                AMENITY_GROUP_INCLUDE_PATTERNS.findall(amenity.get(\"groupTitle\", \"\"))\n            ):\n                continue  # this is the name that is bold\n            # these are the bulleted items.\n            for amenity_entry in amenity.get(\"amenityEntries\", \"\"):\n                # if == \"\", then item is dangling (no word before colon). 
give the same treatment to \"utilities: ...\" as if it were ==\"\"\n                amenity_name = amenity_entry.get(\"amenityName\", \"\")\n\n                if amenity_name and not any(\n                    re.compile(\"utilit\", re.I).findall(amenity_name)\n                ):\n                    # filter the before colon. first if is to have stricter capture rule when amenity item is \"Utilities: Natural gas, heat pump, ...\"\n                    if any(\n                        AMENITY_NAME_INCLUDE_PATTERNS.findall(amenity_name)\n                    ) and not any(AMENITY_NAME_EXCLUDE_PATTERNS.findall(amenity_name)):\n                        amenity_values.extend(\n                            [\n                                value\n                                for value in amenity_entry.get(\"amenityValues\", \"\")\n                                if any(\n                                    regex.findall(value)\n                                    for regex in AFTER_COLON_FUEL_AND_APPLIANCE_INCLUDE_PATTERNS\n                                )\n                                and not any(AFTER_COLON_EXCLUDE_PATTERNS.findall(value))\n                            ]\n                        )\n                else:\n                    # filter for appliance if dangling or in utilities bullet item\n                    amenity_values.extend(\n                        [\n                            value\n                            for value in amenity_entry.get(\"amenityValues\", \"\")\n                            if any(\n                                regex.findall(value)\n                                for regex in APPLIANCE_HEATING_RELATED_PATTERNS\n                            )\n                        ]\n                    )\n        return amenity_values\n\n    def get_super_groups_from_url(self, listing_url: str) -> list | None:\n        \"\"\"Get super group list from listing url.\n\n        Args:\n            listing_url (str): The path part of the 
listing URL. This is without the \"redfin.com\" part. Include the first forward slash\n\n        Returns:\n            list | None: List of all super groups from a Redfin Url. None if an error is encountered or if no super groups were found\n        \"\"\"\n        if \"redfin\" in listing_url:\n            listing_url = urlparse(listing_url).path\n\n        try:\n            time.sleep(random.uniform(1.2, 2.1))\n            initial_info = self.rf.initial_info(listing_url)\n        except json.JSONDecodeError:\n            log(f\"Could not get initial info for {listing_url =}\", \"warn\")\n            return None\n        try:\n            property_id = initial_info[\"payload\"][\"propertyId\"]\n        except KeyError:\n            log(\"Could not find property id\", \"critical\")\n            return None\n        try:\n            listing_id = initial_info[\"payload\"][\"listingId\"]\n        except KeyError:\n            listing_id = None\n            log(\n                \"Could not find listing id. Will try to continue. 
if errors in final zip csv, this might be the issue\",\n                \"warn\",\n            )\n        try:\n            time.sleep(random.uniform(1.1, 2.1))\n            if listing_id is None:\n                mls_data = self.working_below_the_fold(property_id)\n            else:\n                mls_data = self.working_below_the_fold(property_id, listing_id)\n        except json.JSONDecodeError:\n            log(f\"Could not find mls details for {listing_url = }\", \"warn\")\n            return None\n        try:\n            super_groups = mls_data[\"payload\"][\"amenitiesInfo\"][\"superGroups\"]\n        except KeyError:\n            log(f\"Could not find property details for {listing_url = }\", \"warn\")\n            return None\n        return super_groups\n\n    def get_heating_terms_dict_from_listing(\n        self, address_and_url_list: list[str]\n    ) -> dict[str, bool]:\n        \"\"\"Generate a filled out dictionary based on `self.column_dict` and the contents of :meth:get_heating_info_from_super_group(address_url_list).\n\n        TODO:\n            Since addresses can be doubled and it is random which one gets chosen, just printing listing url so that we can see which one has been chosen\n\n        Args:\n            address_and_url_list (list[str]): address in the first position, and the listing URL in the second position\n\n        Returns:\n            dict[str, bool]: the filled out `self.column_dict` for the supplied address/listing URL\n        \"\"\"\n        address = address_and_url_list[0]\n        listing_url = address_and_url_list[1]\n        terms = []\n\n        super_groups = self.get_super_groups_from_url(listing_url)\n        if super_groups is None:\n            log(\n                \"No amenities found\", \"info\"\n            )  # this and \"There was no heating information for {address}\" should be made in caller?\n            return copy.deepcopy(self.column_dict)\n        for super_group in super_groups:  # dict\n           
 if any(\n                SUPER_GROUP_INCLUDE_PATTERNS.findall(super_group.get(\"titleString\", \"\"))\n            ):\n                terms.extend(\n                    self.get_heating_info_from_super_group(super_group)\n                )  # this will be like [gas, electricity, heat pump]\n        if len(terms) == 0:\n            log(\n                f\"There was no heating information for {urlparse(listing_url).path}\",\n                \"info\",\n            )\n            return copy.deepcopy(self.column_dict)\n\n        # categorize the correct dict and return\n        master_dict = copy.deepcopy(self.column_dict)\n        for input_string in terms:\n            log(f\"{input_string = }\", \"debug\")\n            result = {}\n            for key, pattern in CATEGORY_PATTERNS.items():\n                if bool(re.search(pattern, input_string)):\n                    result[key] = True\n                    log(f\"Pattern matched on {key, pattern = }\", \"debug\")\n                log(f\"Pattern did not match on {key, pattern = }\", \"debug\")\n            for key in result.keys():\n                master_dict[key] = result[key] | master_dict[key]\n\n        # You'll have to df.unnest this for use in a dataframe\n        log(f\"{terms = }\", \"debug\")\n        log(f\"{master_dict = }\", \"debug\")\n        log(f\"Heating amenities found for {address}.\", \"info\")\n        return master_dict\n\n    def get_gis_csv_from_zip_with_filters(\n        self,\n    ) -> pl.DataFrame | None:\n        \"\"\"Clean the GIS CSV retrieved from using the `search_params` field into the desired schema.\n\n        Returns:\n            pl.DataFrame | None: returns the DataFrame of cleaned information. 
None if there was not information in the GIS CSV file.\n        \"\"\"\n        if self.search_params is None:\n            return\n        csv_text = self.get_gis_csv(self.search_params)\n\n        home_types: str = self.search_params.get(\"uipt\", \"\")\n        if \"1\" in home_types:\n            home_types = home_types.replace(\"1\", \"Single Family Residential\")\n        if \"2\" in home_types:\n            home_types = home_types.replace(\"2\", \"Condo/Co-op\")\n        if \"3\" in home_types:\n            home_types = home_types.replace(\"3\", \"Townhouse\")\n        if \"4\" in home_types:\n            home_types = home_types.replace(\"4\", r\"Multi-Family \\(2-4 Unit\\)\")\n\n        try:\n            df = (\n                pl.read_csv(io.StringIO(csv_text), dtypes=self.DESIRED_CSV_SCHEMA)\n                .filter(\n                    pl.col(\"PROPERTY TYPE\").str.contains(\n                        \"|\".join(home_types.split(\",\"))\n                    )\n                )\n                .select(\n                    \"ADDRESS\",\n                    \"CITY\",\n                    \"STATE OR PROVINCE\",\n                    \"YEAR BUILT\",\n                    \"ZIP OR POSTAL CODE\",\n                    \"PRICE\",\n                    \"SQUARE FEET\",\n                    \"URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)\",\n                    \"LATITUDE\",\n                    \"LONGITUDE\",\n                )\n            )\n            if df.height == 0:\n                log(\n                    \"CSV was empty. 
This can happen if local MLS rules dont allow downloads.\",\n                    \"debug\",\n                )\n                return None\n        except Exception as e:\n            log(f\"Could not read gis csv into dataframe.\\n{csv_text = }\\n{e}\", \"warn\")\n            return None\n        return df\n\n    def get_gis_csv_for_zips_in_metro_with_filters(\n        self, msa_name: str, search_filters: dict[str, Any]\n    ) -> pl.DataFrame | None:\n        \"\"\"Get a DataFrame of all GIS CSVs of a Metropolitan Statistical Area.\n\n        Args:\n            msa_name (str): a Metropolitan Statistical Area\n            search_filters (dict[str, Any]): filters to search with. generate using :meth:\n\n        Returns:\n            pl.DataFrame | None: return a DataFrame of all GIS CSVs retrieved for individual ZIP codes. None if there were no CSVs\n        \"\"\"\n        log(f\"Searching {msa_name} with filters {search_filters}.\", \"log\")\n        zip_codes = metro_name_to_zip_code_list(msa_name)\n        formatted_zip_codes = [f\"{zip_code:0{5}}\" for zip_code in zip_codes]\n        log(\n            f\"Estimated search time: {len(formatted_zip_codes) * (1.75+1.5)}\",\n            \"info\",\n        )\n        list_of_csv_dfs = []\n        for zip in formatted_zip_codes:\n            time.sleep(random.uniform(1.5, 2))\n            self.set_search_params(zip, search_filters)\n            temp = self.get_gis_csv_from_zip_with_filters()\n            if temp is None:\n                log(f\"Did not find any houses in {zip}.\", \"info\")\n                continue\n            log(f\"Found data for {temp.height} houses in {zip}.\", \"info\")\n            list_of_csv_dfs.append(temp)\n\n        if len(list_of_csv_dfs) == 0:\n            return None\n        return pl.concat(list_of_csv_dfs)\n\n    def get_house_attributes_from_metro(\n        self,\n        msa_name: str,\n        search_filters: dict[str, Any],\n        use_cached_gis_csv_csv: bool = False,\n    ) 
-> None:\n        \"\"\"Main function. Get the heating attributes of a Metropolitan Statistical Area.\n\n        TODO:\n            statistics on metropolitan\n            Log statistics about the heating outlook of a metro.\n\n        Args:\n            msa_name (str): Metropolitan Statistical Area name\n            search_filters (dict[str, Any]): search filters\n            use_cached_gis_csv_csv (bool, optional): Whether to use an already made GIS CSV DataFrame. Defaults to False.\n\n        Returns:\n            None: None if there were no houses found in the metro\n        \"\"\"\n        msa_name_file_safe = msa_name.strip().replace(\", \", \"_\").replace(\" \", \"_\")\n        metro_output_dir_path = Path(OUTPUT_DIR_PATH) / msa_name_file_safe\n\n        if use_cached_gis_csv_csv:\n            log(\"Loading csv from cache.\", \"info\")\n            try:\n                search_page_csvs_df = pl.read_csv(\n                    metro_output_dir_path / (msa_name_file_safe + \".csv\"),\n                    dtypes=self.DESIRED_CSV_SCHEMA,\n                )\n                log(\n                    f\"Loading csv from {metro_output_dir_path / (msa_name_file_safe + \".csv\")} is complete.\",\n                    \"info\",\n                )\n            except FileNotFoundError:\n                log(\n                    f\"Loading csv from {metro_output_dir_path / (msa_name_file_safe + \".csv\")} has failed, continuing with API search.\",\n                    \"info\",\n                )\n                search_page_csvs_df = self.get_gis_csv_for_zips_in_metro_with_filters(\n                    msa_name, search_filters\n                )\n        else:\n            search_page_csvs_df = self.get_gis_csv_for_zips_in_metro_with_filters(\n                msa_name, search_filters\n            )\n\n        if search_page_csvs_df is None:\n            log(f\"No houses found within {msa_name}. 
Try relaxing filters.\", \"info\")\n            return None\n\n        url_col_name = \"URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)\"\n        search_page_csvs_df = search_page_csvs_df.filter(\n            (~pl.col(url_col_name).str.contains(\"(?i)unknown\"))\n            .and_(pl.col(\"ADDRESS\").str.len_chars().gt(0))\n            .and_(pl.col(\"SQUARE FEET\").is_not_null())\n            .and_(pl.col(\"YEAR BUILT\").is_not_null())\n        )\n        # .unique(subset=[\"LATITUDE\", \"LONGITUDE\"], maintain_order=True)\n        # sometimes when there are two of the same listings you'll see the lot and the house. cant determine at this stage, so just leaving duplicates. hopefully this can be handled in viewer\n        # also somehow gets GIS-CSV for search pages that dont allow it\n\n        log(f\"Found {search_page_csvs_df.height} possible houses in {msa_name}\", \"info\")\n        os.makedirs(metro_output_dir_path, exist_ok=True)\n        log(\n            f\"Writing csv for metro to {metro_output_dir_path / (msa_name_file_safe + \".csv\")}\",\n            \"debug\",\n        )\n        search_page_csvs_df.write_csv(\n            metro_output_dir_path / (msa_name_file_safe + \".csv\")\n        )\n\n        # go through whole csv and get the house attributes for each house. 
then partition the dataframe by ZIP and save files\n\n        log(\"Starting lookups on listing URLS\", \"info\")\n        log(\n            f\"Unique ZIP codes: {search_page_csvs_df[\"ZIP OR POSTAL CODE\"].n_unique()}\",\n            \"info\",\n        )\n        log(\n            f\"Estimated completion time: {search_page_csvs_df.height * 3.58} seconds\",\n            \"info\",\n        )\n\n        list_of_dfs_by_zip = search_page_csvs_df.partition_by(\"ZIP OR POSTAL CODE\")\n\n        for df_of_zip in list_of_dfs_by_zip:\n            df_of_zip = (\n                df_of_zip.with_columns(\n                    pl.concat_list([pl.col(\"ADDRESS\"), pl.col(url_col_name)])\n                    .map_elements(self.get_heating_terms_dict_from_listing)\n                    .alias(\"nest\")\n                )\n                .drop(url_col_name)\n                .unnest(\"nest\")\n            )\n\n            zip = df_of_zip.select(\"ZIP OR POSTAL CODE\").item(0, 0)\n            df_of_zip.write_csv(f\"{metro_output_dir_path}{os.sep}{zip}.csv\")\n\n        # log(f\"In {msa_name}, there are {} homes with Electric fuel, {} homes with Natural Gas, {} homes with Propane, {} homes with Diesel/Heating Oil, {} homes with Wood/Pellet, {} homes with Solar Heating, {} homes with Heat Pumps, {} homes with Baseboard, {} homes with Furnace, {} homes with Boiler, {} homes with Radiator, {} homes with Radiant Floor\")\n        log(f\"Done with searching houses in {msa_name}!\", \"info\")\n
    "},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi.get_gis_csv","title":"get_gis_csv(params)","text":"

    Get the gis-csv of an area based on the contents of params

    Parameters:

    Name Type Description Default params dict[str, Any]

    the parameters

    required

    Returns:

    Name Type Description str str

    the CSV file as a unicode string

    Source code in src\\backend\\redfinscraper.py
    def get_gis_csv(self, params: dict[str, Any]) -> str:\n    \"\"\"Get the gis-csv of an area based on the contents of `params`\n\n    Args:\n        params (dict[str, Any]): the parameters\n\n    Returns:\n        str: the CSV file as a unicode string\n    \"\"\"\n    return self.meta_request_download(\"api/gis-csv\", search_params=params)\n
    "},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi.get_gis_csv_for_zips_in_metro_with_filters","title":"get_gis_csv_for_zips_in_metro_with_filters(msa_name, search_filters)","text":"

    Get a DataFrame of all GIS CSVs of a Metropolitan Statistical Area.

    Parameters:

    Name Type Description Default msa_name str

    a Metropolitan Statistical Area

    required search_filters dict[str, Any]

    filters to search with. generate using :meth:

    required

    Returns:

    Type Description DataFrame | None

    pl.DataFrame | None: return a DataFrame of all GIS CSVs retrieved for individual ZIP codes. None if there were no CSVs

    Source code in src\\backend\\redfinscraper.py
    def get_gis_csv_for_zips_in_metro_with_filters(\n    self, msa_name: str, search_filters: dict[str, Any]\n) -> pl.DataFrame | None:\n    \"\"\"Get a DataFrame of all GIS CSVs of a Metropolitan Statistical Area.\n\n    Args:\n        msa_name (str): a Metropolitan Statistical Area\n        search_filters (dict[str, Any]): filters to search with. generate using :meth:\n\n    Returns:\n        pl.DataFrame | None: return a DataFrame of all GIS CSVs retrieved for individual ZIP codes. None if there were no CSVs\n    \"\"\"\n    log(f\"Searching {msa_name} with filters {search_filters}.\", \"log\")\n    zip_codes = metro_name_to_zip_code_list(msa_name)\n    formatted_zip_codes = [f\"{zip_code:0{5}}\" for zip_code in zip_codes]\n    log(\n        f\"Estimated search time: {len(formatted_zip_codes) * (1.75+1.5)}\",\n        \"info\",\n    )\n    list_of_csv_dfs = []\n    for zip in formatted_zip_codes:\n        time.sleep(random.uniform(1.5, 2))\n        self.set_search_params(zip, search_filters)\n        temp = self.get_gis_csv_from_zip_with_filters()\n        if temp is None:\n            log(f\"Did not find any houses in {zip}.\", \"info\")\n            continue\n        log(f\"Found data for {temp.height} houses in {zip}.\", \"info\")\n        list_of_csv_dfs.append(temp)\n\n    if len(list_of_csv_dfs) == 0:\n        return None\n    return pl.concat(list_of_csv_dfs)\n
    "},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi.get_gis_csv_from_zip_with_filters","title":"get_gis_csv_from_zip_with_filters()","text":"

    Clean the GIS CSV retrieved using the search_params field into the desired schema.

    Returns:

    Type Description DataFrame | None

    pl.DataFrame | None: returns the DataFrame of cleaned information. None if there was no information in the GIS CSV file.

    Source code in src\\backend\\redfinscraper.py
    def get_gis_csv_from_zip_with_filters(\n    self,\n) -> pl.DataFrame | None:\n    \"\"\"Clean the GIS CSV retrieved from using the `search_params` field into the desired schema.\n\n    Returns:\n        pl.DataFrame | None: returns the DataFrame of cleaned information. None if there was not information in the GIS CSV file.\n    \"\"\"\n    if self.search_params is None:\n        return\n    csv_text = self.get_gis_csv(self.search_params)\n\n    home_types: str = self.search_params.get(\"uipt\", \"\")\n    if \"1\" in home_types:\n        home_types = home_types.replace(\"1\", \"Single Family Residential\")\n    if \"2\" in home_types:\n        home_types = home_types.replace(\"2\", \"Condo/Co-op\")\n    if \"3\" in home_types:\n        home_types = home_types.replace(\"3\", \"Townhouse\")\n    if \"4\" in home_types:\n        home_types = home_types.replace(\"4\", r\"Multi-Family \\(2-4 Unit\\)\")\n\n    try:\n        df = (\n            pl.read_csv(io.StringIO(csv_text), dtypes=self.DESIRED_CSV_SCHEMA)\n            .filter(\n                pl.col(\"PROPERTY TYPE\").str.contains(\n                    \"|\".join(home_types.split(\",\"))\n                )\n            )\n            .select(\n                \"ADDRESS\",\n                \"CITY\",\n                \"STATE OR PROVINCE\",\n                \"YEAR BUILT\",\n                \"ZIP OR POSTAL CODE\",\n                \"PRICE\",\n                \"SQUARE FEET\",\n                \"URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)\",\n                \"LATITUDE\",\n                \"LONGITUDE\",\n            )\n        )\n        if df.height == 0:\n            log(\n                \"CSV was empty. 
This can happen if local MLS rules dont allow downloads.\",\n                \"debug\",\n            )\n            return None\n    except Exception as e:\n        log(f\"Could not read gis csv into dataframe.\\n{csv_text = }\\n{e}\", \"warn\")\n        return None\n    return df\n
    "},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi.get_heating_info_from_super_group","title":"get_heating_info_from_super_group(super_group)","text":"

    Extract heating information from a super group

    : Must supply a probable heating group for accurate information

    Format of super group in JSON:\n{\n    types: []\n    amenityGroups: [\n        {\n            groupTitle: \"\"\n            referenceName : \"\"\n            amenityEntries : [\n                {\n                    amenityName : \"\"\n                    referenceName: \"\"\n                    accessLevel : 1\n                    displayLevel : 1\n                    amenityValues : []\n                },...\n            ]\n        }\n    ]\n    titleString: \"\"\n}\n\nFormat of groupTitle/propertyDetailsHeader on website:\n    Interior -> titleString\n    ...\n        Heating & Cooling -> groupTitle\n            Electric -> no amenityName\n            Ceiling Fan(s), Programmable Thermostat, Refrigeration -> no amenityName\n            Heating/Cooling Updated In: 2022 -> amenityName = Heating/Cooling Updated In\n

    Parameters:

    Name Type Description Default super_group dict

    the super group to extract terms from

    required

    Returns:

    Type Description list[str]

    list[str]: list of heating terms

    Source code in src\\backend\\redfinscraper.py
    def get_heating_info_from_super_group(self, super_group: dict) -> list[str]:\n    \"\"\"Extract heating information from a super group\n\n    :\n        Must supply a probable heating group for accurate information\n\n        Format of super group in JSON:\n        {\n            types: []\n            amenityGroups: [\n                {\n                    groupTitle: \"\"\n                    referenceName : \"\"\n                    amenityEntries : [\n                        {\n                            amenityName : \"\"\n                            referenceName: \"\"\n                            accessLevel : 1\n                            displayLevel : 1\n                            amenityValues : []\n                        },...\n                    ]\n                }\n            ]\n            titleString: \"\"\n        }\n\n        Format of groupTitle/propertyDetailsHeader on website:\n            Interior -> titleString\n            ...\n                Heating & Cooling -> groupTitle\n                    Electric -> no amenityName\n                    Ceiling Fan(s), Programmable Thermostat, Refrigeration -> no amenityName\n                    Heating/Cooling Updated In: 2022 -> amenityName = Heating/Cooling Updated In\n\n    Args:\n        super_group (dict): the super group to extract terms from\n\n    Returns:\n        list[str]: list of heating terms\n    \"\"\"\n    amenity_values = []\n    for amenity in super_group.get(\"amenityGroups\", \"\"):  #\n        if not any(\n            AMENITY_GROUP_INCLUDE_PATTERNS.findall(amenity.get(\"groupTitle\", \"\"))\n        ):\n            continue  # this is the name that is bold\n        # these are the bulleted items.\n        for amenity_entry in amenity.get(\"amenityEntries\", \"\"):\n            # if == \"\", then item is dangling (no word before colon). 
give the same treatment to \"utilities: ...\" as if it were ==\"\"\n            amenity_name = amenity_entry.get(\"amenityName\", \"\")\n\n            if amenity_name and not any(\n                re.compile(\"utilit\", re.I).findall(amenity_name)\n            ):\n                # filter the before colon. first if is to have stricter capture rule when amenity item is \"Utilities: Natural gas, heat pump, ...\"\n                if any(\n                    AMENITY_NAME_INCLUDE_PATTERNS.findall(amenity_name)\n                ) and not any(AMENITY_NAME_EXCLUDE_PATTERNS.findall(amenity_name)):\n                    amenity_values.extend(\n                        [\n                            value\n                            for value in amenity_entry.get(\"amenityValues\", \"\")\n                            if any(\n                                regex.findall(value)\n                                for regex in AFTER_COLON_FUEL_AND_APPLIANCE_INCLUDE_PATTERNS\n                            )\n                            and not any(AFTER_COLON_EXCLUDE_PATTERNS.findall(value))\n                        ]\n                    )\n            else:\n                # filter for appliance if dangling or in utilities bullet item\n                amenity_values.extend(\n                    [\n                        value\n                        for value in amenity_entry.get(\"amenityValues\", \"\")\n                        if any(\n                            regex.findall(value)\n                            for regex in APPLIANCE_HEATING_RELATED_PATTERNS\n                        )\n                    ]\n                )\n    return amenity_values\n
    "},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi.get_heating_terms_dict_from_listing","title":"get_heating_terms_dict_from_listing(address_and_url_list)","text":"

    Generate a filled out dictionary based on self.column_dict and the contents of :meth:get_heating_info_from_super_group(address_url_list).

    TODO

    Since addresses can be doubled and it is random which one gets chosen, just printing listing url so that we can see which one has been chosen

    Parameters:

    Name Type Description Default address_and_url_list list[str]

    address in the first position, and the listing URL in the second position

    required

    Returns:

    Type Description dict[str, bool]

    dict[str, bool]: the filled out self.column_dict for the supplied address/listing URL

    Source code in src\\backend\\redfinscraper.py
    def get_heating_terms_dict_from_listing(\n    self, address_and_url_list: list[str]\n) -> dict[str, bool]:\n    \"\"\"Generate a filled out dictionary based on `self.column_dict` and the contents of :meth:get_heating_info_from_super_group(address_url_list).\n\n    TODO:\n        Since addresses can be doubled and it is random which one gets chosen, just printing listing url so that we can see which one has been chosen\n\n    Args:\n        address_and_url_list (list[str]): address in the first position, and the listing URL in the second position\n\n    Returns:\n        dict[str, bool]: the filled out `self.column_dict` for the supplied address/listing URL\n    \"\"\"\n    address = address_and_url_list[0]\n    listing_url = address_and_url_list[1]\n    terms = []\n\n    super_groups = self.get_super_groups_from_url(listing_url)\n    if super_groups is None:\n        log(\n            \"No amenities found\", \"info\"\n        )  # this and \"There was no heating information for {address}\" should be made in caller?\n        return copy.deepcopy(self.column_dict)\n    for super_group in super_groups:  # dict\n        if any(\n            SUPER_GROUP_INCLUDE_PATTERNS.findall(super_group.get(\"titleString\", \"\"))\n        ):\n            terms.extend(\n                self.get_heating_info_from_super_group(super_group)\n            )  # this will be like [gas, electricity, heat pump]\n    if len(terms) == 0:\n        log(\n            f\"There was no heating information for {urlparse(listing_url).path}\",\n            \"info\",\n        )\n        return copy.deepcopy(self.column_dict)\n\n    # categorize the correct dict and return\n    master_dict = copy.deepcopy(self.column_dict)\n    for input_string in terms:\n        log(f\"{input_string = }\", \"debug\")\n        result = {}\n        for key, pattern in CATEGORY_PATTERNS.items():\n            if bool(re.search(pattern, input_string)):\n                result[key] = True\n                log(f\"Pattern 
matched on {key, pattern = }\", \"debug\")\n            log(f\"Pattern did not match on {key, pattern = }\", \"debug\")\n        for key in result.keys():\n            master_dict[key] = result[key] | master_dict[key]\n\n    # You'll have to df.unnest this for use in a dataframe\n    log(f\"{terms = }\", \"debug\")\n    log(f\"{master_dict = }\", \"debug\")\n    log(f\"Heating amenities found for {address}.\", \"info\")\n    return master_dict\n
    "},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi.get_house_attributes_from_metro","title":"get_house_attributes_from_metro(msa_name, search_filters, use_cached_gis_csv_csv=False)","text":"

    Main function. Get the heating attributes of a Metropolitan Statistical Area.

    TODO

    statistics on metropolitan Log statistics about the heating outlook of a metro.

    Parameters:

    Name Type Description Default msa_name str

    Metropolitan Statistical Area name

    required search_filters dict[str, Any]

    search filters

    required use_cached_gis_csv_csv bool

    Whether to use an already made GIS CSV DataFrame. Defaults to False.

    False

    Returns:

    Name Type Description None None

    None if there were no houses found in the metro

    Source code in src\\backend\\redfinscraper.py
    def get_house_attributes_from_metro(\n    self,\n    msa_name: str,\n    search_filters: dict[str, Any],\n    use_cached_gis_csv_csv: bool = False,\n) -> None:\n    \"\"\"Main function. Get the heating attributes of a Metropolitan Statistical Area.\n\n    TODO:\n        statistics on metropolitan\n        Log statistics about the heating outlook of a metro.\n\n    Args:\n        msa_name (str): Metropolitan Statistical Area name\n        search_filters (dict[str, Any]): search filters\n        use_cached_gis_csv_csv (bool, optional): Whether to use an already made GIS CSV DataFrame. Defaults to False.\n\n    Returns:\n        None: None if there were no houses found in the metro\n    \"\"\"\n    msa_name_file_safe = msa_name.strip().replace(\", \", \"_\").replace(\" \", \"_\")\n    metro_output_dir_path = Path(OUTPUT_DIR_PATH) / msa_name_file_safe\n\n    if use_cached_gis_csv_csv:\n        log(\"Loading csv from cache.\", \"info\")\n        try:\n            search_page_csvs_df = pl.read_csv(\n                metro_output_dir_path / (msa_name_file_safe + \".csv\"),\n                dtypes=self.DESIRED_CSV_SCHEMA,\n            )\n            log(\n                f\"Loading csv from {metro_output_dir_path / (msa_name_file_safe + \".csv\")} is complete.\",\n                \"info\",\n            )\n        except FileNotFoundError:\n            log(\n                f\"Loading csv from {metro_output_dir_path / (msa_name_file_safe + \".csv\")} has failed, continuing with API search.\",\n                \"info\",\n            )\n            search_page_csvs_df = self.get_gis_csv_for_zips_in_metro_with_filters(\n                msa_name, search_filters\n            )\n    else:\n        search_page_csvs_df = self.get_gis_csv_for_zips_in_metro_with_filters(\n            msa_name, search_filters\n        )\n\n    if search_page_csvs_df is None:\n        log(f\"No houses found within {msa_name}. 
Try relaxing filters.\", \"info\")\n        return None\n\n    url_col_name = \"URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)\"\n    search_page_csvs_df = search_page_csvs_df.filter(\n        (~pl.col(url_col_name).str.contains(\"(?i)unknown\"))\n        .and_(pl.col(\"ADDRESS\").str.len_chars().gt(0))\n        .and_(pl.col(\"SQUARE FEET\").is_not_null())\n        .and_(pl.col(\"YEAR BUILT\").is_not_null())\n    )\n    # .unique(subset=[\"LATITUDE\", \"LONGITUDE\"], maintain_order=True)\n    # sometimes when there are two of the same listings you'll see the lot and the house. cant determine at this stage, so just leaving duplicates. hopefully this can be handled in viewer\n    # also somehow gets GIS-CSV for search pages that dont allow it\n\n    log(f\"Found {search_page_csvs_df.height} possible houses in {msa_name}\", \"info\")\n    os.makedirs(metro_output_dir_path, exist_ok=True)\n    log(\n        f\"Writing csv for metro to {metro_output_dir_path / (msa_name_file_safe + \".csv\")}\",\n        \"debug\",\n    )\n    search_page_csvs_df.write_csv(\n        metro_output_dir_path / (msa_name_file_safe + \".csv\")\n    )\n\n    # go through whole csv and get the house attributes for each house. 
then partition the dataframe by ZIP and save files\n\n    log(\"Starting lookups on listing URLS\", \"info\")\n    log(\n        f\"Unique ZIP codes: {search_page_csvs_df[\"ZIP OR POSTAL CODE\"].n_unique()}\",\n        \"info\",\n    )\n    log(\n        f\"Estimated completion time: {search_page_csvs_df.height * 3.58} seconds\",\n        \"info\",\n    )\n\n    list_of_dfs_by_zip = search_page_csvs_df.partition_by(\"ZIP OR POSTAL CODE\")\n\n    for df_of_zip in list_of_dfs_by_zip:\n        df_of_zip = (\n            df_of_zip.with_columns(\n                pl.concat_list([pl.col(\"ADDRESS\"), pl.col(url_col_name)])\n                .map_elements(self.get_heating_terms_dict_from_listing)\n                .alias(\"nest\")\n            )\n            .drop(url_col_name)\n            .unnest(\"nest\")\n        )\n\n        zip = df_of_zip.select(\"ZIP OR POSTAL CODE\").item(0, 0)\n        df_of_zip.write_csv(f\"{metro_output_dir_path}{os.sep}{zip}.csv\")\n\n    # log(f\"In {msa_name}, there are {} homes with Electric fuel, {} homes with Natural Gas, {} homes with Propane, {} homes with Diesel/Heating Oil, {} homes with Wood/Pellet, {} homes with Solar Heating, {} homes with Heat Pumps, {} homes with Baseboard, {} homes with Furnace, {} homes with Boiler, {} homes with Radiator, {} homes with Radiant Floor\")\n    log(f\"Done with searching houses in {msa_name}!\", \"info\")\n
    "},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi.get_region_info_from_zipcode","title":"get_region_info_from_zipcode(zip_code)","text":"

    Get the region info from a ZIP code.

    Parameters:

    Name Type Description Default zip_code str

    the ZIP code

    required

    Returns:

    Name Type Description Any Any

    response

    Source code in src\\backend\\redfinscraper.py
    def get_region_info_from_zipcode(self, zip_code: str) -> Any:\n    \"\"\"Get the region ifo from a ZIP code.\n\n    Args:\n        zip_code (str): the ZIP code\n\n    Returns:\n        Any: response\n    \"\"\"\n    return self.rf.meta_request(\n        \"api/region\", {\"region_id\": zip_code, \"region_type\": 2, \"tz\": True, \"v\": 8}\n    )\n
    "},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi.get_super_groups_from_url","title":"get_super_groups_from_url(listing_url)","text":"

    Get super group list from listing url.

    Parameters:

    Name Type Description Default listing_url str

    The path part of the listing URL. This is without the \"redfin.com\" part. Include the first forward slash

    required

    Returns:

    Type Description list | None

    list | None: List of all super groups from a Redfin Url. None if an error is encountered or if no super groups were found

    Source code in src\\backend\\redfinscraper.py
    def get_super_groups_from_url(self, listing_url: str) -> list | None:\n    \"\"\"Get super group list from listing url.\n\n    Args:\n        listing_url (str): The path part of the listing URL. This is without the \"redfin.com\" part. Include the first forward slash\n\n    Returns:\n        list | None: List of all super groups from a Redfin Url. None if an error is encountered or if no super groups were found\n    \"\"\"\n    if \"redfin\" in listing_url:\n        listing_url = urlparse(listing_url).path\n\n    try:\n        time.sleep(random.uniform(1.2, 2.1))\n        initial_info = self.rf.initial_info(listing_url)\n    except json.JSONDecodeError:\n        log(f\"Could not get initial info for {listing_url =}\", \"warn\")\n        return None\n    try:\n        property_id = initial_info[\"payload\"][\"propertyId\"]\n    except KeyError:\n        log(\"Could not find property id\", \"critical\")\n        return None\n    try:\n        listing_id = initial_info[\"payload\"][\"listingId\"]\n    except KeyError:\n        listing_id = None\n        log(\n            \"Could not find listing id. Will try to continue. if errors in final zip csv, this might be the issue\",\n            \"warn\",\n        )\n    try:\n        time.sleep(random.uniform(1.1, 2.1))\n        if listing_id is None:\n            mls_data = self.working_below_the_fold(property_id)\n        else:\n            mls_data = self.working_below_the_fold(property_id, listing_id)\n    except json.JSONDecodeError:\n        log(f\"Could not find mls details for {listing_url = }\", \"warn\")\n        return None\n    try:\n        super_groups = mls_data[\"payload\"][\"amenitiesInfo\"][\"superGroups\"]\n    except KeyError:\n        log(f\"Could not find property details for {listing_url = }\", \"warn\")\n        return None\n    return super_groups\n
    "},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi.meta_request_download","title":"meta_request_download(url, search_params)","text":"

    Method for downloading objects from Redfin.

    Parameters:

    Name Type Description Default url str

    the Redfin URL

    required

    Returns:

    Name Type Description str str

    the unicode text response

    Source code in src\\backend\\redfinscraper.py
    def meta_request_download(self, url: str, search_params) -> str:\n    \"\"\"Method for downloading objects from Redfin.\n\n    Args:\n        url (str): the Redfin URL\n\n    Returns:\n        str: the unicode text response\n    \"\"\"\n    response = requests.get(\n        self.rf.base + url, params=search_params, headers=self.rf.user_agent_header\n    )\n    log(response.request.url, \"debug\")\n    response.raise_for_status()\n    return response.text\n
    "},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi.set_search_params","title":"set_search_params(zip, search_filters)","text":"

    Set the parameters for searching by ZIP code.

    Parameters:

    Name Type Description Default zip str

    the ZIP code

    required search_filters dict[str, Any]

    search filters for appending to a gis-csv path

    required Source code in src\\backend\\redfinscraper.py
    def set_search_params(self, zip: str, search_filters: dict[str, Any]) -> None:\n    \"\"\"Set the parameters for searching by ZIP code.\n\n    Args:\n        zip (str): the ZIP code\n        search_filters (dict[str, Any]): search filters for appending to a gis-csv path\n    \"\"\"\n    try:\n        region_info = self.get_region_info_from_zipcode(zip)\n    except json.JSONDecodeError:\n        log(f\"Could not decode region info for {zip}.\", \"warn\")\n        return None\n    except HTTPError:\n        log(f\"Could not retrieve region info for {zip}.\", \"warn\")\n        return None\n\n    if search_filters.get(\"for sale sold\") == \"Sold\":\n        sort_order = self.SortOrder.MOST_RECENTLY_SOLD.value\n    else:\n        sort_order = self.SortOrder.NEWEST.value\n    # TODO make sure to fix filtering so that its not just \"single family homes\"\n\n    try:\n        market = region_info[\"payload\"][\"rootDefaults\"][\"market\"]\n        region_id = region_info[\"payload\"][\"rootDefaults\"][\"region_id\"]\n        status = str(region_info[\"payload\"][\"rootDefaults\"][\"status\"])\n    except KeyError:\n        log(\"Market, region, or status could not be identified \", \"warn\")\n        return None\n\n    self.search_params = {\n        \"al\": 1,\n        \"has_deal\": \"false\",\n        \"has_dishwasher\": \"false\",\n        \"has_laundry_facility\": \"false\",\n        \"has_laundry_hookups\": \"false\",\n        \"has_parking\": \"false\",\n        \"has_pool\": \"false\",\n        \"has_short_term_lease\": \"false\",\n        \"include_pending_homes\": \"false\",  # probably an \"include\" option\n        \"isRentals\": \"false\",\n        \"is_furnished\": \"false\",\n        \"is_income_restricted\": \"false\",\n        \"is_senior_living\": \"false\",\n        \"max_year_built\": search_filters.get(\"max year built\"),\n        \"min_year_built\": search_filters.get(\"min year built\"),\n        \"market\": market,\n        \"min_stories\": 
search_filters.get(\"min stories\"),\n        \"num_homes\": 350,\n        \"ord\": sort_order,\n        \"page_number\": \"1\",\n        \"pool\": \"false\",\n        \"region_id\": region_id,\n        \"region_type\": \"2\",\n        \"status\": status,\n        \"travel_with_traffic\": \"false\",\n        \"travel_within_region\": \"false\",\n        \"utilities_included\": \"false\",\n        \"v\": \"8\",\n    }\n    if search_filters.get(\"for sale sold\") == \"Sold\":\n        self.search_params[\"sold_within_days\"] = search_filters.get(\"sold within\")\n        self.search_params[\"status\"] = 9\n    else:\n        self.search_params[\"sf\"] = \"1, 2, 3, 4, 5, 6, 7\"\n        match [\n            search_filters.get(\"status coming soon\"),\n            search_filters.get(\"status active\"),\n            search_filters.get(\"status pending\"),\n        ]:\n            case [True, False, False]:\n                status = \"8\"\n            case [False, True, False]:\n                status = \"1\"\n            case [False, False, True]:\n                status = \"130\"\n            case [True, True, False]:\n                status = \"9\"\n            case [False, True, True]:\n                status = \"139\"\n            case [True, False, True]:\n                status = \"138\"\n            case [True, True, True]:\n                status = \"139\"\n\n        self.search_params[\"status\"] = status\n\n    if (max_sqft := search_filters.get(\"max sqft\")) != \"None\":\n        self.search_params[\"max_sqft\"] = max_sqft\n    if (min_sqft := search_filters.get(\"min sqft\")) != \"None\":\n        self.search_params[\"min_sqft\"] = min_sqft\n\n    if (max_price := search_filters.get(\"max price\")) != \"None\":\n        self.search_params[\"max_price\"] = max_price\n    if (min_price := search_filters.get(\"min price\")) != \"None\":\n        self.search_params[\"min_price\"] = min_price\n\n    houses = \"\"  # figure out how to join into comma string\n    
if search_filters.get(\"house type house\") is True:\n        houses = houses + \"1\"\n    if search_filters.get(\"house type condo\") is True:\n        houses = houses + \"2\"\n    if search_filters.get(\"house type townhouse\") is True:\n        houses = houses + \"3\"\n    if search_filters.get(\"house type mul fam\") is True:\n        houses = houses + \"4\"\n\n    self.search_params[\"uipt\"] = \",\".join(list(houses))\n
    "},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi.working_below_the_fold","title":"working_below_the_fold(property_id, listing_id='')","text":"

    A below_the_fold method that accepts a listing ID. Note: If you can get the listing ID, make sure to pass it to this function. You will possibly get incorrect data if you do not pass it

    Parameters:

    Name Type Description Default property_id str

    the property ID

    required listing_id str

    The listing ID. Defaults to an empty string.

    ''

    Returns:

    Name Type Description Any Any

    response

    Source code in src\\backend\\redfinscraper.py
    def working_below_the_fold(self, property_id: str, listing_id: str = \"\") -> Any:\n    \"\"\"A below_the_fold method that accepts a listing ID.\n    Note:\n        If you can get the listing ID, make sure to pass it to this function. You will possibly get incorrect data if you do not pass it\n\n    Args:\n        property_id (str): the property ID\n        listing_id (str): The listing ID. Defaults to False.\n\n    Returns:\n        Any: response\n    \"\"\"\n    if listing_id:\n        params = {\n            \"accessLevel\": 1,\n            \"propertyId\": property_id,\n            \"listingId\": listing_id,\n            \"pageType\": 1,\n        }\n    else:\n        params = {\n            \"accessLevel\": 1,\n            \"propertyId\": property_id,\n            \"pageType\": 1,\n        }\n    return self.rf.meta_request(\"/api/home/details/belowTheFold\", params)\n
    "},{"location":"backend/secondarydata/","title":"Secondarydata","text":""},{"location":"backend/secondarydata/#backend.secondarydata.CensusDataRetriever","title":"CensusDataRetriever","text":"

    Interact with the Census data API.

    Note

    ACS5 paths can be found here: https://api.census.gov/data/2019/acs/acs5.html

    Source code in src\\backend\\secondarydata.py
    class CensusDataRetriever:\n    \"\"\"Interact with the Census data API.\n\n    Note:\n        ACS5 paths can be found here: https://api.census.gov/data/2019/acs/acs5.html\"\"\"\n\n    def __init__(self) -> None:\n        self.base_url = \"https://data.census.gov/\"\n        # https://api.census.gov/data/2021/acs/acs5/profile/variables.html\n        self.api_key = os.getenv(\"CENSUS_API_KEY\")\n        if self.api_key is None:\n            log(\n                \"No Census API key found in a .env file in project directory. please request a key at https://api.census.gov/data/key_signup.html\",\n                \"critical\",\n            )\n            exit()\n        self.MAX_COL_NAME_LENGTH = 80\n\n    def _get(self, url: str) -> requests.Response | None:\n        r = requests.get(url, timeout=65)\n        if r.status_code == 400:\n            log(f\"Unknown variable {r.text.split(\"variable \")[-1]}\", \"info\")\n            return None\n        return r\n\n    def get_and_cache_data(\n        self, file_name: str, url_to_lookup_on_miss: str\n    ) -> dict[str, str] | bool:\n        \"\"\"Cache files.\n\n        Args:\n            file_name (str): file name to save/lookup\n            url_to_lookup_on_miss (str): the Census url to lookup\n\n        Returns:\n            bool | dict[str, str] | None | Any: the dict of `tablename: label` or\n        \"\"\"\n        CENSUS_DATA_DIR_PATH.mkdir(parents=True, exist_ok=True)\n\n        my_json = None\n\n        try:\n            with open(CENSUS_DATA_DIR_PATH / file_name, mode=\"r\") as f:\n                log(f\"Reading {file_name}\", \"debug\")\n                try:\n                    my_json = json.load(f)\n                except json.JSONDecodeError:\n                    log(\"Could not decode cached file\", \"error\")\n                    return False\n        except FileNotFoundError:\n            req = self._get(url_to_lookup_on_miss)\n            if req is None:\n                log(f\"Could not find census 
file {req = }\", \"error\")\n                return False\n            req.raise_for_status()\n            my_json = req.json()\n            with open(CENSUS_DATA_DIR_PATH / file_name, \"w\") as f:\n                json.dump(my_json, f)\n\n        return my_json\n\n    def get_race_makeup_by_zcta(self, zcta: str) -> str | None:\n        \"\"\"Get race make up by zcta from. DO NOT USE\n\n        Note:\n            use `get_table_group_for_zcta_by_state_by_year`\n\n        Args:\n            zcta (str): zcta\n\n        Returns:\n            str | None: text or none\n        \"\"\"\n        # get white, black, american indian/native alaskan, asian, NH/PI, other. note that these are estimates, margin of error can be had with \"M\"\n        req = self._get(\n            f\"https://api.census.gov/data/2021/acs/acs5/profile?get=DP05_0064E,DP05_0065E,DP05_0066E,DP05_0067E,DP05_0068E,DP05_0069E&for=zip%20code%20tabulation%20area:{zcta}&key={self.api_key}\"\n        )\n        if req is None:\n            return None\n        return req.text\n\n    def get_acs5_profile_table_to_group_name(\n        self, table: str, year: str\n    ) -> dict[str, Any] | None:\n        \"\"\"Get a JSON representation of a table's attributes.\n\n        Note:\n            Tables must be:\n                * DP02\n                * DP02PR\n                * DP03\n                * DP04\n                * DP05\n\n            Returned object will have entries similar to:\n            ```json\n            \"DP05_0037M\": {\n                \"label\": \"Margin of Error!!RACE!!Total population!!One race!!White\",\n                \"concept\": \"ACS DEMOGRAPHIC AND HOUSING ESTIMATES\",\n                \"predicateType\": \"int\",\n                \"group\": \"DP05\",\n                \"limit\": 0,\n                \"predicateOnly\": true\n            }\n            ```\n\n        Args:\n            table (str): the table to lookup\n            year (str): which acs5 year to look up\n\n        
Returns:\n            str | Any: json object\n        \"\"\"\n        file_name = f\"{year}-acs5-profile-groups-{table}.json\"\n        groups_url = (\n            f\"https://api.census.gov/data/{year}/acs/acs5/profile/groups/{table}.json\"\n        )\n        groups_to_label_translation = self.get_and_cache_data(file_name, groups_url)\n        if groups_to_label_translation is False:\n            log(\"Something is wrong with groups label dict\", \"warn\")\n            return None\n        return groups_to_label_translation[\"variables\"]  # type: ignore\n\n    def translate_and_truncate_unique_acs5_profile_groups_to_labels_for_header_list(\n        self, headers: list[str], table: str, year: str\n    ) -> None:\n        \"\"\"Get the label name for a table and row for the acs5 profile surveys.\n\n        Args:\n            headers (list[str]): header row\n            table (str): have to look again\n            year (str): the year\n\n        Returns:\n            None: translates the list of table_row_selector to its english label\n        \"\"\"\n        # is going to read the file multiple times, save last req as {\"table\": req_json[table]...} for this?\n        groups_to_label_translation_dict = self.get_acs5_profile_table_to_group_name(\n            table, year\n        )\n        if groups_to_label_translation_dict is None:\n            log(\"Could not translate headers\", \"warn\")\n            return groups_to_label_translation_dict\n\n        for idx, header in enumerate(headers):\n            new_col_name_dict = groups_to_label_translation_dict.get(header)\n            if new_col_name_dict is None:\n                # returns none if not in dict, means we have custom name and can continue\n                continue\n            new_col_name = new_col_name_dict[\"label\"]\n            # qgis doesnt allow field names of 80+ chars. 
massage into form, then cut off\n            # delimiter for table subsection\n            new_col_name = re.sub(\"!!\", \" \", new_col_name)\n            new_col_name = re.sub(r\"\\s+\", \" \", new_col_name)\n            # easier to read\n            new_col_name_parts = new_col_name.split(\" \")\n            for idy, no_format in enumerate(new_col_name_parts):\n                new_col_name_parts[idy] = no_format.capitalize()\n            new_col_name = \"\".join(new_col_name_parts)\n            # shortenings to fit length requirement\n            for key, value in replace_dict.items():\n                new_col_name = re.sub(key, value, new_col_name)\n            # limiter\n            new_col_name = new_col_name[\n                : min(len(new_col_name), self.MAX_COL_NAME_LENGTH)\n            ]\n\n            if new_col_name not in headers[:idx]:\n                headers[idx] = new_col_name\n\n    def get_acs5_profile_table_group_for_zcta_by_year(\n        self, table: str, year: str\n    ) -> str:\n        \"\"\"CSV output of an acs 5 year profile survey table.\n\n        TODO:\n            Update func name\n\n        Args:\n            table (str): census demo acs5 table\n            year (str): year to search\n\n        Returns:\n            str: file path where output is saved\n        \"\"\"\n        file_name = f\"{year}-acs-profile-table-{table}.json\"\n        url = f\"https://api.census.gov/data/{year}/acs/acs5/profile?get=group({table})&for=zip%20code%20tabulation%20area:*\"\n        list_of_list_table_json = self.get_and_cache_data(file_name, url)\n\n        if list_of_list_table_json is False:\n            log(\n                f\"Could not load table {table}. 
Perhaps the api is down or there was an error saving/reading the file.\",\n                \"warn\",\n            )\n            return \"\"\n\n        self.translate_and_truncate_unique_acs5_profile_groups_to_labels_for_header_list(\n            list_of_list_table_json[0],  # type: ignore\n            table,\n            year,  # type: ignore\n        )\n\n        df = pl.DataFrame(list_of_list_table_json, orient=\"row\")\n        # funky stuff to get the first list to be the name of the columns\n        df = (\n            df.rename(df.head(1).to_dicts().pop())\n            .slice(1)  # type: ignore\n            .drop(\"NAME\", cs.matches(\"(?i)^(ann)\"), cs.matches(f\"(?i){table}\"))\n            .rename({\"zip code tabulation area\": \"ZCTA\"})\n            .cast(\n                {\n                    \"ZCTA\": pl.Int32,\n                }\n            )\n        )\n        file_path = CENSUS_DATA_DIR_PATH / \"acs5-profile-group-{table}-zcta.csv\"\n        df.write_csv(file_path)\n        return str(file_path)\n\n    def get_acs5_subject_table_to_group_name(\n        self, table: str, year: str\n    ) -> dict[str, Any] | None:\n        \"\"\"Get a JSON representation of a table's attributes.\n\n        Note:\n            Tables can be found at: https://www.census.gov/acs/www/data/data-tables-and-tools/subject-tables/\n\n            Returned object will have entries similar to:\n            ```json\n            \"DP05_0037M\": {\n                \"label\": \"Margin of Error!!RACE!!Total population!!One race!!White\",\n                \"concept\": \"ACS DEMOGRAPHIC AND HOUSING ESTIMATES\",\n                \"predicateType\": \"int\",\n                \"group\": \"DP05\",\n                \"limit\": 0,\n                \"predicateOnly\": true\n            }\n            ```\n\n        Args:\n            table (str): the table to lookup\n            year (str): which acs5 year to look up\n\n        Returns:\n            str | Any: variables\n        \"\"\"\n      
  file_name = f\"{year}-acs5-subject-groups-{table}.json\"\n        groups_url = (\n            f\"https://api.census.gov/data/{year}/acs/acs5/subject/groups/{table}.json\"\n        )\n        groups_to_label_translation = self.get_and_cache_data(file_name, groups_url)\n        if groups_to_label_translation is False:\n            log(\"Something is wrong with groups label dict\", \"warn\")\n            return None\n        return groups_to_label_translation[\"variables\"]  # type: ignore\n\n    def translate_and_truncate_unique_acs5_subject_groups_to_labels_for_header_list(\n        self, headers: list[str], table: str, year: str\n    ) -> None:\n        \"\"\"Gets the label name for a table and row for the acs5 profile surveys.\n\n        Args:\n            headers (list[str]): headers\n            table (str): table\n            year (str): year\n        \"\"\"\n        # is going to read the file multiple times, save last req as {\"table\": req_json[table]...} for this?\n        groups_to_label_translation_dict = self.get_acs5_subject_table_to_group_name(\n            table, year\n        )\n        if groups_to_label_translation_dict is None:\n            log(\"Could not translate headers\", \"warn\")\n            return groups_to_label_translation_dict\n\n        for idx, header in enumerate(headers):\n            new_col_name_dict = groups_to_label_translation_dict.get(header)\n            if new_col_name_dict is None:\n                # returns none if not in dict, means we have custom name and can continue\n                continue\n            new_col_name = new_col_name_dict[\"label\"]\n            # qgis doesnt allow field names of 80+ chars. 
massage into form, then cut off\n            # delimiter for table subsection\n            new_col_name = re.sub(\"!!\", \" \", new_col_name)\n            new_col_name = re.sub(r\"\\s+\", \" \", new_col_name)\n            # easier to read\n            new_col_name_parts = new_col_name.split(\" \")\n            for idy, no_format in enumerate(new_col_name_parts):\n                new_col_name_parts[idy] = no_format.capitalize()\n            new_col_name = \"\".join(new_col_name_parts)\n            # shortenings to fit length requirement\n            for key, value in replace_dict.items():\n                new_col_name = re.sub(key, value, new_col_name)\n            # limiter\n            new_col_name = new_col_name[\n                : min(len(new_col_name), self.MAX_COL_NAME_LENGTH)\n            ]\n\n            if new_col_name not in headers[:idx]:\n                headers[idx] = new_col_name\n\n    def get_acs5_subject_table_group_for_zcta_by_year(\n        self, table: str, year: str\n    ) -> str:\n        \"\"\"CSV output of a acs 5 year subject survey table\n\n        Args:\n            table (str): census acs5 table\n            year (str): year to search\n        \"\"\"\n        file_name = f\"{year}-acs-subject-table-{table}.json\"\n        url = f\"https://api.census.gov/data/{year}/acs/acs5/subject?get=group({table})&for=zip%20code%20tabulation%20area:*\"\n        list_of_list_table_json = self.get_and_cache_data(file_name, url)\n\n        if list_of_list_table_json is False:\n            log(\n                f\"Could not load table {table}. 
Perhaps the api is down or there was an error saving/reading the file.\",\n                \"warn\",\n            )\n            return \"\"\n\n        self.translate_and_truncate_unique_acs5_subject_groups_to_labels_for_header_list(\n            list_of_list_table_json[0],  # type: ignore\n            table,\n            year,  # type: ignore\n        )\n\n        df = pl.DataFrame(list_of_list_table_json, orient=\"row\")\n        # funky stuff to get the first list to be the name of the columns\n        df = (\n            df.rename(df.head(1).to_dicts().pop())\n            .slice(1)  # type: ignore\n            .drop(\"NAME\", cs.matches(\"(?i)^(ann)\"), cs.matches(f\"(?i){table}\"))\n            .rename({\"zip code tabulation area\": \"ZCTA\"})\n            .cast(\n                {\n                    \"ZCTA\": pl.Int32,\n                }\n            )\n        )\n        file_path = CENSUS_DATA_DIR_PATH / \"acs5-subject-group-{table}-zcta.csv\"\n        # may not have to write. but cache func doesn't return whether it hits or not\n        df.write_csv(file_path)\n        return str(file_path)\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.CensusDataRetriever.get_acs5_profile_table_group_for_zcta_by_year","title":"get_acs5_profile_table_group_for_zcta_by_year(table, year)","text":"

    CSV output of an acs 5 year profile survey table.

    TODO

    Update func name

    Parameters:

    Name Type Description Default table str

    census demo acs5 table

    required year str

    year to search

    required

    Returns:

    Name Type Description str str

    file path where output is saved

    Source code in src\\backend\\secondarydata.py
    def get_acs5_profile_table_group_for_zcta_by_year(\n    self, table: str, year: str\n) -> str:\n    \"\"\"CSV output of an acs 5 year profile survey table.\n\n    TODO:\n        Update func name\n\n    Args:\n        table (str): census demo acs5 table\n        year (str): year to search\n\n    Returns:\n        str: file path where output is saved\n    \"\"\"\n    file_name = f\"{year}-acs-profile-table-{table}.json\"\n    url = f\"https://api.census.gov/data/{year}/acs/acs5/profile?get=group({table})&for=zip%20code%20tabulation%20area:*\"\n    list_of_list_table_json = self.get_and_cache_data(file_name, url)\n\n    if list_of_list_table_json is False:\n        log(\n            f\"Could not load table {table}. Perhaps the api is down or there was an error saving/reading the file.\",\n            \"warn\",\n        )\n        return \"\"\n\n    self.translate_and_truncate_unique_acs5_profile_groups_to_labels_for_header_list(\n        list_of_list_table_json[0],  # type: ignore\n        table,\n        year,  # type: ignore\n    )\n\n    df = pl.DataFrame(list_of_list_table_json, orient=\"row\")\n    # funky stuff to get the first list to be the name of the columns\n    df = (\n        df.rename(df.head(1).to_dicts().pop())\n        .slice(1)  # type: ignore\n        .drop(\"NAME\", cs.matches(\"(?i)^(ann)\"), cs.matches(f\"(?i){table}\"))\n        .rename({\"zip code tabulation area\": \"ZCTA\"})\n        .cast(\n            {\n                \"ZCTA\": pl.Int32,\n            }\n        )\n    )\n    file_path = CENSUS_DATA_DIR_PATH / \"acs5-profile-group-{table}-zcta.csv\"\n    df.write_csv(file_path)\n    return str(file_path)\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.CensusDataRetriever.get_acs5_profile_table_to_group_name","title":"get_acs5_profile_table_to_group_name(table, year)","text":"

    Get a JSON representation of a table's attributes.

    Note

    Tables must be: * DP02 * DP02PR * DP03 * DP04 * DP05

    Returned object will have entries similar to:

    \"DP05_0037M\": {\n    \"label\": \"Margin of Error!!RACE!!Total population!!One race!!White\",\n    \"concept\": \"ACS DEMOGRAPHIC AND HOUSING ESTIMATES\",\n    \"predicateType\": \"int\",\n    \"group\": \"DP05\",\n    \"limit\": 0,\n    \"predicateOnly\": true\n}\n

    Parameters:

    Name Type Description Default table str

    the table to lookup

    required year str

    which acs5 year to look up

    required

    Returns:

    Type Description dict[str, Any] | None

    dict[str, Any] | None: the table's variables as a JSON object, or None if the groups data could not be loaded

    Source code in src\\backend\\secondarydata.py
    def get_acs5_profile_table_to_group_name(\n    self, table: str, year: str\n) -> dict[str, Any] | None:\n    \"\"\"Get a JSON representation of a table's attributes.\n\n    Note:\n        Tables must be:\n            * DP02\n            * DP02PR\n            * DP03\n            * DP04\n            * DP05\n\n        Returned object will have entries similar to:\n        ```json\n        \"DP05_0037M\": {\n            \"label\": \"Margin of Error!!RACE!!Total population!!One race!!White\",\n            \"concept\": \"ACS DEMOGRAPHIC AND HOUSING ESTIMATES\",\n            \"predicateType\": \"int\",\n            \"group\": \"DP05\",\n            \"limit\": 0,\n            \"predicateOnly\": true\n        }\n        ```\n\n    Args:\n        table (str): the table to lookup\n        year (str): which acs5 year to look up\n\n    Returns:\n        str | Any: json object\n    \"\"\"\n    file_name = f\"{year}-acs5-profile-groups-{table}.json\"\n    groups_url = (\n        f\"https://api.census.gov/data/{year}/acs/acs5/profile/groups/{table}.json\"\n    )\n    groups_to_label_translation = self.get_and_cache_data(file_name, groups_url)\n    if groups_to_label_translation is False:\n        log(\"Something is wrong with groups label dict\", \"warn\")\n        return None\n    return groups_to_label_translation[\"variables\"]  # type: ignore\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.CensusDataRetriever.get_acs5_subject_table_group_for_zcta_by_year","title":"get_acs5_subject_table_group_for_zcta_by_year(table, year)","text":"

    CSV output of an acs 5 year subject survey table

    Parameters:

    Name Type Description Default table str

    census acs5 table

    required year str

    year to search

    required Source code in src\\backend\\secondarydata.py
    def get_acs5_subject_table_group_for_zcta_by_year(\n    self, table: str, year: str\n) -> str:\n    \"\"\"CSV output of a acs 5 year subject survey table\n\n    Args:\n        table (str): census acs5 table\n        year (str): year to search\n    \"\"\"\n    file_name = f\"{year}-acs-subject-table-{table}.json\"\n    url = f\"https://api.census.gov/data/{year}/acs/acs5/subject?get=group({table})&for=zip%20code%20tabulation%20area:*\"\n    list_of_list_table_json = self.get_and_cache_data(file_name, url)\n\n    if list_of_list_table_json is False:\n        log(\n            f\"Could not load table {table}. Perhaps the api is down or there was an error saving/reading the file.\",\n            \"warn\",\n        )\n        return \"\"\n\n    self.translate_and_truncate_unique_acs5_subject_groups_to_labels_for_header_list(\n        list_of_list_table_json[0],  # type: ignore\n        table,\n        year,  # type: ignore\n    )\n\n    df = pl.DataFrame(list_of_list_table_json, orient=\"row\")\n    # funky stuff to get the first list to be the name of the columns\n    df = (\n        df.rename(df.head(1).to_dicts().pop())\n        .slice(1)  # type: ignore\n        .drop(\"NAME\", cs.matches(\"(?i)^(ann)\"), cs.matches(f\"(?i){table}\"))\n        .rename({\"zip code tabulation area\": \"ZCTA\"})\n        .cast(\n            {\n                \"ZCTA\": pl.Int32,\n            }\n        )\n    )\n    file_path = CENSUS_DATA_DIR_PATH / \"acs5-subject-group-{table}-zcta.csv\"\n    # may not have to write. but cache func doesn't return whether it hits or not\n    df.write_csv(file_path)\n    return str(file_path)\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.CensusDataRetriever.get_acs5_subject_table_to_group_name","title":"get_acs5_subject_table_to_group_name(table, year)","text":"

    Get a JSON representation of a table's attributes.

    Note

    Tables can be found at: https://www.census.gov/acs/www/data/data-tables-and-tools/subject-tables/

    Returned object will have entries similar to:

    \"DP05_0037M\": {\n    \"label\": \"Margin of Error!!RACE!!Total population!!One race!!White\",\n    \"concept\": \"ACS DEMOGRAPHIC AND HOUSING ESTIMATES\",\n    \"predicateType\": \"int\",\n    \"group\": \"DP05\",\n    \"limit\": 0,\n    \"predicateOnly\": true\n}\n

    Parameters:

    Name Type Description Default table str

    the table to lookup

    required year str

    which acs5 year to look up

    required

    Returns:

    Type Description dict[str, Any] | None

    dict[str, Any] | None: the table's variables, or None if the groups data could not be loaded

    Source code in src\\backend\\secondarydata.py
    def get_acs5_subject_table_to_group_name(\n    self, table: str, year: str\n) -> dict[str, Any] | None:\n    \"\"\"Get a JSON representation of a table's attributes.\n\n    Note:\n        Tables can be found at: https://www.census.gov/acs/www/data/data-tables-and-tools/subject-tables/\n\n        Returned object will have entries similar to:\n        ```json\n        \"DP05_0037M\": {\n            \"label\": \"Margin of Error!!RACE!!Total population!!One race!!White\",\n            \"concept\": \"ACS DEMOGRAPHIC AND HOUSING ESTIMATES\",\n            \"predicateType\": \"int\",\n            \"group\": \"DP05\",\n            \"limit\": 0,\n            \"predicateOnly\": true\n        }\n        ```\n\n    Args:\n        table (str): the table to lookup\n        year (str): which acs5 year to look up\n\n    Returns:\n        str | Any: variables\n    \"\"\"\n    file_name = f\"{year}-acs5-subject-groups-{table}.json\"\n    groups_url = (\n        f\"https://api.census.gov/data/{year}/acs/acs5/subject/groups/{table}.json\"\n    )\n    groups_to_label_translation = self.get_and_cache_data(file_name, groups_url)\n    if groups_to_label_translation is False:\n        log(\"Something is wrong with groups label dict\", \"warn\")\n        return None\n    return groups_to_label_translation[\"variables\"]  # type: ignore\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.CensusDataRetriever.get_and_cache_data","title":"get_and_cache_data(file_name, url_to_lookup_on_miss)","text":"

    Cache files.

    Parameters:

    Name Type Description Default file_name str

    file name to save/lookup

    required url_to_lookup_on_miss str

    the Census url to lookup

    required

    Returns:

    Type Description dict[str, str] | bool

    bool | dict[str, str] | None | Any: the dict of tablename: label, or False if the cached file cannot be decoded or the Census lookup returns no response

    Source code in src\\backend\\secondarydata.py
    def get_and_cache_data(\n    self, file_name: str, url_to_lookup_on_miss: str\n) -> dict[str, str] | bool:\n    \"\"\"Cache files.\n\n    Args:\n        file_name (str): file name to save/lookup\n        url_to_lookup_on_miss (str): the Census url to lookup\n\n    Returns:\n        bool | dict[str, str] | None | Any: the dict of `tablename: label` or\n    \"\"\"\n    CENSUS_DATA_DIR_PATH.mkdir(parents=True, exist_ok=True)\n\n    my_json = None\n\n    try:\n        with open(CENSUS_DATA_DIR_PATH / file_name, mode=\"r\") as f:\n            log(f\"Reading {file_name}\", \"debug\")\n            try:\n                my_json = json.load(f)\n            except json.JSONDecodeError:\n                log(\"Could not decode cached file\", \"error\")\n                return False\n    except FileNotFoundError:\n        req = self._get(url_to_lookup_on_miss)\n        if req is None:\n            log(f\"Could not find census file {req = }\", \"error\")\n            return False\n        req.raise_for_status()\n        my_json = req.json()\n        with open(CENSUS_DATA_DIR_PATH / file_name, \"w\") as f:\n            json.dump(my_json, f)\n\n    return my_json\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.CensusDataRetriever.get_race_makeup_by_zcta","title":"get_race_makeup_by_zcta(zcta)","text":"

    Get race makeup by ZCTA from the Census API. DO NOT USE

    Note

    use get_table_group_for_zcta_by_state_by_year

    Parameters:

    Name Type Description Default zcta str

    zcta

    required

    Returns:

    Type Description str | None

    str | None: text or none

    Source code in src\\backend\\secondarydata.py
    def get_race_makeup_by_zcta(self, zcta: str) -> str | None:\n    \"\"\"Get race make up by zcta from. DO NOT USE\n\n    Note:\n        use `get_table_group_for_zcta_by_state_by_year`\n\n    Args:\n        zcta (str): zcta\n\n    Returns:\n        str | None: text or none\n    \"\"\"\n    # get white, black, american indian/native alaskan, asian, NH/PI, other. note that these are estimates, margin of error can be had with \"M\"\n    req = self._get(\n        f\"https://api.census.gov/data/2021/acs/acs5/profile?get=DP05_0064E,DP05_0065E,DP05_0066E,DP05_0067E,DP05_0068E,DP05_0069E&for=zip%20code%20tabulation%20area:{zcta}&key={self.api_key}\"\n    )\n    if req is None:\n        return None\n    return req.text\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.CensusDataRetriever.translate_and_truncate_unique_acs5_profile_groups_to_labels_for_header_list","title":"translate_and_truncate_unique_acs5_profile_groups_to_labels_for_header_list(headers, table, year)","text":"

    Get the label name for a table and row for the acs5 profile surveys.

    Parameters:

    Name Type Description Default headers list[str]

    header row

    required table str

    have to look again

    required year str

    the year

    required

    Returns:

    Name Type Description None None

    translates the list of table_row_selector to its english label

    Source code in src\\backend\\secondarydata.py
    def translate_and_truncate_unique_acs5_profile_groups_to_labels_for_header_list(\n    self, headers: list[str], table: str, year: str\n) -> None:\n    \"\"\"Get the label name for a table and row for the acs5 profile surveys.\n\n    Args:\n        headers (list[str]): header row\n        table (str): have to look again\n        year (str): the year\n\n    Returns:\n        None: translates the list of table_row_selector to its english label\n    \"\"\"\n    # is going to read the file multiple times, save last req as {\"table\": req_json[table]...} for this?\n    groups_to_label_translation_dict = self.get_acs5_profile_table_to_group_name(\n        table, year\n    )\n    if groups_to_label_translation_dict is None:\n        log(\"Could not translate headers\", \"warn\")\n        return groups_to_label_translation_dict\n\n    for idx, header in enumerate(headers):\n        new_col_name_dict = groups_to_label_translation_dict.get(header)\n        if new_col_name_dict is None:\n            # returns none if not in dict, means we have custom name and can continue\n            continue\n        new_col_name = new_col_name_dict[\"label\"]\n        # qgis doesnt allow field names of 80+ chars. 
massage into form, then cut off\n        # delimiter for table subsection\n        new_col_name = re.sub(\"!!\", \" \", new_col_name)\n        new_col_name = re.sub(r\"\\s+\", \" \", new_col_name)\n        # easier to read\n        new_col_name_parts = new_col_name.split(\" \")\n        for idy, no_format in enumerate(new_col_name_parts):\n            new_col_name_parts[idy] = no_format.capitalize()\n        new_col_name = \"\".join(new_col_name_parts)\n        # shortenings to fit length requirement\n        for key, value in replace_dict.items():\n            new_col_name = re.sub(key, value, new_col_name)\n        # limiter\n        new_col_name = new_col_name[\n            : min(len(new_col_name), self.MAX_COL_NAME_LENGTH)\n        ]\n\n        if new_col_name not in headers[:idx]:\n            headers[idx] = new_col_name\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.CensusDataRetriever.translate_and_truncate_unique_acs5_subject_groups_to_labels_for_header_list","title":"translate_and_truncate_unique_acs5_subject_groups_to_labels_for_header_list(headers, table, year)","text":"

    Gets the label name for a table and row for the acs5 subject surveys.

    Parameters:

    Name Type Description Default headers list[str]

    headers

    required table str

    table

    required year str

    year

    required Source code in src\\backend\\secondarydata.py
    def translate_and_truncate_unique_acs5_subject_groups_to_labels_for_header_list(\n    self, headers: list[str], table: str, year: str\n) -> None:\n    \"\"\"Gets the label name for a table and row for the acs5 profile surveys.\n\n    Args:\n        headers (list[str]): headers\n        table (str): table\n        year (str): year\n    \"\"\"\n    # is going to read the file multiple times, save last req as {\"table\": req_json[table]...} for this?\n    groups_to_label_translation_dict = self.get_acs5_subject_table_to_group_name(\n        table, year\n    )\n    if groups_to_label_translation_dict is None:\n        log(\"Could not translate headers\", \"warn\")\n        return groups_to_label_translation_dict\n\n    for idx, header in enumerate(headers):\n        new_col_name_dict = groups_to_label_translation_dict.get(header)\n        if new_col_name_dict is None:\n            # returns none if not in dict, means we have custom name and can continue\n            continue\n        new_col_name = new_col_name_dict[\"label\"]\n        # qgis doesnt allow field names of 80+ chars. massage into form, then cut off\n        # delimiter for table subsection\n        new_col_name = re.sub(\"!!\", \" \", new_col_name)\n        new_col_name = re.sub(r\"\\s+\", \" \", new_col_name)\n        # easier to read\n        new_col_name_parts = new_col_name.split(\" \")\n        for idy, no_format in enumerate(new_col_name_parts):\n            new_col_name_parts[idy] = no_format.capitalize()\n        new_col_name = \"\".join(new_col_name_parts)\n        # shortenings to fit length requirement\n        for key, value in replace_dict.items():\n            new_col_name = re.sub(key, value, new_col_name)\n        # limiter\n        new_col_name = new_col_name[\n            : min(len(new_col_name), self.MAX_COL_NAME_LENGTH)\n        ]\n\n        if new_col_name not in headers[:idx]:\n            headers[idx] = new_col_name\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever","title":"EIADataRetriever","text":"

    Interact with the EIA open data API.

    Note

    This is the \"manual\" for this API: https://www.eia.gov/opendata/pdf/EIA-APIv2-HandsOn-Webinar-11-Jan-23.pdf

    Source code in src\\backend\\secondarydata.py
    class EIADataRetriever:\n    \"\"\"Interact with the EIA open data API.\n\n    Note:\n        This is the \"manual\" for this API:\n        https://www.eia.gov/opendata/pdf/EIA-APIv2-HandsOn-Webinar-11-Jan-23.pdf\n    \"\"\"\n\n    HEATING_OIL_STATES_ABBR = {\n        sts.CT.abbr,\n        sts.DC.abbr,\n        sts.DE.abbr,\n        sts.IA.abbr,\n        sts.IL.abbr,\n        sts.IN.abbr,\n        sts.KS.abbr,\n        sts.KY.abbr,\n        sts.MA.abbr,\n        sts.MD.abbr,\n        sts.ME.abbr,\n        sts.MI.abbr,\n        sts.MN.abbr,\n        sts.MO.abbr,\n        sts.NC.abbr,\n        sts.ND.abbr,\n        sts.NE.abbr,\n        sts.NH.abbr,\n        sts.NJ.abbr,\n        sts.NY.abbr,\n        sts.OH.abbr,\n        sts.PA.abbr,\n        sts.RI.abbr,\n        sts.SD.abbr,\n        sts.VA.abbr,\n        sts.VT.abbr,\n        sts.WI.abbr,\n    }\n\n    PROPANE_STATES_ABBR = {\n        sts.AL.abbr,\n        sts.AR.abbr,\n        sts.CO.abbr,\n        sts.CT.abbr,\n        sts.DE.abbr,\n        sts.FL.abbr,\n        sts.GA.abbr,\n        sts.IL.abbr,\n        sts.IN.abbr,\n        sts.KS.abbr,\n        sts.KY.abbr,\n        sts.KY.abbr,\n        sts.MA.abbr,\n        sts.MD.abbr,\n        sts.ME.abbr,\n        sts.MI.abbr,\n        sts.MN.abbr,\n        sts.MO.abbr,\n        sts.MS.abbr,\n        sts.MT.abbr,\n        sts.NC.abbr,\n        sts.ND.abbr,\n        sts.NE.abbr,\n        sts.NH.abbr,\n        sts.NJ.abbr,\n        sts.NY.abbr,\n        sts.OH.abbr,\n        sts.OK.abbr,\n        sts.PA.abbr,\n        sts.RI.abbr,\n        sts.SD.abbr,\n        sts.TN.abbr,\n        sts.TX.abbr,\n        sts.UT.abbr,\n        sts.VA.abbr,\n        sts.VT.abbr,\n        sts.WI.abbr,\n    }\n\n    class HeaterEfficiencies(Enum):\n        \"\"\"Combination of system efficiency and distribution efficiency.\n\n        Note:\n            Numbers taken from https://www.efficiencymaine.com/at-home/heating-cost-comparison/\n        \"\"\"\n\n        HEAT_PUMP_GEOTHERMAL = 
3.69\n        HEAT_PUMP_DUCTLESS = 2.7  # mini split\n        HEAT_PUMP_DUCTED = 2.16\n        BASEBOARD = 1\n        KEROSENE_ROOM_HEATER = 0.87\n        PROPANE_BOILER = 0.837\n        NG_BOILER = 0.828\n        NG_ROOM_HEATER = 0.81\n        PROPANE_ROOM_HEATER = 0.81\n        OIL_BOILER = 0.783\n        WOOD_STOVE = 0.75\n        PELLET_STOVE = 0.75\n        NG_FURNACE = 0.744  #! double check this value\n        PROPANE_FURNACE = 0.744\n        OIL_FURNACE = 0.704\n        PELLET_BOILER = 0.639\n\n    class EnergyType(Enum):\n        PROPANE = 1\n        HEATING_OIL = 2\n        NATURAL_GAS = 3\n        ELECTRICITY = 4\n\n    class PetroleumProductTypes(StrEnum):\n        NATURAL_GAS = \"EPG0\"\n        PROPANE = \"EPLLPA\"\n        HEATING_OIL = \"EPD2F\"\n\n    class FuelBTUConversion(Enum):\n        # https://www.edf.org/sites/default/files/10071_EDF_BottomBarrel_Ch3.pdf\n        # https://www.eia.gov/energyexplained/units-and-calculators/british-thermal-units.php\n        # https://www.eia.gov/energyexplained/units-and-calculators/\n        NO1_OIL_BTU_PER_GAL = 135_000\n        NO2_OIL_BTU_PER_GAL = 140_000\n        NO4_OIL_BTU_PER_GAL = 146_000\n        NO5_OIL_BTU_PER_GAL = 144_500\n        NO6_OIL_BTU_PER_GAL = 150_000\n        HEATING_OIL_BTU_PER_GAL = 138_500\n        ELECTRICITY_BTU_PER_KWH = 3_412.14\n        NG_BTU_PER_MCT = 1_036_000  # 1000 cubic feet of gas\n        NG_BTU_PER_THERM = 100_000\n        PROPANE_BTU_PER_GAL = 91_452\n        WOOD_BTU_PER_CORD = 20_000_000\n\n    def __init__(self):\n        self.eia_base_url = \"https://api.eia.gov/v2\"\n        self.api_key = os.getenv(\"EIA_API_KEY\")\n        if self.api_key is None:\n            log(\n                \"No Census API key found in a .env file in project directory. 
please request a key at https://www.eia.gov/opendata/register.php\",\n                \"critical\",\n            )\n            exit()\n\n    def price_per_mbtu_with_efficiency(\n        self, energy_price_dict: dict\n    ) -> dict[str, str | EnergyType | float]:\n        \"\"\"Convert an energy source's price per quantity into price per BTU with an efficiency.\n\n        Note:\n            Efficiency data taken from https://portfoliomanager.energystar.gov/pdf/reference/Thermal%20Conversions.pdf\n\n        See also:\n            `EIADataRetriever.HeaterEfficiencies`\n\n        Args:\n            energy_price_dict (dict): energy source json\n\n        Returns:\n            dict: new dictionary with btu centric pricing\n        \"\"\"\n        #! make new function based on burner type/ end usage type\n        CENTS_IN_DOLLAR = 100\n        match energy_price_dict.get(\"type\"):\n            case self.EnergyType.PROPANE.value:\n                # for loop is done for every case since i dont want to use `eval` or parse a string of division to keep PEMDAS. 
this is why i dont have an efficiency func yet\n                for key, value in energy_price_dict.items():\n                    if (\n                        key in [\"type\", \"state\", None]\n                        or energy_price_dict.get(key) is None\n                    ):\n                        continue\n                    energy_price_dict[key] = (\n                        value\n                        / (\n                            self.FuelBTUConversion.PROPANE_BTU_PER_GAL.value\n                            * self.HeaterEfficiencies.PROPANE_FURNACE.value\n                        )\n                        * 1_000\n                    )\n            case self.EnergyType.NATURAL_GAS.value:\n                for key, value in energy_price_dict.items():\n                    if (\n                        key in [\"type\", \"state\", None]\n                        or energy_price_dict.get(key) is None\n                    ):\n                        continue\n                    energy_price_dict[key] = (\n                        value\n                        / (\n                            self.FuelBTUConversion.NG_BTU_PER_MCT.value\n                            * self.HeaterEfficiencies.NG_FURNACE.value\n                        )\n                        * 1_000\n                    )\n            case self.EnergyType.ELECTRICITY.value:\n                for key, value in energy_price_dict.items():\n                    if (\n                        key in [\"type\", \"state\", None]\n                        or energy_price_dict.get(key) is None\n                    ):\n                        continue\n                    energy_price_dict[key] = (\n                        value\n                        / CENTS_IN_DOLLAR\n                        / (\n                            self.FuelBTUConversion.ELECTRICITY_BTU_PER_KWH.value\n                            * self.HeaterEfficiencies.HEAT_PUMP_DUCTED.value\n                        )\n                   
     * 1_000\n                    )\n            case self.EnergyType.HEATING_OIL.value:\n                for key, value in energy_price_dict.items():\n                    if (\n                        key in [\"type\", \"state\", None]\n                        or energy_price_dict.get(key) is None\n                    ):\n                        continue\n                    energy_price_dict[key] = (\n                        value\n                        / (\n                            self.FuelBTUConversion.HEATING_OIL_BTU_PER_GAL.value\n                            * self.HeaterEfficiencies.OIL_BOILER.value\n                        )\n                        * 1_000\n                    )\n            case _:\n                log(\"Could not translate dict to btu per price.\", \"warn\")\n\n        return energy_price_dict\n\n    # api to dict handler Helpers\n    def price_dict_to_clean_dict(\n        self, eia_json: dict, energy_type: EnergyType, state: str\n    ) -> dict[str, str | EnergyType | float]:\n        \"\"\"Clean JSON data returned by EIA's API.\n\n        Args:\n            eia_json (dict): the response JSON\n            energy_type (EnergyType): the energy type\n            state (str): the state\n\n        Returns:\n            dict[str, str | EnergyType | float]: cleaned JSON\n        \"\"\"\n        # price key is different for electricity\n        accessor = \"value\"\n        if \"product\" not in eia_json[\"response\"][\"data\"][0]:\n            accessor = \"price\"\n\n        result_dict = {\n            entry[\"period\"]: entry[f\"{accessor}\"]\n            for entry in eia_json[\"response\"][\"data\"]\n        }\n        result_dict[\"type\"] = energy_type.value\n        result_dict[\"state\"] = state\n\n        return result_dict\n\n    def price_df_to_clean_dict(\n        self, eia_df: pl.DataFrame, energy_type: EnergyType, state: str\n    ) -> dict[str, str | EnergyType | float]:\n        \"\"\"Clean DataFrame data consisting of EIA 
API data.\n\n        Args:\n            eia_df (pl.DataFrame): the DataFrame to clean\n            energy_type (EnergyType): the energy type\n            state (str): the state\n\n        Returns:\n            dict[str, str|EnergyType|float]: the dict\n        \"\"\"\n        result_dict = {}\n        for row in eia_df.rows(named=True):\n            year_month = f\"{row.get(\"year\")}-{row.get(\"month\"):02}\"\n            if row.get(\"monthly_avg_price\") is not None:\n                result_dict[year_month] = round(row.get(\"monthly_avg_price\"), 3)  # type: ignore\n        result_dict[\"type\"] = energy_type.value\n        result_dict[\"state\"] = state\n        return result_dict\n\n    # api to dict handler\n    def price_to_clean_dict(\n        self, price_struct: dict | pl.DataFrame, energy_type: EnergyType, state: str\n    ) -> dict[str, str | EnergyType | float]:\n        \"\"\"Handle the different data types that EIA data could be stored in.\n\n        Args:\n            price_struct (dict | pl.DataFrame): a data structure containing the year, month, and price info\n            energy_type (EnergyType): the energy type\n            state (str): the state\n\n        Raises:\n            TypeError: raised if the type of `price_struct` is not supported\n\n        Returns:\n            dict[str, str|EnergyType|float]: the normalized and structured data in dict form\n        \"\"\"\n        match price_struct:\n            case dict():\n                return self.price_dict_to_clean_dict(price_struct, energy_type, state)\n            case pl.DataFrame():\n                return self.price_df_to_clean_dict(price_struct, energy_type, state)\n            case _:\n                raise TypeError(f\"Type not supported: {type(energy_type)}\")\n\n    # api interaction\n    def monthly_electricity_price_per_kwh(\n        self, state: str, start_date: datetime.date, end_date: datetime.date\n    ) -> dict[str, Any]:\n        \"\"\"Get a state's average monthly energy 
price.\n\n        Note:\n            Data is returned in cents/KWh.\n\n        Args:\n            state (str): the 2 character postal code of a state\n            start_date (datetime.date): the start date, inclusive\n            end_date (datetime.date): the end date, non inclusive\n\n        Returns:\n            dict: the dictionary in `year-month: price` form\n        \"\"\"\n        url = f\"{self.eia_base_url}/electricity/retail-sales/data/?frequency=monthly&data[0]=price&facets[stateid][]={state}&facets[sectorid][]=RES&start={start_date.year}-{start_date.month:02}&end={end_date.year}-{end_date.month:02}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}\"\n\n        eia_request = req_get_wrapper(url)\n        eia_request.raise_for_status()\n\n        return eia_request.json()\n\n    def monthly_ng_price_per_mcf(\n        self, state: str, start_date: datetime.date, end_date: datetime.date\n    ) -> dict[str, Any]:\n        \"\"\"Get a state's average natural gas price.\n\n        Note:\n            Data is returned in dollars per mega cubic feet.\n\n        Args:\n            state (str): the 2 character postal code of a state\n            start_date (datetime.date): the start date, inclusive\n            end_date (datetime.date): the end date, non inclusive\n\n        Returns:\n            dict: _description_\n        \"\"\"\n        # $/mcf\n        url = f\"https://api.eia.gov/v2/natural-gas/pri/sum/data/?frequency=monthly&data[0]=value&facets[duoarea][]=S{state}&facets[process][]=PRS&start={start_date.year}-{start_date.month:02}&end={end_date.year}-{end_date.month:02}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}\"\n\n        eia_request = req_get_wrapper(url)\n        eia_request.raise_for_status()\n\n        return eia_request.json()\n\n    def monthly_heating_season_heating_oil_price_per_gal(\n        self, state: str, start_date: datetime.date, end_date: datetime.date\n    ) -> pl.DataFrame:\n        \"\"\"Get 
a state's average heating oil price.\n\n        Note:\n            Data returned is in dollars per gallon.\n\n            Only these states are tracked, and only for the months October through March:\n                * CT\n                * DC\n                * DE\n                * IA\n                * IL\n                * IN\n                * KS\n                * KY\n                * MA\n                * MD\n                * ME\n                * MI\n                * MN\n                * MO\n                * NC\n                * ND\n                * NE\n                * NH\n                * NJ\n                * NY\n                * OH\n                * PA\n                * RI\n                * SD\n                * VA\n                * VT\n                * WI\n        Args:\n            state (str): 2 char postal code\n            start_date (datetime.date): the start date, inclusive\n            end_date (datetime.date): the end date, non inclusive\n\n        Returns:\n            dict: _description_\n        \"\"\"\n        # heating season is Oct - march, $/gal\n        url = f\"https://api.eia.gov/v2/petroleum/pri/wfr/data/?frequency=weekly&data[0]=value&facets[duoarea][]=S{state}&facets[product][]=EPD2F&start={start_date}&end={end_date}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}\"\n\n        eia_request = req_get_wrapper(url)\n        eia_request.raise_for_status()\n\n        json = eia_request.json()\n        df = pl.DataFrame(json[\"response\"][\"data\"])\n        # becomes int, so months are sig figs\n        df = df.with_columns(pl.col(\"period\").str.strptime(pl.Date))\n        df = df.with_columns(\n            pl.col(\"period\").dt.year().alias(\"year\"),\n            pl.col(\"period\").dt.month().alias(\"month\"),\n        )\n\n        monthly_avg_price = (\n            df.group_by([\"year\", \"month\"])\n            .agg(pl.col(\"value\").mean().alias(\"monthly_avg_price\"))\n            
.sort(\"year\", \"month\")\n        )\n\n        return monthly_avg_price\n\n    def monthly_heating_season_propane_price_per_gal(\n        self, state: str, start_date: datetime.date, end_date: datetime.date\n    ) -> pl.DataFrame:\n        \"\"\"Get a state's average propane price in dollars per gal.\n\n        Note:\n            Only these states are tracked, and only for the months October through Marc:\n                * AL\n                * AR\n                * CO\n                * CT\n                * DE\n                * FL\n                * GA\n                * IL\n                * IN\n                * KS\n                * KY\n                * KY\n                * MA\n                * MD\n                * ME\n                * MI\n                * MN\n                * MO\n                * MS\n                * MT\n                * NC\n                * ND\n                * NE\n                * NH\n                * NJ\n                * NY\n                * OH\n                * OK\n                * PA\n                * RI\n                * SD\n                * TN\n                * TX\n                * UT\n                * VA\n                * VT\n                * WI\n\n        Args:\n            state (str): 2 character postal code\n            start_date (datetime.date): the start date, inclusive\n            end_date (datetime.date): the end date, non inclusive\n\n        Returns:\n            dict: _description_\n        \"\"\"\n        # heating season is Oct - march, $/gal\n        url = f\"https://api.eia.gov/v2/petroleum/pri/wfr/data/?frequency=weekly&data[0]=value&facets[process][]=PRS&facets[duoarea][]=S{state}&facets[product][]=EPLLPA&start={start_date}&end={end_date}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}\"\n\n        eia_request = req_get_wrapper(url)\n        eia_request.raise_for_status()\n\n        json = eia_request.json()\n        # return 
self.price_json_to_dict(eia_request.json())\n        df = pl.DataFrame(json[\"response\"][\"data\"])\n        # df = df.with_columns(pl.col(\"period\").str.to_date().alias(\"period\"))\n        df = df.with_columns(pl.col(\"period\").str.strptime(pl.Date))\n        df = df.with_columns(\n            pl.col(\"period\").dt.year().alias(\"year\"),\n            pl.col(\"period\").dt.month().alias(\"month\"),\n        )\n\n        monthly_avg_price = (\n            df.group_by([\"year\", \"month\"])\n            .agg(pl.col(\"value\").mean().alias(\"monthly_avg_price\"))\n            .sort(\"year\", \"month\")\n        )\n\n        return monthly_avg_price\n\n    def monthly_price_per_mbtu_by_energy_type(\n        self,\n        energy_type: EnergyType,\n        state: str,\n        start_date: datetime.date,\n        end_date: datetime.date,\n    ) -> dict[str, str | EnergyType | float]:\n        \"\"\"Get the cost per MBTU for the given energy type for the state, over the given period of time. Refer to EIA's documentation\n        for changes to data collection during certain years.\n\n        Args:\n            energy_type (EnergyType): The energy type\n            state (str): the 2 character postal abbreviation. Note that for heating oil, only certain states have this information collected\n            start_date (datetime.date): the date for which to start the search. Inclusive. Not that for heating oil, only heating months will be returned\n            end_date (datetime.date): the date for which to end the search. 
Non inclusive\n\n        Raises:\n            NotImplementedError: Invalid energy type\n\n        Returns:\n            dict: year-month: price in USD to BTU\n        \"\"\"\n        if len(state) > 2:\n            state = sts.lookup(state).abbr  # type: ignore\n        match energy_type:\n            case self.EnergyType.PROPANE:\n                return self.price_per_mbtu_with_efficiency(\n                    self.price_to_clean_dict(\n                        self.monthly_heating_season_propane_price_per_gal(\n                            state, start_date, end_date\n                        ),\n                        energy_type,\n                        state,\n                    )\n                )\n            case self.EnergyType.NATURAL_GAS:\n                return self.price_per_mbtu_with_efficiency(\n                    self.price_to_clean_dict(\n                        self.monthly_ng_price_per_mcf(state, start_date, end_date),\n                        energy_type,\n                        state,\n                    )\n                )\n            case self.EnergyType.ELECTRICITY:\n                return self.price_per_mbtu_with_efficiency(\n                    self.price_to_clean_dict(\n                        self.monthly_electricity_price_per_kwh(\n                            state, start_date, end_date\n                        ),\n                        energy_type,\n                        state,\n                    )\n                )\n            case self.EnergyType.HEATING_OIL:\n                return self.price_per_mbtu_with_efficiency(\n                    self.price_to_clean_dict(\n                        self.monthly_heating_season_heating_oil_price_per_gal(\n                            state, start_date, end_date\n                        ),\n                        energy_type,\n                        state,\n                    )\n                )\n            case _:\n                raise NotImplementedError(f\"Unsupported 
energy type: {energy_type}\")\n\n    def monthly_price_per_mbtu_by_energy_type_by_state(\n        self, state: str, start_date: datetime.date, end_date: datetime.date\n    ) -> list[Any]:\n        \"\"\"Get all available energy prices per MBTU, taking efficiency into account, for a state.\n\n        Note:\n            Please keep times to within a year. For the non oil and propane, you have to go a month past.\n\n        Args:\n            state (str): 2 character postal code\n            start_date (datetime.date): start date\n            end_date (datetime.date): end date\n\n        Returns:\n            list[Any]: list of price dicts for available energy types for a state\n        \"\"\"\n        if len(state) > 2:\n            state = sts.lookup(state).abbr  # type: ignore\n\n        dicts_to_return = []\n        if state in self.HEATING_OIL_STATES_ABBR:\n            dicts_to_return.append(\n                self.monthly_price_per_mbtu_by_energy_type(\n                    self.EnergyType.HEATING_OIL, state, start_date, end_date\n                )\n            )\n        if state in self.PROPANE_STATES_ABBR:\n            dicts_to_return.append(\n                self.monthly_price_per_mbtu_by_energy_type(\n                    self.EnergyType.PROPANE, state, start_date, end_date\n                )\n            )\n        dicts_to_return.append(\n            self.monthly_price_per_mbtu_by_energy_type(\n                self.EnergyType.NATURAL_GAS, state, start_date, end_date\n            )\n        )\n        dicts_to_return.append(\n            self.monthly_price_per_mbtu_by_energy_type(\n                self.EnergyType.ELECTRICITY, state, start_date, end_date\n            )\n        )\n        log(f\"{dicts_to_return = }\", \"debug\")\n        return dicts_to_return\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever.HeaterEfficiencies","title":"HeaterEfficiencies","text":"

    Bases: Enum

    Combination of system efficiency and distribution efficiency.

    Note

    Numbers taken from https://www.efficiencymaine.com/at-home/heating-cost-comparison/

    Source code in src\\backend\\secondarydata.py
    class HeaterEfficiencies(Enum):\n    \"\"\"Combination of system efficiency and distribution efficiency.\n\n    Note:\n        Numbers taken from https://www.efficiencymaine.com/at-home/heating-cost-comparison/\n    \"\"\"\n\n    HEAT_PUMP_GEOTHERMAL = 3.69\n    HEAT_PUMP_DUCTLESS = 2.7  # mini split\n    HEAT_PUMP_DUCTED = 2.16\n    BASEBOARD = 1\n    KEROSENE_ROOM_HEATER = 0.87\n    PROPANE_BOILER = 0.837\n    NG_BOILER = 0.828\n    NG_ROOM_HEATER = 0.81\n    PROPANE_ROOM_HEATER = 0.81\n    OIL_BOILER = 0.783\n    WOOD_STOVE = 0.75\n    PELLET_STOVE = 0.75\n    NG_FURNACE = 0.744  #! double check this value\n    PROPANE_FURNACE = 0.744\n    OIL_FURNACE = 0.704\n    PELLET_BOILER = 0.639\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever.monthly_electricity_price_per_kwh","title":"monthly_electricity_price_per_kwh(state, start_date, end_date)","text":"

    Get a state's average monthly residential electricity price.

    Note

    Data is returned in cents/KWh.

    Parameters:

    Name Type Description Default state str

    the 2 character postal code of a state

    required start_date date

    the start date, inclusive

    required end_date date

    the end date, non inclusive

    required

    Returns:

    Name Type Description dict dict[str, Any]

    the raw JSON response from the EIA API (not yet reduced to year-month: price form)

    Source code in src\\backend\\secondarydata.py
    def monthly_electricity_price_per_kwh(\n    self, state: str, start_date: datetime.date, end_date: datetime.date\n) -> dict[str, Any]:\n    \"\"\"Get a state's average monthly energy price.\n\n    Note:\n        Data is returned in cents/KWh.\n\n    Args:\n        state (str): the 2 character postal code of a state\n        start_date (datetime.date): the start date, inclusive\n        end_date (datetime.date): the end date, non inclusive\n\n    Returns:\n        dict: the dictionary in `year-month: price` form\n    \"\"\"\n    url = f\"{self.eia_base_url}/electricity/retail-sales/data/?frequency=monthly&data[0]=price&facets[stateid][]={state}&facets[sectorid][]=RES&start={start_date.year}-{start_date.month:02}&end={end_date.year}-{end_date.month:02}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}\"\n\n    eia_request = req_get_wrapper(url)\n    eia_request.raise_for_status()\n\n    return eia_request.json()\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever.monthly_heating_season_heating_oil_price_per_gal","title":"monthly_heating_season_heating_oil_price_per_gal(state, start_date, end_date)","text":"

    Get a state's average heating oil price.

    Note

    Data returned is in dollars per gallon.

    Only these states are tracked, and only for the months October through March: * CT * DC * DE * IA * IL * IN * KS * KY * MA * MD * ME * MI * MN * MO * NC * ND * NE * NH * NJ * NY * OH * PA * RI * SD * VA * VT * WI

    Args: state (str): 2 char postal code start_date (datetime.date): the start date, inclusive end_date (datetime.date): the end date, non inclusive

    Returns:

    Name Type Description dict DataFrame

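    a DataFrame of monthly average heating oil prices with year, month, and monthly_avg_price columns, sorted by year and month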

    Source code in src\\backend\\secondarydata.py
    def monthly_heating_season_heating_oil_price_per_gal(\n    self, state: str, start_date: datetime.date, end_date: datetime.date\n) -> pl.DataFrame:\n    \"\"\"Get a state's average heating oil price.\n\n    Note:\n        Data returned is in dollars per gallon.\n\n        Only these states are tracked, and only for the months October through March:\n            * CT\n            * DC\n            * DE\n            * IA\n            * IL\n            * IN\n            * KS\n            * KY\n            * MA\n            * MD\n            * ME\n            * MI\n            * MN\n            * MO\n            * NC\n            * ND\n            * NE\n            * NH\n            * NJ\n            * NY\n            * OH\n            * PA\n            * RI\n            * SD\n            * VA\n            * VT\n            * WI\n    Args:\n        state (str): 2 char postal code\n        start_date (datetime.date): the start date, inclusive\n        end_date (datetime.date): the end date, non inclusive\n\n    Returns:\n        dict: _description_\n    \"\"\"\n    # heating season is Oct - march, $/gal\n    url = f\"https://api.eia.gov/v2/petroleum/pri/wfr/data/?frequency=weekly&data[0]=value&facets[duoarea][]=S{state}&facets[product][]=EPD2F&start={start_date}&end={end_date}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}\"\n\n    eia_request = req_get_wrapper(url)\n    eia_request.raise_for_status()\n\n    json = eia_request.json()\n    df = pl.DataFrame(json[\"response\"][\"data\"])\n    # becomes int, so months are sig figs\n    df = df.with_columns(pl.col(\"period\").str.strptime(pl.Date))\n    df = df.with_columns(\n        pl.col(\"period\").dt.year().alias(\"year\"),\n        pl.col(\"period\").dt.month().alias(\"month\"),\n    )\n\n    monthly_avg_price = (\n        df.group_by([\"year\", \"month\"])\n        .agg(pl.col(\"value\").mean().alias(\"monthly_avg_price\"))\n        .sort(\"year\", \"month\")\n    )\n\n    return 
monthly_avg_price\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever.monthly_heating_season_propane_price_per_gal","title":"monthly_heating_season_propane_price_per_gal(state, start_date, end_date)","text":"

    Get a state's average propane price in dollars per gal.

    Note

    Only these states are tracked, and only for the months October through March: * AL * AR * CO * CT * DE * FL * GA * IL * IN * KS * KY * MA * MD * ME * MI * MN * MO * MS * MT * NC * ND * NE * NH * NJ * NY * OH * OK * PA * RI * SD * TN * TX * UT * VA * VT * WI

    Parameters:

    Name Type Description Default state str

    2 character postal code

    required start_date date

    the start date, inclusive

    required end_date date

    the end date, non inclusive

    required

    Returns:

    Name Type Description dict DataFrame

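    a DataFrame of monthly average propane prices with year, month, and monthly_avg_price columns, sorted by year and month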

    Source code in src\\backend\\secondarydata.py
    def monthly_heating_season_propane_price_per_gal(\n    self, state: str, start_date: datetime.date, end_date: datetime.date\n) -> pl.DataFrame:\n    \"\"\"Get a state's average propane price in dollars per gal.\n\n    Note:\n        Only these states are tracked, and only for the months October through Marc:\n            * AL\n            * AR\n            * CO\n            * CT\n            * DE\n            * FL\n            * GA\n            * IL\n            * IN\n            * KS\n            * KY\n            * KY\n            * MA\n            * MD\n            * ME\n            * MI\n            * MN\n            * MO\n            * MS\n            * MT\n            * NC\n            * ND\n            * NE\n            * NH\n            * NJ\n            * NY\n            * OH\n            * OK\n            * PA\n            * RI\n            * SD\n            * TN\n            * TX\n            * UT\n            * VA\n            * VT\n            * WI\n\n    Args:\n        state (str): 2 character postal code\n        start_date (datetime.date): the start date, inclusive\n        end_date (datetime.date): the end date, non inclusive\n\n    Returns:\n        dict: _description_\n    \"\"\"\n    # heating season is Oct - march, $/gal\n    url = f\"https://api.eia.gov/v2/petroleum/pri/wfr/data/?frequency=weekly&data[0]=value&facets[process][]=PRS&facets[duoarea][]=S{state}&facets[product][]=EPLLPA&start={start_date}&end={end_date}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}\"\n\n    eia_request = req_get_wrapper(url)\n    eia_request.raise_for_status()\n\n    json = eia_request.json()\n    # return self.price_json_to_dict(eia_request.json())\n    df = pl.DataFrame(json[\"response\"][\"data\"])\n    # df = df.with_columns(pl.col(\"period\").str.to_date().alias(\"period\"))\n    df = df.with_columns(pl.col(\"period\").str.strptime(pl.Date))\n    df = df.with_columns(\n        pl.col(\"period\").dt.year().alias(\"year\"),\n        
pl.col(\"period\").dt.month().alias(\"month\"),\n    )\n\n    monthly_avg_price = (\n        df.group_by([\"year\", \"month\"])\n        .agg(pl.col(\"value\").mean().alias(\"monthly_avg_price\"))\n        .sort(\"year\", \"month\")\n    )\n\n    return monthly_avg_price\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever.monthly_ng_price_per_mcf","title":"monthly_ng_price_per_mcf(state, start_date, end_date)","text":"

    Get a state's average natural gas price.

    Note

    Data is returned in dollars per thousand cubic feet (Mcf).

    Parameters:

    Name Type Description Default state str

    the 2 character postal code of a state

    required start_date date

    the start date, inclusive

    required end_date date

    the end date, non inclusive

    required

    Returns:

    Name Type Description dict dict[str, Any]

    the raw JSON response from the EIA API

    Source code in src\\backend\\secondarydata.py
    def monthly_ng_price_per_mcf(\n    self, state: str, start_date: datetime.date, end_date: datetime.date\n) -> dict[str, Any]:\n    \"\"\"Get a state's average natural gas price.\n\n    Note:\n        Data is returned in dollars per mega cubic feet.\n\n    Args:\n        state (str): the 2 character postal code of a state\n        start_date (datetime.date): the start date, inclusive\n        end_date (datetime.date): the end date, non inclusive\n\n    Returns:\n        dict: _description_\n    \"\"\"\n    # $/mcf\n    url = f\"https://api.eia.gov/v2/natural-gas/pri/sum/data/?frequency=monthly&data[0]=value&facets[duoarea][]=S{state}&facets[process][]=PRS&start={start_date.year}-{start_date.month:02}&end={end_date.year}-{end_date.month:02}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}\"\n\n    eia_request = req_get_wrapper(url)\n    eia_request.raise_for_status()\n\n    return eia_request.json()\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever.monthly_price_per_mbtu_by_energy_type","title":"monthly_price_per_mbtu_by_energy_type(energy_type, state, start_date, end_date)","text":"

    Get the cost per MBTU for the given energy type for the state, over the given period of time. Refer to EIA's documentation for changes to data collection during certain years.

    Parameters:

    Name Type Description Default energy_type EnergyType

    The energy type

    required state str

    the 2 character postal abbreviation. Note that for heating oil, only certain states have this information collected

    required start_date date

    the date for which to start the search. Inclusive. Note that for heating oil, only heating months will be returned

    required end_date date

    the date for which to end the search. Non inclusive

    required

    Raises:

    Type Description NotImplementedError

    Invalid energy type

    Returns:

    Name Type Description dict dict[str, str | EnergyType | float]

    year-month: price in USD per MBTU

    Source code in src\\backend\\secondarydata.py
    def monthly_price_per_mbtu_by_energy_type(\n    self,\n    energy_type: EnergyType,\n    state: str,\n    start_date: datetime.date,\n    end_date: datetime.date,\n) -> dict[str, str | EnergyType | float]:\n    \"\"\"Get the cost per MBTU for the given energy type for the state, over the given period of time. Refer to EIA's documentation\n    for changes to data collection during certain years.\n\n    Args:\n        energy_type (EnergyType): The energy type\n        state (str): the 2 character postal abbreviation. Note that for heating oil, only certain states have this information collected\n        start_date (datetime.date): the date for which to start the search. Inclusive. Not that for heating oil, only heating months will be returned\n        end_date (datetime.date): the date for which to end the search. Non inclusive\n\n    Raises:\n        NotImplementedError: Invalid energy type\n\n    Returns:\n        dict: year-month: price in USD to BTU\n    \"\"\"\n    if len(state) > 2:\n        state = sts.lookup(state).abbr  # type: ignore\n    match energy_type:\n        case self.EnergyType.PROPANE:\n            return self.price_per_mbtu_with_efficiency(\n                self.price_to_clean_dict(\n                    self.monthly_heating_season_propane_price_per_gal(\n                        state, start_date, end_date\n                    ),\n                    energy_type,\n                    state,\n                )\n            )\n        case self.EnergyType.NATURAL_GAS:\n            return self.price_per_mbtu_with_efficiency(\n                self.price_to_clean_dict(\n                    self.monthly_ng_price_per_mcf(state, start_date, end_date),\n                    energy_type,\n                    state,\n                )\n            )\n        case self.EnergyType.ELECTRICITY:\n            return self.price_per_mbtu_with_efficiency(\n                self.price_to_clean_dict(\n                    self.monthly_electricity_price_per_kwh(\n     
                   state, start_date, end_date\n                    ),\n                    energy_type,\n                    state,\n                )\n            )\n        case self.EnergyType.HEATING_OIL:\n            return self.price_per_mbtu_with_efficiency(\n                self.price_to_clean_dict(\n                    self.monthly_heating_season_heating_oil_price_per_gal(\n                        state, start_date, end_date\n                    ),\n                    energy_type,\n                    state,\n                )\n            )\n        case _:\n            raise NotImplementedError(f\"Unsupported energy type: {energy_type}\")\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever.monthly_price_per_mbtu_by_energy_type_by_state","title":"monthly_price_per_mbtu_by_energy_type_by_state(state, start_date, end_date)","text":"

    Get all available energy prices per MBTU, taking efficiency into account, for a state.

    Note

    Please keep times to within a year. For the non oil and propane, you have to go a month past.

    Parameters:

    Name Type Description Default state str

    2 character postal code

    required start_date date

    start date

    required end_date date

    end date

    required

    Returns:

    Type Description list[Any]

    list[Any]: list of price dicts for available energy types for a state

    Source code in src\\backend\\secondarydata.py
    def monthly_price_per_mbtu_by_energy_type_by_state(\n    self, state: str, start_date: datetime.date, end_date: datetime.date\n) -> list[Any]:\n    \"\"\"Get all available energy prices per MBTU, taking efficiency into account, for a state.\n\n    Note:\n        Please keep times to within a year. For the non oil and propane, you have to go a month past.\n\n    Args:\n        state (str): 2 character postal code\n        start_date (datetime.date): start date\n        end_date (datetime.date): end date\n\n    Returns:\n        list[Any]: list of price dicts for available energy types for a state\n    \"\"\"\n    if len(state) > 2:\n        state = sts.lookup(state).abbr  # type: ignore\n\n    dicts_to_return = []\n    if state in self.HEATING_OIL_STATES_ABBR:\n        dicts_to_return.append(\n            self.monthly_price_per_mbtu_by_energy_type(\n                self.EnergyType.HEATING_OIL, state, start_date, end_date\n            )\n        )\n    if state in self.PROPANE_STATES_ABBR:\n        dicts_to_return.append(\n            self.monthly_price_per_mbtu_by_energy_type(\n                self.EnergyType.PROPANE, state, start_date, end_date\n            )\n        )\n    dicts_to_return.append(\n        self.monthly_price_per_mbtu_by_energy_type(\n            self.EnergyType.NATURAL_GAS, state, start_date, end_date\n        )\n    )\n    dicts_to_return.append(\n        self.monthly_price_per_mbtu_by_energy_type(\n            self.EnergyType.ELECTRICITY, state, start_date, end_date\n        )\n    )\n    log(f\"{dicts_to_return = }\", \"debug\")\n    return dicts_to_return\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever.price_df_to_clean_dict","title":"price_df_to_clean_dict(eia_df, energy_type, state)","text":"

    Clean DataFrame data consisting of EIA API data.

    Parameters:

    Name Type Description Default eia_df DataFrame

    the DataFrame to clean

    required energy_type EnergyType

    the energy type

    required state str

    the state

    required

    Returns:

    Type Description dict[str, str | EnergyType | float]

    dict[str, str|EnergyType|float]: the dict

    Source code in src\\backend\\secondarydata.py
    def price_df_to_clean_dict(\n    self, eia_df: pl.DataFrame, energy_type: EnergyType, state: str\n) -> dict[str, str | EnergyType | float]:\n    \"\"\"Clean DataFrame data consisting of EIA API data.\n\n    Args:\n        eia_df (pl.DataFrame): the DataFrame to clean\n        energy_type (EnergyType): the energy type\n        state (str): the state\n\n    Returns:\n        dict[str, str|EnergyType|float]: the dict\n    \"\"\"\n    result_dict = {}\n    for row in eia_df.rows(named=True):\n        year_month = f\"{row.get(\"year\")}-{row.get(\"month\"):02}\"\n        if row.get(\"monthly_avg_price\") is not None:\n            result_dict[year_month] = round(row.get(\"monthly_avg_price\"), 3)  # type: ignore\n    result_dict[\"type\"] = energy_type.value\n    result_dict[\"state\"] = state\n    return result_dict\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever.price_dict_to_clean_dict","title":"price_dict_to_clean_dict(eia_json, energy_type, state)","text":"

    Clean JSON data returned by EIA's API.

    Parameters:

    Name Type Description Default eia_json dict

    the response JSON

    required energy_type EnergyType

    the energy type

    required state str

    the state

    required

    Returns:

    Type Description dict[str, str | EnergyType | float]

    dict[str, str | EnergyType | float]: cleaned JSON

    Source code in src\\backend\\secondarydata.py
    def price_dict_to_clean_dict(\n    self, eia_json: dict, energy_type: EnergyType, state: str\n) -> dict[str, str | EnergyType | float]:\n    \"\"\"Clean JSON data returned by EIA's API.\n\n    Args:\n        eia_json (dict): the response JSON\n        energy_type (EnergyType): the energy type\n        state (str): the state\n\n    Returns:\n        dict[str, str | EnergyType | float]: cleaned JSON\n    \"\"\"\n    # price key is different for electricity\n    accessor = \"value\"\n    if \"product\" not in eia_json[\"response\"][\"data\"][0]:\n        accessor = \"price\"\n\n    result_dict = {\n        entry[\"period\"]: entry[f\"{accessor}\"]\n        for entry in eia_json[\"response\"][\"data\"]\n    }\n    result_dict[\"type\"] = energy_type.value\n    result_dict[\"state\"] = state\n\n    return result_dict\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever.price_per_mbtu_with_efficiency","title":"price_per_mbtu_with_efficiency(energy_price_dict)","text":"

    Convert an energy source's price per quantity into price per BTU with an efficiency.

    Note

    Efficiency data taken from https://portfoliomanager.energystar.gov/pdf/reference/Thermal%20Conversions.pdf

    See also

    EIADataRetriever.HeaterEfficiencies

    Parameters:

    Name Type Description Default energy_price_dict dict

    energy source json

    required

    Returns:

    Name Type Description dict dict[str, str | EnergyType | float]

    new dictionary with btu centric pricing

    Source code in src\\backend\\secondarydata.py
    def price_per_mbtu_with_efficiency(\n    self, energy_price_dict: dict\n) -> dict[str, str | EnergyType | float]:\n    \"\"\"Convert an energy source's price per quantity into price per BTU with an efficiency.\n\n    Note:\n        Efficiency data taken from https://portfoliomanager.energystar.gov/pdf/reference/Thermal%20Conversions.pdf\n\n    See also:\n        `EIADataRetriever.HeaterEfficiencies`\n\n    Args:\n        energy_price_dict (dict): energy source json\n\n    Returns:\n        dict: new dictionary with btu centric pricing\n    \"\"\"\n    #! make new function based on burner type/ end usage type\n    CENTS_IN_DOLLAR = 100\n    match energy_price_dict.get(\"type\"):\n        case self.EnergyType.PROPANE.value:\n            # for loop is done for every case since i dont want to use `eval` or parse a string of division to keep PEMDAS. this is why i dont have an efficiency func yet\n            for key, value in energy_price_dict.items():\n                if (\n                    key in [\"type\", \"state\", None]\n                    or energy_price_dict.get(key) is None\n                ):\n                    continue\n                energy_price_dict[key] = (\n                    value\n                    / (\n                        self.FuelBTUConversion.PROPANE_BTU_PER_GAL.value\n                        * self.HeaterEfficiencies.PROPANE_FURNACE.value\n                    )\n                    * 1_000\n                )\n        case self.EnergyType.NATURAL_GAS.value:\n            for key, value in energy_price_dict.items():\n                if (\n                    key in [\"type\", \"state\", None]\n                    or energy_price_dict.get(key) is None\n                ):\n                    continue\n                energy_price_dict[key] = (\n                    value\n                    / (\n                        self.FuelBTUConversion.NG_BTU_PER_MCT.value\n                        * self.HeaterEfficiencies.NG_FURNACE.value\n     
               )\n                    * 1_000\n                )\n        case self.EnergyType.ELECTRICITY.value:\n            for key, value in energy_price_dict.items():\n                if (\n                    key in [\"type\", \"state\", None]\n                    or energy_price_dict.get(key) is None\n                ):\n                    continue\n                energy_price_dict[key] = (\n                    value\n                    / CENTS_IN_DOLLAR\n                    / (\n                        self.FuelBTUConversion.ELECTRICITY_BTU_PER_KWH.value\n                        * self.HeaterEfficiencies.HEAT_PUMP_DUCTED.value\n                    )\n                    * 1_000\n                )\n        case self.EnergyType.HEATING_OIL.value:\n            for key, value in energy_price_dict.items():\n                if (\n                    key in [\"type\", \"state\", None]\n                    or energy_price_dict.get(key) is None\n                ):\n                    continue\n                energy_price_dict[key] = (\n                    value\n                    / (\n                        self.FuelBTUConversion.HEATING_OIL_BTU_PER_GAL.value\n                        * self.HeaterEfficiencies.OIL_BOILER.value\n                    )\n                    * 1_000\n                )\n        case _:\n            log(\"Could not translate dict to btu per price.\", \"warn\")\n\n    return energy_price_dict\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever.price_to_clean_dict","title":"price_to_clean_dict(price_struct, energy_type, state)","text":"

    Handle the different data types that EIA data could be stored in.

    Parameters:

    Name Type Description Default price_struct dict | DataFrame

    a data structure containing the year, month, and price info

    required energy_type EnergyType

    the energy type

    required state str

    the state

    required

    Raises:

    Type Description TypeError

    raised if the type of price_struct is not supported

    Returns:

    Type Description dict[str, str | EnergyType | float]

    dict[str, str|EnergyType|float]: the normalized and structured data in dict form

    Source code in src\\backend\\secondarydata.py
    def price_to_clean_dict(\n    self, price_struct: dict | pl.DataFrame, energy_type: EnergyType, state: str\n) -> dict[str, str | EnergyType | float]:\n    \"\"\"Handle the different data types that EIA data could be stored in.\n\n    Args:\n        price_struct (dict | pl.DataFrame): a data structure containing the year, month, and price info\n        energy_type (EnergyType): the energy type\n        state (str): the state\n\n    Raises:\n        TypeError: raised if the type of `price_struct` is not supported\n\n    Returns:\n        dict[str, str|EnergyType|float]: the normalized and structured data in dict form\n    \"\"\"\n    match price_struct:\n        case dict():\n            return self.price_dict_to_clean_dict(price_struct, energy_type, state)\n        case pl.DataFrame():\n            return self.price_df_to_clean_dict(price_struct, energy_type, state)\n        case _:\n            raise TypeError(f\"Type not supported: {type(energy_type)}\")\n
    "},{"location":"backend/us/states/","title":"States","text":""},{"location":"backend/us/states/#backend.us.states.State","title":"State","text":"Source code in src\\backend\\us\\states.py
    class State:\n    abbr: str\n    ap_abbr: Optional[str]\n    capital: Optional[str]\n    capital_tz: Optional[str]\n    fips: Optional[str]\n    is_territory: bool\n    is_obsolete: bool\n    is_contiguous: bool\n    is_continental: bool\n    name: str\n    name_metaphone: str\n    statehood_year: Optional[int]\n    time_zones: List[str]\n\n    def __init__(self, **kwargs):\n        for k, v in kwargs.items():\n            setattr(self, k, v)\n\n    def __repr__(self) -> str:\n        return f\"<State:{self.name}>\"\n\n    def __str__(self) -> str:\n        return self.name\n\n    def shapefile_urls(self) -> Optional[Dict[str, str]]:\n        \"\"\"Shapefiles are available directly from the US Census Bureau:\n        https://www.census.gov/cgi-bin/geo/shapefiles/index.php\n        \"\"\"\n\n        fips = self.fips\n\n        if not fips:\n            return None\n\n        base = \"https://www2.census.gov/geo/tiger/TIGER2010/\"\n        urls = {\n            \"tract\": urljoin(base, f\"TRACT/2010/tl_2010_{fips}_tract10.zip\"),\n            \"cd\": urljoin(base, f\"CD/111/tl_2010_{fips}_cd111.zip\"),\n            \"county\": urljoin(base, f\"COUNTY/2010/tl_2010_{fips}_county10.zip\"),\n            \"state\": urljoin(base, f\"STATE/2010/tl_2010_{fips}_state10.zip\"),\n            \"zcta\": urljoin(base, f\"ZCTA5/2010/tl_2010_{fips}_zcta510.zip\"),\n            \"block\": urljoin(base, f\"TABBLOCK/2010/tl_2010_{fips}_tabblock10.zip\"),\n            \"blockgroup\": urljoin(base, f\"BG/2010/tl_2010_{fips}_bg10.zip\"),\n        }\n\n        return urls\n
    "},{"location":"backend/us/states/#backend.us.states.State.shapefile_urls","title":"shapefile_urls()","text":"

    Shapefiles are available directly from the US Census Bureau: https://www.census.gov/cgi-bin/geo/shapefiles/index.php

    Source code in src\\backend\\us\\states.py
    def shapefile_urls(self) -> Optional[Dict[str, str]]:\n    \"\"\"Shapefiles are available directly from the US Census Bureau:\n    https://www.census.gov/cgi-bin/geo/shapefiles/index.php\n    \"\"\"\n\n    fips = self.fips\n\n    if not fips:\n        return None\n\n    base = \"https://www2.census.gov/geo/tiger/TIGER2010/\"\n    urls = {\n        \"tract\": urljoin(base, f\"TRACT/2010/tl_2010_{fips}_tract10.zip\"),\n        \"cd\": urljoin(base, f\"CD/111/tl_2010_{fips}_cd111.zip\"),\n        \"county\": urljoin(base, f\"COUNTY/2010/tl_2010_{fips}_county10.zip\"),\n        \"state\": urljoin(base, f\"STATE/2010/tl_2010_{fips}_state10.zip\"),\n        \"zcta\": urljoin(base, f\"ZCTA5/2010/tl_2010_{fips}_zcta510.zip\"),\n        \"block\": urljoin(base, f\"TABBLOCK/2010/tl_2010_{fips}_tabblock10.zip\"),\n        \"blockgroup\": urljoin(base, f\"BG/2010/tl_2010_{fips}_bg10.zip\"),\n    }\n\n    return urls\n
    "},{"location":"backend/us/states/#backend.us.states.lookup","title":"lookup(val, field=None, use_cache=True)","text":"

    State lookup. This method will make a best effort attempt at finding the state based on the lookup value provided.

    • two digits will search for FIPS code
    • two letters will search for state abbreviation

    Exact matches can be done on any attribute on State objects by passing the field argument. This does an exact, case-sensitive comparison against the specified field.

    This method caches non-None results, but the cache can be bypassed with the use_cache=False argument.

    Source code in src\\backend\\us\\states.py
    def lookup(val, field: Optional[str] = None, use_cache: bool = True) -> Optional[State]:\n    \"\"\"State lookup. This method will make a best effort\n    attempt at finding the state based on the lookup value provided.\n\n      * two digits will search for FIPS code\n      * two letters will search for state abbreviation\n\n    Exact matches can be done on any attribute on State objects by passing\n    the `field` argument. This does an exact, case-sensitive comparison against\n    the specified field.\n\n    This method caches non-None results, but can the cache can be bypassed\n    with the `use_cache=False` argument.\n    \"\"\"\n\n    matched_state = None\n\n    if field is None:\n        if FIPS_RE.match(val):\n            field = \"fips\"\n        elif ABBR_RE.match(val):\n            val = val.upper()\n            field = \"abbr\"\n        else:\n            val = val.title()\n            field = \"name\"\n\n    # see if result is in cache\n    cache_key = f\"{field}:{val}\"\n    if use_cache and cache_key in _lookup_cache:\n        matched_state = _lookup_cache[cache_key]\n\n    for state in STATES_AND_TERRITORIES:\n        if val == getattr(state, field):\n            matched_state = state\n            if use_cache:\n                _lookup_cache[cache_key] = state\n\n    return matched_state\n
    "},{"location":"backend/us/unitedstatesofamerica/","title":"Unitedstatesofamerica","text":""},{"location":"gui/app/","title":"App","text":""},{"location":"gui/datapage/","title":"Datapage","text":""},{"location":"gui/datapage/#gui.datapage.DataPage","title":"DataPage","text":"

    Bases: CTkFrame

    Create page for displaying energy data and links to censusreporter.org for census level data

    Source code in src\\gui\\datapage.py
    class DataPage(ctk.CTkFrame):\n    \"\"\"Crate page for displaying energy data and links to censusreporter.org for census level data\"\"\"\n\n    def __init__(self, master, **kwargs):\n        super().__init__(master, **kwargs)\n        self.msa_name = None\n        self.income_df = None\n        self.demog_df = None\n        self.states_in_msa = None\n        self.state_demog_dfs = None\n        self.state_income_dfs = None\n        self.cur_year = datetime.datetime.now().year\n        self.years = [\n            str(self.cur_year),\n            str(self.cur_year - 1),\n            str(self.cur_year - 2),\n            str(self.cur_year - 3),\n            str(self.cur_year - 4),\n        ]\n        self.roboto_font = ctk.CTkFont(family=\"Roboto\")\n        self.roboto_header_font = ctk.CTkFont(family=\"Roboto\", size=28)\n        self.roboto_link_font = ctk.CTkFont(family=\"Roboto\", underline=True, size=20)\n        self.create_widgets()\n\n    def create_widgets(self) -> None:\n        \"\"\"Create widgets.\"\"\"\n        # bug in sockets library wont allow you to raise keyboardinterrupt, so stopping\n        # Content frame will have 4 rows. 
first will be header, 2nd is energy graph, 3rd will contain a frame that has censusreport.org links, 4th will have progress bar frame\n        self.content_frame = ctk.CTkFrame(self, border_width=2)\n        self.content_banner_frame = ctk.CTkFrame(self.content_frame, border_width=2)\n        self.state_and_year_content_banner_dropdown_frame = ctk.CTkFrame(\n            self.content_banner_frame, border_width=2\n        )\n        self.census_reporter_frame = ctk.CTkFrame(self.content_frame, border_width=2)\n        self.log_frame = ctk.CTkFrame(self.content_frame, border_width=2)\n\n        self.content_banner_main_text = ctk.CTkLabel(\n            self.content_banner_frame,\n            text=\"Census and Energy Data:\",\n            font=self.roboto_header_font,\n        )\n        self.content_banner_main_text.bind(\n            \"<Configure>\",\n            command=lambda x: self.content_banner_main_text.configure(\n                wraplength=self.content_banner_main_text._current_width\n                - 40  # random padding\n            ),\n        )\n        # nested frame for holding filters and text inside banner frame\n\n        self.select_state_label = ctk.CTkLabel(\n            self.state_and_year_content_banner_dropdown_frame,\n            text=\"Select State\",\n            font=self.roboto_font,\n        )\n        self.select_state_dropdown = ctk.CTkOptionMenu(\n            self.state_and_year_content_banner_dropdown_frame,\n            values=None,\n            command=self.state_dropdown_callback,\n        )\n\n        self.select_year_label = ctk.CTkLabel(\n            self.state_and_year_content_banner_dropdown_frame,\n            text=\"Select Year\",\n            font=self.roboto_font,\n        )\n        self.select_year_dropdown = ctk.CTkOptionMenu(\n            self.state_and_year_content_banner_dropdown_frame,\n            values=self.years,\n            command=self.year_dropdown_callback,\n        )\n\n        self.energy_graph_frame = 
ctk.CTkFrame(self.content_frame, border_width=2)\n\n        self.census_reporter_state_label = ctk.CTkLabel(\n            self.census_reporter_frame,\n            text=\"Census Reporter: State Report\",\n            font=self.roboto_link_font,\n            cursor=\"hand2\",\n            text_color=\"blue\",\n        )\n\n        self.log_button = ctk.CTkButton(\n            self.log_frame, text=\"Open Log File\", command=self.open_log_file\n        )\n        self.census_reporter_state_label.bind(\n            \"<Button-1>\", lambda x: self.open_census_reporter_state()\n        )\n        self.census_reporter_metro_label = ctk.CTkLabel(\n            self.census_reporter_frame,\n            text=\"Census Reporter: Metro Report\",\n            font=self.roboto_link_font,\n            cursor=\"hand2\",\n            text_color=\"blue\",\n        )\n        self.census_reporter_metro_label.bind(\n            \"<Button-1>\", lambda x: self.open_census_reporter_metro()\n        )\n        # create grid\n        # col\n        self.columnconfigure(0, weight=1)\n        self.content_frame.columnconfigure(0, weight=1)\n        self.content_banner_frame.columnconfigure((0, 1), weight=1)\n        self.state_and_year_content_banner_dropdown_frame.columnconfigure(\n            (0, 1), weight=1\n        )\n        self.energy_graph_frame.columnconfigure(0, weight=1)\n        self.census_reporter_frame.columnconfigure(0, weight=1)\n        self.log_frame.columnconfigure(0, weight=1)\n\n        # row\n        self.rowconfigure(0, weight=1)\n\n        self.content_frame.rowconfigure(0, weight=1)  # banner\n        self.content_frame.rowconfigure(1, weight=5)  # energy graph\n        self.content_frame.rowconfigure(2, weight=2)  # census reporter frame\n        self.content_frame.rowconfigure(3, weight=1)\n\n        self.content_banner_frame.rowconfigure(0, weight=1)\n\n        self.state_and_year_content_banner_dropdown_frame.rowconfigure((0, 1), weight=1)\n\n        
self.energy_graph_frame.rowconfigure(0, weight=1)\n\n        self.census_reporter_frame.rowconfigure((0, 1), weight=1)\n\n        self.log_frame.rowconfigure(0, weight=1)\n\n        # placement\n        self.content_frame.grid(column=0, row=0, sticky=\"news\")\n\n        self.content_banner_frame.grid(column=0, row=0, sticky=\"news\")\n\n        self.content_banner_main_text.grid(column=0, row=0, sticky=\"nsew\")\n\n        self.state_and_year_content_banner_dropdown_frame.grid(\n            column=1, row=0, sticky=\"news\"\n        )\n\n        self.select_state_label.grid(column=0, row=0, sticky=\"news\")\n        self.select_year_label.grid(column=1, row=0, sticky=\"news\")\n        self.select_state_dropdown.grid(column=0, row=1)\n        self.select_year_dropdown.grid(column=1, row=1)\n\n        self.energy_graph_frame.grid(column=0, row=1, sticky=\"news\")\n\n        self.census_reporter_frame.grid(column=0, row=2, sticky=\"news\")\n        self.census_reporter_state_label.grid(column=0, row=0)\n        self.census_reporter_metro_label.grid(column=0, row=1)\n\n        self.log_frame.grid(column=0, row=3, sticky=\"news\")\n        self.log_button.grid(column=0, row=0, pady=10)\n\n    def set_msa_name(self, msa_name: str) -> None:\n        \"\"\"Set the msa name and update objects that rely on the msa name. Includes drop downs and and generating the energy plot.\n\n        Args:\n            msa_name (str): Metropolitan Statistical Area name. 
This must be validated\n        \"\"\"\n        self.msa_name = msa_name\n        self.states_in_msa = helper.get_states_in_msa(self.msa_name)\n\n        if len(self.states_in_msa) > 0:\n            self.select_state_dropdown.configure()\n            self.select_state_dropdown.set(self.states_in_msa[0])\n\n        self.select_state_dropdown.configure(values=self.states_in_msa)\n        self.content_banner_main_text.configure(\n            text=f\"Census and Energy Data: {self.msa_name}\"\n        )\n        self.zip_list = helper.metro_name_to_zip_code_list(msa_name)\n        self.zip_list = [str(zip) for zip in self.zip_list]\n\n        threading.Thread(\n            target=self.generate_energy_plot,\n            args=(\n                int(self.select_year_dropdown.get()),\n                self.select_state_dropdown.get(),\n            ),\n            daemon=True,\n        ).start()\n\n    def generate_energy_plot(self, year: int, state: str) -> None:\n        \"\"\"Call the EIA API and generate a plot with the received data.\n\n        Note:\n            Call this in a thread so that it doesn't freeze the GUI\n            Update: might want to just get the data and plot on the main thread\n        \"\"\"\n        eia = EIADataRetriever()\n        energy_price_per_mbtu_by_type_for_state = (\n            eia.monthly_price_per_mbtu_by_energy_type_by_state(\n                state, datetime.date(year, 1, 1), datetime.date(year + 1, 1, 1)\n            )\n        )\n\n        fig = Figure(layout=\"compressed\", facecolor=\"blue\")\n        ax = fig.add_subplot()\n        ax.set_xlabel(\"Time (Months)\")\n        ax.set_ylabel(\"Cost per Effective MBTU ($/MBTU)\")\n        ax.set_title(\n            f\"Avg. 
Energy Prices by Appliance for {state}, {year}\",\n            loc=\"center\",\n            wrap=True,\n        )\n        months = [i for i in range(1, 13)]\n        month_names = [\n            \"Jan\",\n            \"Feb\",\n            \"Mar\",\n            \"Apr\",\n            \"May\",\n            \"Jun\",\n            \"Jul\",\n            \"Aug\",\n            \"Sep\",\n            \"Oct\",\n            \"Nov\",\n            \"Dec\",\n        ]\n        ax.set_xticks(months)\n        labels = [item.get_text() for item in ax.get_xticklabels()]\n\n        # Modify specific labels, keeping offset\n        for i in range(0, 12):\n            labels[i] = month_names[i]\n        ax.set_xticklabels(labels)\n\n        for energy_dict in energy_price_per_mbtu_by_type_for_state:\n            if len(energy_dict) < 3:\n                log(\n                    f\"Issue with energy type {energy_dict.get(\"type\")} for state {energy_dict.get(\"state\")}\",\n                    \"debug\",\n                )\n                continue\n            match energy_dict.get(\"type\"):\n                case EIADataRetriever.EnergyType.PROPANE.value:\n                    result_list = []\n                    for month in months:\n                        key = f\"{year}-{month:02}\"\n                        val = energy_dict.get(key, float(\"NaN\"))\n                        if val is None:\n                            val = float(\"NaN\")\n                        result_list.append(val)\n                    ax.plot(months, result_list, label=\"Propane Furnace\")\n                case EIADataRetriever.EnergyType.HEATING_OIL.value:\n                    result_list = []\n                    for month in months:\n                        key = f\"{year}-{month:02}\"\n                        val = energy_dict.get(key, float(\"NaN\"))\n                        if val is None:\n                            val = float(\"NaN\")\n                        result_list.append(val)\n               
     ax.plot(months, result_list, label=\"Heating Oil Boiler\")\n                case EIADataRetriever.EnergyType.NATURAL_GAS.value:\n                    result_list = []\n                    for month in months:\n                        key = f\"{year}-{month:02}\"\n                        val = energy_dict.get(key, float(\"NaN\"))\n                        if val is None:\n                            val = float(\"NaN\")\n                        result_list.append(val)\n                    ax.plot(months, result_list, label=\"Natural Gas Furnace\")\n                case EIADataRetriever.EnergyType.ELECTRICITY.value:\n                    result_list = []\n                    for month in months:\n                        key = f\"{year}-{month:02}\"\n                        val = energy_dict.get(key, float(\"NaN\"))\n                        if val is None:\n                            val = float(\"NaN\")\n                        result_list.append(val)\n                    ax.plot(months, result_list, label=\"Ducted Heat Pump\")\n        ax.legend()\n        with threading.Lock():\n            canvas = FigureCanvasTkAgg(fig, master=self.energy_graph_frame)\n            canvas.draw()\n\n            # toolbar = NavigationToolbar2Tk(canvas, window=self.energy_graph_frame, pack_toolbar=False)\n            # toolbar.update()\n            # canvas.mpl_connect(\"key_press_event\", key_press_handler)\n\n            # toolbar.grid(column=0, row=1, sticky=\"news\")\n            canvas.get_tk_widget().grid(column=0, row=0)\n\n    def open_census_reporter_state(self) -> None:\n        \"\"\"Census reporter state label callback\"\"\"\n        state_link = helper.get_census_report_url_page(\n            sts.lookup(self.select_state_dropdown.get()).name  # type: ignore\n        )\n        webbrowser.open_new_tab(state_link)\n\n    def open_census_reporter_metro(self) -> None:\n        \"\"\"Census reporter metro label callback\"\"\"\n        metro_link = 
helper.get_census_report_url_page(f\"{self.msa_name} metro area\")  # type: ignore\n        webbrowser.open_new_tab(metro_link)\n\n    def state_dropdown_callback(self, state: str) -> None:\n        \"\"\"Banner state callback.\n        TODO:\n            check if thread is running with given name, and if so join it and start the new thread\n\n        Args:\n            state (str): the state after the change\n        \"\"\"\n\n        threading.Thread(\n            target=self.generate_energy_plot,\n            args=(\n                int(self.select_year_dropdown.get()),\n                state,\n            ),\n            name=\"energy_thread\",\n            daemon=True,\n        ).start()\n\n    def year_dropdown_callback(self, year: str) -> None:\n        \"\"\"Banner year callback.\n        TODO:\n            Check if thread is running with given name, and if so join it and start the new thread\n\n        Args:\n            year (str): the year after the change\n        \"\"\"\n        threading.Thread(\n            target=self.generate_energy_plot,\n            args=(\n                int(year),\n                self.select_state_dropdown.get(),\n            ),\n            name=\"energy_thread\",\n            daemon=True,\n        ).start()\n\n    def open_log_file(self) -> None:\n        \"\"\"Open logging file.\n\n        Note:\n            Haven't tested this on mac/linux. \"darwin\" doesn't exist in `system.platform` on windows, so cant say for sure if this works\n        \"\"\"\n        try:\n            if sys.platform == \"win32\":\n                startfile(helper.LOGGING_FILE_PATH)\n            else:\n                opener = \"open\" if sys.platform == \"darwin\" else \"xdg-open\"\n                subprocess.call([opener, helper.LOGGING_FILE_PATH])\n        except FileNotFoundError:\n            CTkMessagebox(\n                self,\n                title=\"Error\",\n                message=\"Logging file doesn't exist! 
Try rerunning the program or creating a logger.log file in /output/logging/\",\n                icon=\"warning\",\n            )\n
    "},{"location":"gui/datapage/#gui.datapage.DataPage.create_widgets","title":"create_widgets()","text":"

    Create widgets.

    Source code in src\\gui\\datapage.py
    def create_widgets(self) -> None:\n    \"\"\"Create widgets.\"\"\"\n    # bug in sockets library wont allow you to raise keyboardinterrupt, so stopping\n    # Content frame will have 4 rows. first will be header, 2nd is energy graph, 3rd will contain a frame that has censusreport.org links, 4th will have progress bar frame\n    self.content_frame = ctk.CTkFrame(self, border_width=2)\n    self.content_banner_frame = ctk.CTkFrame(self.content_frame, border_width=2)\n    self.state_and_year_content_banner_dropdown_frame = ctk.CTkFrame(\n        self.content_banner_frame, border_width=2\n    )\n    self.census_reporter_frame = ctk.CTkFrame(self.content_frame, border_width=2)\n    self.log_frame = ctk.CTkFrame(self.content_frame, border_width=2)\n\n    self.content_banner_main_text = ctk.CTkLabel(\n        self.content_banner_frame,\n        text=\"Census and Energy Data:\",\n        font=self.roboto_header_font,\n    )\n    self.content_banner_main_text.bind(\n        \"<Configure>\",\n        command=lambda x: self.content_banner_main_text.configure(\n            wraplength=self.content_banner_main_text._current_width\n            - 40  # random padding\n        ),\n    )\n    # nested frame for holding filters and text inside banner frame\n\n    self.select_state_label = ctk.CTkLabel(\n        self.state_and_year_content_banner_dropdown_frame,\n        text=\"Select State\",\n        font=self.roboto_font,\n    )\n    self.select_state_dropdown = ctk.CTkOptionMenu(\n        self.state_and_year_content_banner_dropdown_frame,\n        values=None,\n        command=self.state_dropdown_callback,\n    )\n\n    self.select_year_label = ctk.CTkLabel(\n        self.state_and_year_content_banner_dropdown_frame,\n        text=\"Select Year\",\n        font=self.roboto_font,\n    )\n    self.select_year_dropdown = ctk.CTkOptionMenu(\n        self.state_and_year_content_banner_dropdown_frame,\n        values=self.years,\n        command=self.year_dropdown_callback,\n    
)\n\n    self.energy_graph_frame = ctk.CTkFrame(self.content_frame, border_width=2)\n\n    self.census_reporter_state_label = ctk.CTkLabel(\n        self.census_reporter_frame,\n        text=\"Census Reporter: State Report\",\n        font=self.roboto_link_font,\n        cursor=\"hand2\",\n        text_color=\"blue\",\n    )\n\n    self.log_button = ctk.CTkButton(\n        self.log_frame, text=\"Open Log File\", command=self.open_log_file\n    )\n    self.census_reporter_state_label.bind(\n        \"<Button-1>\", lambda x: self.open_census_reporter_state()\n    )\n    self.census_reporter_metro_label = ctk.CTkLabel(\n        self.census_reporter_frame,\n        text=\"Census Reporter: Metro Report\",\n        font=self.roboto_link_font,\n        cursor=\"hand2\",\n        text_color=\"blue\",\n    )\n    self.census_reporter_metro_label.bind(\n        \"<Button-1>\", lambda x: self.open_census_reporter_metro()\n    )\n    # create grid\n    # col\n    self.columnconfigure(0, weight=1)\n    self.content_frame.columnconfigure(0, weight=1)\n    self.content_banner_frame.columnconfigure((0, 1), weight=1)\n    self.state_and_year_content_banner_dropdown_frame.columnconfigure(\n        (0, 1), weight=1\n    )\n    self.energy_graph_frame.columnconfigure(0, weight=1)\n    self.census_reporter_frame.columnconfigure(0, weight=1)\n    self.log_frame.columnconfigure(0, weight=1)\n\n    # row\n    self.rowconfigure(0, weight=1)\n\n    self.content_frame.rowconfigure(0, weight=1)  # banner\n    self.content_frame.rowconfigure(1, weight=5)  # energy graph\n    self.content_frame.rowconfigure(2, weight=2)  # census reporter frame\n    self.content_frame.rowconfigure(3, weight=1)\n\n    self.content_banner_frame.rowconfigure(0, weight=1)\n\n    self.state_and_year_content_banner_dropdown_frame.rowconfigure((0, 1), weight=1)\n\n    self.energy_graph_frame.rowconfigure(0, weight=1)\n\n    self.census_reporter_frame.rowconfigure((0, 1), weight=1)\n\n    self.log_frame.rowconfigure(0, 
weight=1)\n\n    # placement\n    self.content_frame.grid(column=0, row=0, sticky=\"news\")\n\n    self.content_banner_frame.grid(column=0, row=0, sticky=\"news\")\n\n    self.content_banner_main_text.grid(column=0, row=0, sticky=\"nsew\")\n\n    self.state_and_year_content_banner_dropdown_frame.grid(\n        column=1, row=0, sticky=\"news\"\n    )\n\n    self.select_state_label.grid(column=0, row=0, sticky=\"news\")\n    self.select_year_label.grid(column=1, row=0, sticky=\"news\")\n    self.select_state_dropdown.grid(column=0, row=1)\n    self.select_year_dropdown.grid(column=1, row=1)\n\n    self.energy_graph_frame.grid(column=0, row=1, sticky=\"news\")\n\n    self.census_reporter_frame.grid(column=0, row=2, sticky=\"news\")\n    self.census_reporter_state_label.grid(column=0, row=0)\n    self.census_reporter_metro_label.grid(column=0, row=1)\n\n    self.log_frame.grid(column=0, row=3, sticky=\"news\")\n    self.log_button.grid(column=0, row=0, pady=10)\n
    "},{"location":"gui/datapage/#gui.datapage.DataPage.generate_energy_plot","title":"generate_energy_plot(year, state)","text":"

    Call the EIA API and generate a plot with the received data.

    Note

    Call this in a thread so that it doesn't freeze the GUI. Update: might want to just get the data and plot on the main thread.

    Source code in src\\gui\\datapage.py
    def generate_energy_plot(self, year: int, state: str) -> None:\n    \"\"\"Call the EIA API and generate a plot with the received data.\n\n    Note:\n        Call this in a thread so that it doesn't freeze the GUI\n        Update: might want to just get the data and plot on the main thread\n    \"\"\"\n    eia = EIADataRetriever()\n    energy_price_per_mbtu_by_type_for_state = (\n        eia.monthly_price_per_mbtu_by_energy_type_by_state(\n            state, datetime.date(year, 1, 1), datetime.date(year + 1, 1, 1)\n        )\n    )\n\n    fig = Figure(layout=\"compressed\", facecolor=\"blue\")\n    ax = fig.add_subplot()\n    ax.set_xlabel(\"Time (Months)\")\n    ax.set_ylabel(\"Cost per Effective MBTU ($/MBTU)\")\n    ax.set_title(\n        f\"Avg. Energy Prices by Appliance for {state}, {year}\",\n        loc=\"center\",\n        wrap=True,\n    )\n    months = [i for i in range(1, 13)]\n    month_names = [\n        \"Jan\",\n        \"Feb\",\n        \"Mar\",\n        \"Apr\",\n        \"May\",\n        \"Jun\",\n        \"Jul\",\n        \"Aug\",\n        \"Sep\",\n        \"Oct\",\n        \"Nov\",\n        \"Dec\",\n    ]\n    ax.set_xticks(months)\n    labels = [item.get_text() for item in ax.get_xticklabels()]\n\n    # Modify specific labels, keeping offset\n    for i in range(0, 12):\n        labels[i] = month_names[i]\n    ax.set_xticklabels(labels)\n\n    for energy_dict in energy_price_per_mbtu_by_type_for_state:\n        if len(energy_dict) < 3:\n            log(\n                f\"Issue with energy type {energy_dict.get(\"type\")} for state {energy_dict.get(\"state\")}\",\n                \"debug\",\n            )\n            continue\n        match energy_dict.get(\"type\"):\n            case EIADataRetriever.EnergyType.PROPANE.value:\n                result_list = []\n                for month in months:\n                    key = f\"{year}-{month:02}\"\n                    val = energy_dict.get(key, float(\"NaN\"))\n                    if val 
is None:\n                        val = float(\"NaN\")\n                    result_list.append(val)\n                ax.plot(months, result_list, label=\"Propane Furnace\")\n            case EIADataRetriever.EnergyType.HEATING_OIL.value:\n                result_list = []\n                for month in months:\n                    key = f\"{year}-{month:02}\"\n                    val = energy_dict.get(key, float(\"NaN\"))\n                    if val is None:\n                        val = float(\"NaN\")\n                    result_list.append(val)\n                ax.plot(months, result_list, label=\"Heating Oil Boiler\")\n            case EIADataRetriever.EnergyType.NATURAL_GAS.value:\n                result_list = []\n                for month in months:\n                    key = f\"{year}-{month:02}\"\n                    val = energy_dict.get(key, float(\"NaN\"))\n                    if val is None:\n                        val = float(\"NaN\")\n                    result_list.append(val)\n                ax.plot(months, result_list, label=\"Natural Gas Furnace\")\n            case EIADataRetriever.EnergyType.ELECTRICITY.value:\n                result_list = []\n                for month in months:\n                    key = f\"{year}-{month:02}\"\n                    val = energy_dict.get(key, float(\"NaN\"))\n                    if val is None:\n                        val = float(\"NaN\")\n                    result_list.append(val)\n                ax.plot(months, result_list, label=\"Ducted Heat Pump\")\n    ax.legend()\n    with threading.Lock():\n        canvas = FigureCanvasTkAgg(fig, master=self.energy_graph_frame)\n        canvas.draw()\n\n        # toolbar = NavigationToolbar2Tk(canvas, window=self.energy_graph_frame, pack_toolbar=False)\n        # toolbar.update()\n        # canvas.mpl_connect(\"key_press_event\", key_press_handler)\n\n        # toolbar.grid(column=0, row=1, sticky=\"news\")\n        canvas.get_tk_widget().grid(column=0, row=0)\n
    "},{"location":"gui/datapage/#gui.datapage.DataPage.open_census_reporter_metro","title":"open_census_reporter_metro()","text":"

    Census reporter metro label callback

    Source code in src\\gui\\datapage.py
    def open_census_reporter_metro(self) -> None:\n    \"\"\"Census reporter metro label callback\"\"\"\n    metro_link = helper.get_census_report_url_page(f\"{self.msa_name} metro area\")  # type: ignore\n    webbrowser.open_new_tab(metro_link)\n
    "},{"location":"gui/datapage/#gui.datapage.DataPage.open_census_reporter_state","title":"open_census_reporter_state()","text":"

    Census reporter state label callback

    Source code in src\\gui\\datapage.py
    def open_census_reporter_state(self) -> None:\n    \"\"\"Census reporter state label callback\"\"\"\n    state_link = helper.get_census_report_url_page(\n        sts.lookup(self.select_state_dropdown.get()).name  # type: ignore\n    )\n    webbrowser.open_new_tab(state_link)\n
    "},{"location":"gui/datapage/#gui.datapage.DataPage.open_log_file","title":"open_log_file()","text":"

    Open logging file.

    Note

    Haven't tested this on macOS/Linux. \"darwin\" is never the value of sys.platform on Windows, so can't say for sure if this works.

    Source code in src\\gui\\datapage.py
    def open_log_file(self) -> None:\n    \"\"\"Open logging file.\n\n    Note:\n        Haven't tested this on mac/linux. \"darwin\" doesn't exist in `system.platform` on windows, so cant say for sure if this works\n    \"\"\"\n    try:\n        if sys.platform == \"win32\":\n            startfile(helper.LOGGING_FILE_PATH)\n        else:\n            opener = \"open\" if sys.platform == \"darwin\" else \"xdg-open\"\n            subprocess.call([opener, helper.LOGGING_FILE_PATH])\n    except FileNotFoundError:\n        CTkMessagebox(\n            self,\n            title=\"Error\",\n            message=\"Logging file doesn't exist! Try rerunning the program or creating a logger.log file in /output/logging/\",\n            icon=\"warning\",\n        )\n
    "},{"location":"gui/datapage/#gui.datapage.DataPage.set_msa_name","title":"set_msa_name(msa_name)","text":"

    Set the MSA name and update objects that rely on it. This includes the dropdowns and generating the energy plot.

    Parameters:

    Name Type Description Default msa_name str

    Metropolitan Statistical Area name. This must be validated

    required Source code in src\\gui\\datapage.py
    def set_msa_name(self, msa_name: str) -> None:\n    \"\"\"Set the msa name and update objects that rely on the msa name. Includes drop downs and and generating the energy plot.\n\n    Args:\n        msa_name (str): Metropolitan Statistical Area name. This must be validated\n    \"\"\"\n    self.msa_name = msa_name\n    self.states_in_msa = helper.get_states_in_msa(self.msa_name)\n\n    if len(self.states_in_msa) > 0:\n        self.select_state_dropdown.configure()\n        self.select_state_dropdown.set(self.states_in_msa[0])\n\n    self.select_state_dropdown.configure(values=self.states_in_msa)\n    self.content_banner_main_text.configure(\n        text=f\"Census and Energy Data: {self.msa_name}\"\n    )\n    self.zip_list = helper.metro_name_to_zip_code_list(msa_name)\n    self.zip_list = [str(zip) for zip in self.zip_list]\n\n    threading.Thread(\n        target=self.generate_energy_plot,\n        args=(\n            int(self.select_year_dropdown.get()),\n            self.select_state_dropdown.get(),\n        ),\n        daemon=True,\n    ).start()\n
    "},{"location":"gui/datapage/#gui.datapage.DataPage.state_dropdown_callback","title":"state_dropdown_callback(state)","text":"

    Banner state callback. TODO: check if a thread with the given name is running, and if so join it and start the new thread.

    Parameters:

    Name Type Description Default state str

    the state after the change

    required Source code in src\\gui\\datapage.py
    def state_dropdown_callback(self, state: str) -> None:\n    \"\"\"Banner state callback.\n    TODO:\n        check if thread is running with given name, and if so join it and start the new thread\n\n    Args:\n        state (str): the state after the change\n    \"\"\"\n\n    threading.Thread(\n        target=self.generate_energy_plot,\n        args=(\n            int(self.select_year_dropdown.get()),\n            state,\n        ),\n        name=\"energy_thread\",\n        daemon=True,\n    ).start()\n
    "},{"location":"gui/datapage/#gui.datapage.DataPage.year_dropdown_callback","title":"year_dropdown_callback(year)","text":"

    Banner year callback. TODO: check if a thread with the given name is running, and if so join it and start the new thread.

    Parameters:

    Name Type Description Default year str

    the year after the change

    required Source code in src\\gui\\datapage.py
    def year_dropdown_callback(self, year: str) -> None:\n    \"\"\"Banner year callback.\n    TODO:\n        Check if thread is running with given name, and if so join it and start the new thread\n\n    Args:\n        year (str): the year after the change\n    \"\"\"\n    threading.Thread(\n        target=self.generate_energy_plot,\n        args=(\n            int(year),\n            self.select_state_dropdown.get(),\n        ),\n        name=\"energy_thread\",\n        daemon=True,\n    ).start()\n
    "},{"location":"gui/filterspage/","title":"Filterspage","text":""},{"location":"gui/filterspage/#gui.filterspage.FiltersPage","title":"FiltersPage","text":"

    Bases: CTkFrame

    Source code in src\\gui\\filterspage.py
    class FiltersPage(ctk.CTkFrame):\n    def __init__(self, master: ctk.CTk, search_page: ctk.CTkFrame, **kwargs):\n        # main setup\n        super().__init__(master, **kwargs)\n        self.root = master\n        self.search_page = search_page\n        self.cur_year = datetime.datetime.now().year\n        self.year_list = [str(x) for x in range(2010, self.cur_year + 1)]\n        list.reverse(self.year_list)\n        self.sqft_list = [sqft.value for sqft in RedfinApi.Sqft]\n        list.reverse(self.sqft_list)\n        self.sold_within_list = [\n            \"Last 1 week\",\n            \"Last 1 month\",\n            \"Last 3 months\",\n            \"Last 6 months\",\n            \"Last 1 year\",\n            \"Last 2 years\",\n            \"Last 3 years\",\n            \"Last 5 years\",\n        ]\n        self.price_list = [price.value for price in RedfinApi.Price]\n        list.reverse(self.price_list)\n        self.create_widgets()\n        self.set_default_values()\n\n    def create_widgets(self) -> None:\n        \"\"\"Create widgets.\"\"\"\n        # frames\n        self.content_frame = ctk.CTkFrame(self)\n        self.for_sale_sold_frame = ctk.CTkFrame(\n            self.content_frame, width=300, height=100, fg_color=\"transparent\"\n        )\n        self.stories_frame = ctk.CTkFrame(self.content_frame)\n        self.year_built_frame = ctk.CTkFrame(self.content_frame, fg_color=\"transparent\")\n        self.home_type_frame = ctk.CTkFrame(self.content_frame)\n        self.square_feet_frame = ctk.CTkFrame(self.content_frame)\n        self.status_frame = ctk.CTkFrame(self.content_frame)\n        self.sold_within_frame = ctk.CTkFrame(self.content_frame)\n        self.price_range_frame = ctk.CTkFrame(self.content_frame)\n        self.reset_apply_frame = ctk.CTkFrame(self.content_frame)\n\n        # make more grid\n        self.columnconfigure((0, 2), weight=1)\n        self.columnconfigure(1, weight=30)\n        self.content_frame.columnconfigure((0), 
weight=1, uniform=\"a\")  # uniform\n        self.for_sale_sold_frame.columnconfigure((0, 1), weight=1)\n        self.stories_frame.columnconfigure((0, 1), weight=1)\n        self.year_built_frame.columnconfigure((0, 1, 2, 3), weight=1)\n        self.home_type_frame.columnconfigure((0, 1, 2, 3), weight=1)\n        self.square_feet_frame.columnconfigure((0, 1, 2, 3), weight=1)\n        self.status_frame.columnconfigure((0, 1, 2), weight=1)\n        self.sold_within_frame.columnconfigure((0, 1), weight=1)\n        self.price_range_frame.columnconfigure((0, 1, 2, 3), weight=1)\n        self.reset_apply_frame.columnconfigure((0, 1), weight=1)\n\n        self.rowconfigure((0, 2), weight=1)\n        self.rowconfigure(1, weight=30)\n        self.content_frame.rowconfigure(\n            (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), weight=1, uniform=\"a\"\n        )\n        self.for_sale_sold_frame.rowconfigure(0, weight=1)\n        self.stories_frame.rowconfigure(0, weight=1)\n        self.year_built_frame.rowconfigure((0, 1), weight=1)\n        self.home_type_frame.rowconfigure((0, 1, 2), weight=1)\n        self.square_feet_frame.rowconfigure((0, 1), weight=1)\n        self.status_frame.rowconfigure((0, 1), weight=1)\n        self.sold_within_frame.rowconfigure(0, weight=1)\n        self.price_range_frame.rowconfigure((0, 1), weight=1)\n        self.reset_apply_frame.rowconfigure(0, weight=1)\n\n        # placing the frames\n        self.content_frame.grid(row=1, column=1)\n        self.for_sale_sold_frame.grid(row=0, column=0, sticky=\"nsew\")\n        self.stories_frame.grid(row=1, column=0, sticky=\"nesw\")\n        self.year_built_frame.grid(row=2, column=0, sticky=\"nesw\")\n        self.home_type_frame.grid(row=3, column=0, rowspan=2, sticky=\"nesw\")\n        self.square_feet_frame.grid(row=5, column=0, sticky=\"nesw\")\n        self.status_frame.grid(row=6, column=0)\n        self.sold_within_frame.grid(row=7, column=0, sticky=\"nesw\")\n        
self.price_range_frame.grid(row=8, column=0, rowspan=2, sticky=\"nesw\")\n        self.reset_apply_frame.grid(row=10, column=0)\n\n        # Create the labels\n        self.for_sale_sold_label = ctk.CTkLabel(\n            self.for_sale_sold_frame, text=\"For Sale/Sold\"\n        )\n        self.stories_label = ctk.CTkLabel(self.stories_frame, text=\"Stories\")\n        self.year_built_label = ctk.CTkLabel(self.year_built_frame, text=\"Year Built\")\n        self.home_type_label = ctk.CTkLabel(self.home_type_frame, text=\"Home Type\")\n        self.sqft_label = ctk.CTkLabel(self.square_feet_frame, text=\"Square Feet\")\n        self.sale_status_label = ctk.CTkLabel(self.status_frame, text=\"Status\")\n        self.price_range_label = ctk.CTkLabel(\n            self.price_range_frame, text=\"Price Range\"\n        )\n        self.price_range_from_label = ctk.CTkLabel(self.price_range_frame, text=\"From\")\n        self.price_range_to_label = ctk.CTkLabel(self.price_range_frame, text=\"To\")\n        self.year_built_from_label = ctk.CTkLabel(self.year_built_frame, text=\"From\")\n        self.year_built_to_label = ctk.CTkLabel(self.year_built_frame, text=\"To\")\n        self.sold_within_label = ctk.CTkLabel(\n            self.sold_within_frame, text=\"Sold Within\"\n        )\n        self.sold_within_from_label = ctk.CTkLabel(self.square_feet_frame, text=\"From\")\n        self.sold_within_to_label = ctk.CTkLabel(self.square_feet_frame, text=\"To\")\n\n        # Create the Buttons\n        self.for_sale_sold_om = ctk.CTkOptionMenu(\n            master=self.for_sale_sold_frame,\n            values=[status.value for status in RedfinApi.SoldStatus],\n            command=lambda x: self.status_within_activate_deactivate(x),\n        )\n\n        self.min_stories_om = ctk.CTkOptionMenu(\n            self.stories_frame, values=[story.value for story in RedfinApi.Stories]\n        )\n\n        self.min_year_built_om = ctk.CTkOptionMenu(\n            self.year_built_frame,\n 
           values=self.year_list,\n            command=lambda x: self.year_validation(),\n        )\n\n        self.max_year_built_om = ctk.CTkOptionMenu(\n            self.year_built_frame,\n            values=self.year_list,\n            command=lambda x: self.year_validation(),\n        )\n\n        self.house_type_house_switch = ctk.CTkSwitch(\n            self.home_type_frame,\n            text=\"House\",\n            command=self.house_type_validation,\n        )\n        self.house_type_townhouse_switch = ctk.CTkSwitch(\n            self.home_type_frame,\n            text=\"Townhouse\",\n            command=self.house_type_validation,\n        )\n        self.house_type_condo_switch = ctk.CTkSwitch(\n            self.home_type_frame,\n            text=\"Condo\",\n            command=self.house_type_validation,\n        )\n        self.house_type_mul_fam_switch = ctk.CTkSwitch(\n            self.home_type_frame,\n            text=\"Multi-Family\",\n            command=self.house_type_validation,\n        )\n\n        self.min_sqft_om = ctk.CTkOptionMenu(\n            self.square_feet_frame,\n            values=self.sqft_list,\n            command=lambda x: self.sqft_validation(),\n        )\n        self.max_sqft_om = ctk.CTkOptionMenu(\n            self.square_feet_frame,\n            values=self.sqft_list,\n            command=lambda x: self.sqft_validation(),\n        )\n        self.status_coming_soon_chb = ctk.CTkCheckBox(\n            self.status_frame, text=\"Coming soon\"\n        )\n        self.status_active_chb = ctk.CTkCheckBox(self.status_frame, text=\"Active\")\n        self.status_pending_chb = ctk.CTkCheckBox(\n            self.status_frame, text=\"Under contract/Pending\"\n        )  # missing one i think\n        self.sold_within_om = ctk.CTkOptionMenu(\n            self.sold_within_frame, values=self.sold_within_list\n        )\n\n        self.min_price_om = ctk.CTkOptionMenu(\n            self.price_range_frame,\n            
values=self.price_list,\n            command=lambda x: self.price_validation(),\n        )\n        self.max_price_om = ctk.CTkOptionMenu(\n            self.price_range_frame,\n            values=self.price_list,\n            command=lambda x: self.price_validation(),\n        )\n\n        self.reset_filters_button = ctk.CTkButton(\n            self.reset_apply_frame,\n            text=\"Reset Filters\",\n            command=self.set_default_values,\n        )\n        self.apply_filters_button = ctk.CTkButton(\n            self.reset_apply_frame,\n            text=\"Apply Filters\",\n            command=self.change_to_search_page,\n        )\n\n        # Placing the widgets\n        self.for_sale_sold_label.grid(row=0, column=0)\n        self.stories_label.grid(row=0, column=0)\n        self.year_built_label.grid(row=0, column=0)\n        self.home_type_label.grid(row=0, column=0)\n        self.sqft_label.grid(row=0, column=0)\n        self.sale_status_label.grid(row=0, column=0)\n        self.price_range_label.grid(row=0, column=0)\n        self.year_built_from_label.grid(row=1, column=0)\n        self.year_built_to_label.grid(row=1, column=2)\n        self.price_range_from_label.grid(row=1, column=0)\n        self.price_range_to_label.grid(row=1, column=2)\n        self.sold_within_label.grid(row=0, column=0)\n        self.sold_within_from_label.grid(row=1, column=0)\n        self.sold_within_to_label.grid(row=1, column=2)\n\n        self.for_sale_sold_om.grid(row=0, column=1)\n        self.min_stories_om.grid(row=0, column=1)\n        self.min_year_built_om.grid(row=1, column=1)\n        self.max_year_built_om.grid(row=1, column=3)\n        self.min_sqft_om.grid(row=1, column=1)\n        self.max_sqft_om.grid(row=1, column=3)\n        self.sold_within_om.grid(row=0, column=1)\n        self.min_price_om.grid(row=1, column=1)\n        self.max_price_om.grid(row=1, column=3)\n        self.house_type_house_switch.grid(row=1, column=0)\n        
self.house_type_townhouse_switch.grid(row=1, column=1)\n        self.house_type_condo_switch.grid(row=2, column=0)\n        self.house_type_mul_fam_switch.grid(row=2, column=1)\n        self.status_coming_soon_chb.grid(row=1, column=0)\n        self.status_active_chb.grid(row=1, column=1)\n        self.status_pending_chb.grid(row=1, column=2)\n        self.reset_filters_button.grid(row=0, column=0, sticky=\"nesw\")\n        self.apply_filters_button.grid(row=0, column=1, sticky=\"nesw\")\n\n    def set_default_values(self) -> None:\n        \"\"\"Set the default values for all widgets.\n        Note:\n            Should be called after init and when clicking reset button.\n        \"\"\"\n        self.for_sale_sold_om.set(RedfinApi.SoldStatus.SOLD.value)\n        self.min_stories_om.set(RedfinApi.Stories.ONE.value)\n        self.min_year_built_om.set(str(self.cur_year - 1))\n        self.max_year_built_om.set(str(self.cur_year - 1))\n        self.sold_within_om.set(self.sold_within_list[-1])\n        self.max_price_om.set(RedfinApi.Price.NONE.value)\n        self.min_price_om.set(RedfinApi.Price.NONE.value)\n        self.max_sqft_om.set(RedfinApi.Sqft.NONE.value)\n        self.min_sqft_om.set(RedfinApi.Sqft.NONE.value)\n        self.status_active_chb.deselect()\n        self.status_pending_chb.deselect()\n        self.status_coming_soon_chb.deselect()\n        self.house_type_house_switch.select()\n        self.house_type_condo_switch.deselect()\n        self.house_type_townhouse_switch.deselect()\n        self.house_type_mul_fam_switch.deselect()\n        self.status_within_activate_deactivate(self.for_sale_sold_om.get())\n\n    def status_within_activate_deactivate(self, status) -> None:\n        \"\"\"Deactivate or activate the status and sold within sections, since they depend on what type of sale a house is being searched with.\n\n        Args:\n            status (Event): ignored\n        \"\"\"\n        match self.for_sale_sold_om.get():\n            case 
RedfinApi.SoldStatus.FOR_SALE.value:\n                self.sale_status_label.configure(state=\"normal\")\n                self.status_active_chb.configure(state=\"normal\")\n                self.status_coming_soon_chb.configure(state=\"normal\")\n                self.status_pending_chb.configure(state=\"normal\")\n                self.sold_within_label.configure(state=\"disabled\")\n                self.sold_within_om.configure(state=\"disabled\")\n            case RedfinApi.SoldStatus.SOLD.value:\n                self.sale_status_label.configure(state=\"disabled\")\n                self.status_active_chb.configure(state=\"disabled\")\n                self.status_coming_soon_chb.configure(state=\"disabled\")\n                self.status_pending_chb.configure(state=\"disabled\")\n                self.sold_within_label.configure(state=\"normal\")\n                self.sold_within_om.configure(state=\"normal\")\n\n    def change_to_search_page(self) -> None:\n        \"\"\"Change to search page.\"\"\"\n        self.grid_remove()\n        self.search_page.grid()\n\n    def price_validation(self):\n        \"\"\"Called when price range min om gets changed\"\"\"\n        if (\n            self.max_price_om.get() == RedfinApi.Price.NONE.value\n            or self.min_price_om.get() == RedfinApi.Price.NONE.value\n        ):\n            return\n        if int(self.max_price_om.get()) < int(self.min_price_om.get()):\n            self.max_price_om.set(self.min_price_om.get())\n\n    def year_validation(self) -> None:\n        \"\"\"Year drop down callback\"\"\"\n        if int(self.max_year_built_om.get()) < int(self.min_year_built_om.get()):\n            self.max_year_built_om.set(self.min_year_built_om.get())\n\n    def sqft_validation(self) -> None:\n        \"\"\"Sqft dropdown callback\"\"\"\n        if (\n            self.max_sqft_om.get() == RedfinApi.Sqft.NONE.value\n            or self.min_sqft_om.get() == RedfinApi.Sqft.NONE.value\n        ):\n            return\n   
     if int(self.max_sqft_om.get()) < int(self.min_sqft_om.get()):\n            self.max_sqft_om.set(self.min_sqft_om.get())\n\n    def house_type_validation(self) -> None:\n        \"\"\"House type switch validation to make sure at lest house is selected.\"\"\"\n        if not any(\n            [\n                self.house_type_house_switch.get(),\n                self.house_type_condo_switch.get(),\n                self.house_type_mul_fam_switch.get(),\n                self.house_type_townhouse_switch.get(),\n            ]\n        ):\n            self.house_type_house_switch.select()\n\n    def get_values(self) -> dict[str, Any]:\n        \"\"\"Get the values of all widgets on this page.\n\n        Returns:\n            dict[str, Any]: dict of values\n        \"\"\"\n        match self.sold_within_om.get():\n            case \"Last 1 week\":\n                sold_within_days = RedfinApi.SoldWithinDays.ONE_WEEK\n            case \"Last 1 month\":\n                sold_within_days = RedfinApi.SoldWithinDays.ONE_MONTH\n            case \"Last 3 months\":\n                sold_within_days = RedfinApi.SoldWithinDays.THREE_MONTHS\n            case \"Last 6 months\":\n                sold_within_days = RedfinApi.SoldWithinDays.SIX_MONTHS\n            case \"Last 1 year\":\n                sold_within_days = RedfinApi.SoldWithinDays.ONE_YEAR\n            case \"Last 2 years\":\n                sold_within_days = RedfinApi.SoldWithinDays.TWO_YEARS\n            case \"Last 3 years\":\n                sold_within_days = RedfinApi.SoldWithinDays.THREE_YEARS\n            case _:\n                sold_within_days = RedfinApi.SoldWithinDays.FIVE_YEARS\n\n        return {\n            \"for sale sold\": self.for_sale_sold_om.get(),\n            \"min stories\": self.min_stories_om.get(),\n            \"max year built\": self.max_year_built_om.get(),  # do validation here\n            \"min year built\": self.min_year_built_om.get(),\n            \"sold within\": 
sold_within_days.value,\n            \"status active\": bool(self.status_active_chb.get()),\n            \"status coming soon\": bool(self.status_coming_soon_chb.get()),\n            \"status pending\": bool(self.status_pending_chb.get()),\n            \"house type house\": bool(self.house_type_house_switch.get()),\n            \"house type townhouse\": bool(self.house_type_townhouse_switch.get()),\n            \"house type mul fam\": bool(self.house_type_mul_fam_switch.get()),\n            \"house type condo\": bool(self.house_type_condo_switch.get()),\n            \"max sqft\": self.max_sqft_om.get(),\n            \"min sqft\": self.min_sqft_om.get(),\n            \"max price\": self.max_price_om.get(),\n            \"min price\": self.min_price_om.get(),\n        }\n
    "},{"location":"gui/filterspage/#gui.filterspage.FiltersPage.change_to_search_page","title":"change_to_search_page()","text":"

    Change to search page.

    Source code in src\\gui\\filterspage.py
    def change_to_search_page(self) -> None:\n    \"\"\"Change to search page.\"\"\"\n    self.grid_remove()\n    self.search_page.grid()\n
    "},{"location":"gui/filterspage/#gui.filterspage.FiltersPage.create_widgets","title":"create_widgets()","text":"

    Create widgets.

    Source code in src\\gui\\filterspage.py
    def create_widgets(self) -> None:\n    \"\"\"Create widgets.\"\"\"\n    # frames\n    self.content_frame = ctk.CTkFrame(self)\n    self.for_sale_sold_frame = ctk.CTkFrame(\n        self.content_frame, width=300, height=100, fg_color=\"transparent\"\n    )\n    self.stories_frame = ctk.CTkFrame(self.content_frame)\n    self.year_built_frame = ctk.CTkFrame(self.content_frame, fg_color=\"transparent\")\n    self.home_type_frame = ctk.CTkFrame(self.content_frame)\n    self.square_feet_frame = ctk.CTkFrame(self.content_frame)\n    self.status_frame = ctk.CTkFrame(self.content_frame)\n    self.sold_within_frame = ctk.CTkFrame(self.content_frame)\n    self.price_range_frame = ctk.CTkFrame(self.content_frame)\n    self.reset_apply_frame = ctk.CTkFrame(self.content_frame)\n\n    # make more grid\n    self.columnconfigure((0, 2), weight=1)\n    self.columnconfigure(1, weight=30)\n    self.content_frame.columnconfigure((0), weight=1, uniform=\"a\")  # uniform\n    self.for_sale_sold_frame.columnconfigure((0, 1), weight=1)\n    self.stories_frame.columnconfigure((0, 1), weight=1)\n    self.year_built_frame.columnconfigure((0, 1, 2, 3), weight=1)\n    self.home_type_frame.columnconfigure((0, 1, 2, 3), weight=1)\n    self.square_feet_frame.columnconfigure((0, 1, 2, 3), weight=1)\n    self.status_frame.columnconfigure((0, 1, 2), weight=1)\n    self.sold_within_frame.columnconfigure((0, 1), weight=1)\n    self.price_range_frame.columnconfigure((0, 1, 2, 3), weight=1)\n    self.reset_apply_frame.columnconfigure((0, 1), weight=1)\n\n    self.rowconfigure((0, 2), weight=1)\n    self.rowconfigure(1, weight=30)\n    self.content_frame.rowconfigure(\n        (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), weight=1, uniform=\"a\"\n    )\n    self.for_sale_sold_frame.rowconfigure(0, weight=1)\n    self.stories_frame.rowconfigure(0, weight=1)\n    self.year_built_frame.rowconfigure((0, 1), weight=1)\n    self.home_type_frame.rowconfigure((0, 1, 2), weight=1)\n    
self.square_feet_frame.rowconfigure((0, 1), weight=1)\n    self.status_frame.rowconfigure((0, 1), weight=1)\n    self.sold_within_frame.rowconfigure(0, weight=1)\n    self.price_range_frame.rowconfigure((0, 1), weight=1)\n    self.reset_apply_frame.rowconfigure(0, weight=1)\n\n    # placing the frames\n    self.content_frame.grid(row=1, column=1)\n    self.for_sale_sold_frame.grid(row=0, column=0, sticky=\"nsew\")\n    self.stories_frame.grid(row=1, column=0, sticky=\"nesw\")\n    self.year_built_frame.grid(row=2, column=0, sticky=\"nesw\")\n    self.home_type_frame.grid(row=3, column=0, rowspan=2, sticky=\"nesw\")\n    self.square_feet_frame.grid(row=5, column=0, sticky=\"nesw\")\n    self.status_frame.grid(row=6, column=0)\n    self.sold_within_frame.grid(row=7, column=0, sticky=\"nesw\")\n    self.price_range_frame.grid(row=8, column=0, rowspan=2, sticky=\"nesw\")\n    self.reset_apply_frame.grid(row=10, column=0)\n\n    # Create the labels\n    self.for_sale_sold_label = ctk.CTkLabel(\n        self.for_sale_sold_frame, text=\"For Sale/Sold\"\n    )\n    self.stories_label = ctk.CTkLabel(self.stories_frame, text=\"Stories\")\n    self.year_built_label = ctk.CTkLabel(self.year_built_frame, text=\"Year Built\")\n    self.home_type_label = ctk.CTkLabel(self.home_type_frame, text=\"Home Type\")\n    self.sqft_label = ctk.CTkLabel(self.square_feet_frame, text=\"Square Feet\")\n    self.sale_status_label = ctk.CTkLabel(self.status_frame, text=\"Status\")\n    self.price_range_label = ctk.CTkLabel(\n        self.price_range_frame, text=\"Price Range\"\n    )\n    self.price_range_from_label = ctk.CTkLabel(self.price_range_frame, text=\"From\")\n    self.price_range_to_label = ctk.CTkLabel(self.price_range_frame, text=\"To\")\n    self.year_built_from_label = ctk.CTkLabel(self.year_built_frame, text=\"From\")\n    self.year_built_to_label = ctk.CTkLabel(self.year_built_frame, text=\"To\")\n    self.sold_within_label = ctk.CTkLabel(\n        self.sold_within_frame, 
text=\"Sold Within\"\n    )\n    self.sold_within_from_label = ctk.CTkLabel(self.square_feet_frame, text=\"From\")\n    self.sold_within_to_label = ctk.CTkLabel(self.square_feet_frame, text=\"To\")\n\n    # Create the Buttons\n    self.for_sale_sold_om = ctk.CTkOptionMenu(\n        master=self.for_sale_sold_frame,\n        values=[status.value for status in RedfinApi.SoldStatus],\n        command=lambda x: self.status_within_activate_deactivate(x),\n    )\n\n    self.min_stories_om = ctk.CTkOptionMenu(\n        self.stories_frame, values=[story.value for story in RedfinApi.Stories]\n    )\n\n    self.min_year_built_om = ctk.CTkOptionMenu(\n        self.year_built_frame,\n        values=self.year_list,\n        command=lambda x: self.year_validation(),\n    )\n\n    self.max_year_built_om = ctk.CTkOptionMenu(\n        self.year_built_frame,\n        values=self.year_list,\n        command=lambda x: self.year_validation(),\n    )\n\n    self.house_type_house_switch = ctk.CTkSwitch(\n        self.home_type_frame,\n        text=\"House\",\n        command=self.house_type_validation,\n    )\n    self.house_type_townhouse_switch = ctk.CTkSwitch(\n        self.home_type_frame,\n        text=\"Townhouse\",\n        command=self.house_type_validation,\n    )\n    self.house_type_condo_switch = ctk.CTkSwitch(\n        self.home_type_frame,\n        text=\"Condo\",\n        command=self.house_type_validation,\n    )\n    self.house_type_mul_fam_switch = ctk.CTkSwitch(\n        self.home_type_frame,\n        text=\"Multi-Family\",\n        command=self.house_type_validation,\n    )\n\n    self.min_sqft_om = ctk.CTkOptionMenu(\n        self.square_feet_frame,\n        values=self.sqft_list,\n        command=lambda x: self.sqft_validation(),\n    )\n    self.max_sqft_om = ctk.CTkOptionMenu(\n        self.square_feet_frame,\n        values=self.sqft_list,\n        command=lambda x: self.sqft_validation(),\n    )\n    self.status_coming_soon_chb = ctk.CTkCheckBox(\n        
self.status_frame, text=\"Coming soon\"\n    )\n    self.status_active_chb = ctk.CTkCheckBox(self.status_frame, text=\"Active\")\n    self.status_pending_chb = ctk.CTkCheckBox(\n        self.status_frame, text=\"Under contract/Pending\"\n    )  # missing one i think\n    self.sold_within_om = ctk.CTkOptionMenu(\n        self.sold_within_frame, values=self.sold_within_list\n    )\n\n    self.min_price_om = ctk.CTkOptionMenu(\n        self.price_range_frame,\n        values=self.price_list,\n        command=lambda x: self.price_validation(),\n    )\n    self.max_price_om = ctk.CTkOptionMenu(\n        self.price_range_frame,\n        values=self.price_list,\n        command=lambda x: self.price_validation(),\n    )\n\n    self.reset_filters_button = ctk.CTkButton(\n        self.reset_apply_frame,\n        text=\"Reset Filters\",\n        command=self.set_default_values,\n    )\n    self.apply_filters_button = ctk.CTkButton(\n        self.reset_apply_frame,\n        text=\"Apply Filters\",\n        command=self.change_to_search_page,\n    )\n\n    # Placing the widgets\n    self.for_sale_sold_label.grid(row=0, column=0)\n    self.stories_label.grid(row=0, column=0)\n    self.year_built_label.grid(row=0, column=0)\n    self.home_type_label.grid(row=0, column=0)\n    self.sqft_label.grid(row=0, column=0)\n    self.sale_status_label.grid(row=0, column=0)\n    self.price_range_label.grid(row=0, column=0)\n    self.year_built_from_label.grid(row=1, column=0)\n    self.year_built_to_label.grid(row=1, column=2)\n    self.price_range_from_label.grid(row=1, column=0)\n    self.price_range_to_label.grid(row=1, column=2)\n    self.sold_within_label.grid(row=0, column=0)\n    self.sold_within_from_label.grid(row=1, column=0)\n    self.sold_within_to_label.grid(row=1, column=2)\n\n    self.for_sale_sold_om.grid(row=0, column=1)\n    self.min_stories_om.grid(row=0, column=1)\n    self.min_year_built_om.grid(row=1, column=1)\n    self.max_year_built_om.grid(row=1, column=3)\n    
self.min_sqft_om.grid(row=1, column=1)\n    self.max_sqft_om.grid(row=1, column=3)\n    self.sold_within_om.grid(row=0, column=1)\n    self.min_price_om.grid(row=1, column=1)\n    self.max_price_om.grid(row=1, column=3)\n    self.house_type_house_switch.grid(row=1, column=0)\n    self.house_type_townhouse_switch.grid(row=1, column=1)\n    self.house_type_condo_switch.grid(row=2, column=0)\n    self.house_type_mul_fam_switch.grid(row=2, column=1)\n    self.status_coming_soon_chb.grid(row=1, column=0)\n    self.status_active_chb.grid(row=1, column=1)\n    self.status_pending_chb.grid(row=1, column=2)\n    self.reset_filters_button.grid(row=0, column=0, sticky=\"nesw\")\n    self.apply_filters_button.grid(row=0, column=1, sticky=\"nesw\")\n
    "},{"location":"gui/filterspage/#gui.filterspage.FiltersPage.get_values","title":"get_values()","text":"

    Get the values of all widgets on this page.

    Returns:

    Type Description dict[str, Any]

    dict[str, Any]: dict of values

    Source code in src\\gui\\filterspage.py
    def get_values(self) -> dict[str, Any]:\n    \"\"\"Get the values of all widgets on this page.\n\n    Returns:\n        dict[str, Any]: dict of values\n    \"\"\"\n    match self.sold_within_om.get():\n        case \"Last 1 week\":\n            sold_within_days = RedfinApi.SoldWithinDays.ONE_WEEK\n        case \"Last 1 month\":\n            sold_within_days = RedfinApi.SoldWithinDays.ONE_MONTH\n        case \"Last 3 months\":\n            sold_within_days = RedfinApi.SoldWithinDays.THREE_MONTHS\n        case \"Last 6 months\":\n            sold_within_days = RedfinApi.SoldWithinDays.SIX_MONTHS\n        case \"Last 1 year\":\n            sold_within_days = RedfinApi.SoldWithinDays.ONE_YEAR\n        case \"Last 2 years\":\n            sold_within_days = RedfinApi.SoldWithinDays.TWO_YEARS\n        case \"Last 3 years\":\n            sold_within_days = RedfinApi.SoldWithinDays.THREE_YEARS\n        case _:\n            sold_within_days = RedfinApi.SoldWithinDays.FIVE_YEARS\n\n    return {\n        \"for sale sold\": self.for_sale_sold_om.get(),\n        \"min stories\": self.min_stories_om.get(),\n        \"max year built\": self.max_year_built_om.get(),  # do validation here\n        \"min year built\": self.min_year_built_om.get(),\n        \"sold within\": sold_within_days.value,\n        \"status active\": bool(self.status_active_chb.get()),\n        \"status coming soon\": bool(self.status_coming_soon_chb.get()),\n        \"status pending\": bool(self.status_pending_chb.get()),\n        \"house type house\": bool(self.house_type_house_switch.get()),\n        \"house type townhouse\": bool(self.house_type_townhouse_switch.get()),\n        \"house type mul fam\": bool(self.house_type_mul_fam_switch.get()),\n        \"house type condo\": bool(self.house_type_condo_switch.get()),\n        \"max sqft\": self.max_sqft_om.get(),\n        \"min sqft\": self.min_sqft_om.get(),\n        \"max price\": self.max_price_om.get(),\n        \"min price\": 
self.min_price_om.get(),\n    }\n
    "},{"location":"gui/filterspage/#gui.filterspage.FiltersPage.house_type_validation","title":"house_type_validation()","text":"

    House type switch validation to make sure at least one house type is selected.

    Source code in src\\gui\\filterspage.py
    def house_type_validation(self) -> None:\n    \"\"\"House type switch validation to make sure at lest house is selected.\"\"\"\n    if not any(\n        [\n            self.house_type_house_switch.get(),\n            self.house_type_condo_switch.get(),\n            self.house_type_mul_fam_switch.get(),\n            self.house_type_townhouse_switch.get(),\n        ]\n    ):\n        self.house_type_house_switch.select()\n
    "},{"location":"gui/filterspage/#gui.filterspage.FiltersPage.price_validation","title":"price_validation()","text":"

    Called when either price range option menu (min or max) is changed; raises the max price to match the min price if it is lower.

    Source code in src\\gui\\filterspage.py
    def price_validation(self):\n    \"\"\"Called when price range min om gets changed\"\"\"\n    if (\n        self.max_price_om.get() == RedfinApi.Price.NONE.value\n        or self.min_price_om.get() == RedfinApi.Price.NONE.value\n    ):\n        return\n    if int(self.max_price_om.get()) < int(self.min_price_om.get()):\n        self.max_price_om.set(self.min_price_om.get())\n
    "},{"location":"gui/filterspage/#gui.filterspage.FiltersPage.set_default_values","title":"set_default_values()","text":"

    Set the default values for all widgets. Note: Should be called after init and when clicking reset button.

    Source code in src\\gui\\filterspage.py
    def set_default_values(self) -> None:\n    \"\"\"Set the default values for all widgets.\n    Note:\n        Should be called after init and when clicking reset button.\n    \"\"\"\n    self.for_sale_sold_om.set(RedfinApi.SoldStatus.SOLD.value)\n    self.min_stories_om.set(RedfinApi.Stories.ONE.value)\n    self.min_year_built_om.set(str(self.cur_year - 1))\n    self.max_year_built_om.set(str(self.cur_year - 1))\n    self.sold_within_om.set(self.sold_within_list[-1])\n    self.max_price_om.set(RedfinApi.Price.NONE.value)\n    self.min_price_om.set(RedfinApi.Price.NONE.value)\n    self.max_sqft_om.set(RedfinApi.Sqft.NONE.value)\n    self.min_sqft_om.set(RedfinApi.Sqft.NONE.value)\n    self.status_active_chb.deselect()\n    self.status_pending_chb.deselect()\n    self.status_coming_soon_chb.deselect()\n    self.house_type_house_switch.select()\n    self.house_type_condo_switch.deselect()\n    self.house_type_townhouse_switch.deselect()\n    self.house_type_mul_fam_switch.deselect()\n    self.status_within_activate_deactivate(self.for_sale_sold_om.get())\n
    "},{"location":"gui/filterspage/#gui.filterspage.FiltersPage.sqft_validation","title":"sqft_validation()","text":"

    Sqft dropdown callback

    Source code in src\\gui\\filterspage.py
    def sqft_validation(self) -> None:\n    \"\"\"Sqft dropdown callback\"\"\"\n    if (\n        self.max_sqft_om.get() == RedfinApi.Sqft.NONE.value\n        or self.min_sqft_om.get() == RedfinApi.Sqft.NONE.value\n    ):\n        return\n    if int(self.max_sqft_om.get()) < int(self.min_sqft_om.get()):\n        self.max_sqft_om.set(self.min_sqft_om.get())\n
    "},{"location":"gui/filterspage/#gui.filterspage.FiltersPage.status_within_activate_deactivate","title":"status_within_activate_deactivate(status)","text":"

    Deactivate or activate the status and sold within sections, since they depend on what type of sale a house is being searched with.

    Parameters:

    Name Type Description Default status Event

    ignored

    required Source code in src\\gui\\filterspage.py
    def status_within_activate_deactivate(self, status) -> None:\n    \"\"\"Deactivate or activate the status and sold within sections, since they depend on what type of sale a house is being searched with.\n\n    Args:\n        status (Event): ignored\n    \"\"\"\n    match self.for_sale_sold_om.get():\n        case RedfinApi.SoldStatus.FOR_SALE.value:\n            self.sale_status_label.configure(state=\"normal\")\n            self.status_active_chb.configure(state=\"normal\")\n            self.status_coming_soon_chb.configure(state=\"normal\")\n            self.status_pending_chb.configure(state=\"normal\")\n            self.sold_within_label.configure(state=\"disabled\")\n            self.sold_within_om.configure(state=\"disabled\")\n        case RedfinApi.SoldStatus.SOLD.value:\n            self.sale_status_label.configure(state=\"disabled\")\n            self.status_active_chb.configure(state=\"disabled\")\n            self.status_coming_soon_chb.configure(state=\"disabled\")\n            self.status_pending_chb.configure(state=\"disabled\")\n            self.sold_within_label.configure(state=\"normal\")\n            self.sold_within_om.configure(state=\"normal\")\n
    "},{"location":"gui/filterspage/#gui.filterspage.FiltersPage.year_validation","title":"year_validation()","text":"

    Year drop down callback

    Source code in src\\gui\\filterspage.py
    def year_validation(self) -> None:\n    \"\"\"Year drop down callback\"\"\"\n    if int(self.max_year_built_om.get()) < int(self.min_year_built_om.get()):\n        self.max_year_built_om.set(self.min_year_built_om.get())\n
    "},{"location":"gui/searchpage/","title":"Searchpage","text":""},{"location":"gui/searchpage/#gui.searchpage.SearchPage","title":"SearchPage","text":"

    Bases: CTkFrame

    Source code in src\\gui\\searchpage.py
    class SearchPage(ctk.CTkFrame):\n    def __init__(self, master: ctk.CTk, **kwargs):\n        super().__init__(master, **kwargs)\n        self.master = master\n        self.datapage = None\n        self.label_font = ctk.CTkFont(\"Roboto\", 34)\n        self.MATCHES_TO_DISPLAY = 20  # performance and practicality\n        self.auto_complete_series = get_unique_msa_from_master()\n        self.current_auto_complete_series = None\n        self.prev_search_bar_len = 0\n        self.filters_page = FiltersPage(self.master, self)\n        self.create_widgets()\n\n    def create_widgets(self) -> None:\n        \"\"\"Create widgets.\"\"\"\n        self.top_text = ctk.CTkLabel(\n            self,\n            text=\"Residential Heating Search For Metropolitan Statistical Areas\",\n            font=self.label_font,\n            wraplength=600,\n        )\n        CTkToolTip(\n            self.top_text,\n            delay=0.25,\n            message=\"An MSA is a census defined region that consists of a city and \\nsurrounding communities that are linked by social and economic factors. 
\\nThe core city has a population of at least 50,000\",\n        )\n        self.redfin_filters_button = ctk.CTkButton(\n            self,\n            corner_radius=10,\n            height=35,\n            text=\"Add Filters\",\n            command=self.change_to_filters_page,\n        )\n        CTkToolTip(\n            self.redfin_filters_button,\n            delay=0.25,\n            message=\"Select filters for your search.\",\n        )\n        self.search_bar = ctk.CTkEntry(\n            self, height=40, corner_radius=40, placeholder_text=\"Search for an MSA\"\n        )\n        self.suggestion_list_box = CTkListbox(\n            self,\n            text_color=(\"gray10\", \"#DCE4EE\"),  # type: ignore\n            border_width=2,\n            command=lambda x: self.update_entry_on_autocomplete_select(x),\n        )\n        self.search_button = ctk.CTkButton(\n            self,\n            text=\"Search\",\n            fg_color=\"transparent\",\n            height=35,\n            corner_radius=10,\n            border_width=2,\n            text_color=(\"gray10\", \"#DCE4EE\"),\n            command=self.validate_entry_box_and_search,\n        )\n\n        self.columnconfigure((0, 2), weight=1)\n        self.columnconfigure(1, weight=4)\n        self.rowconfigure(0, weight=10)\n        self.rowconfigure(1, weight=4)\n        self.rowconfigure(2, weight=10)\n\n        self.top_text.grid(column=0, row=0, columnspan=3)\n\n        self.redfin_filters_button.grid(column=0, row=1, padx=(0, 40), sticky=\"e\")\n\n        self.search_bar.grid(column=1, row=1, sticky=\"ew\")\n\n        self.suggestion_list_box.grid(column=1, row=2, sticky=\"new\", pady=(10, 0))\n\n        self.search_button.grid(column=2, row=1, padx=(40, 0), sticky=\"w\")\n\n        self.suggestion_list_box.grid_remove()\n        self.search_bar.bind(\n            \"<KeyRelease>\", command=lambda x: self.update_suggestions_listbox(x)\n        )\n\n    def update_suggestions_listbox(self, x: Event | 
None) -> None:\n        \"\"\"Update the suggestions box based on the contents of 'self.search_bar'.\n\n        Args:\n            x (Event | None): ignored\n        \"\"\"\n        cur_text = re.escape(self.search_bar.get())\n        if cur_text == \"\":\n            # only gets called when all text has been deleted\n            self.current_auto_complete_series = self.auto_complete_series\n            self.suggestion_list_box.grid_remove()\n        else:\n            self.suggestion_list_box.delete(\"all\")\n            if (\n                self.current_auto_complete_series is None\n                or len(cur_text) < self.prev_search_bar_len\n            ):\n                self.current_auto_complete_series = self.auto_complete_series.filter(\n                    self.auto_complete_series.str.contains(rf\"(?i)^{cur_text}\")\n                )\n            else:\n                self.current_auto_complete_series = (\n                    self.current_auto_complete_series.filter(\n                        self.current_auto_complete_series.str.contains(\n                            rf\"(?i)^{cur_text}\"\n                        )\n                    )\n                )\n            self.suggestion_list_box.grid()\n            self.current_auto_complete_series.head(\n                self.MATCHES_TO_DISPLAY\n            ).map_elements(\n                lambda msa: self.suggestion_list_box.insert(\n                    \"end\", msa, border_width=2, border_color=\"gray\"\n                ),\n                return_dtype=pl.Utf8,\n            )\n        self.prev_search_bar_len = len(cur_text)\n\n    def update_entry_on_autocomplete_select(self, x: Event) -> None:\n        \"\"\"Suggestions list box callback for when a button in the list box is selected.\"\"\"\n        self.search_bar.delete(0, ctk.END)\n        self.search_bar.insert(0, x)\n        self.update_suggestions_listbox(None)\n\n    def validate_entry_box_and_search(self) -> None:\n        \"\"\"Validate 
`self.search_bar` contents and search if the contents are an MSA name.\"\"\"\n        cur_text = self.search_bar.get()\n        if len(cur_text) == 0:\n            cur_text = r\"!^\"\n        if any(self.auto_complete_series.str.contains(rf\"{cur_text}$\")):\n            self.data_page = DataPage(self.master)\n            self.data_page.grid(row=0, column=0, sticky=\"news\")\n            self.go_to_data_page(cur_text)\n            self.search_metros_threaded(cur_text)\n        else:\n            CTkMessagebox(\n                self,\n                title=\"Error\",\n                message=\"Inputted name is not in MSA name list!\",\n                icon=\"warning\",\n            )\n\n    def go_to_data_page(self, msa_name: str) -> None:\n        \"\"\"Switch to data page.\n\n        Args:\n            msa_name (str): Metropolitan Statistical Area name\n        \"\"\"\n        if self.data_page is not None:\n            self.grid_remove()\n            self.data_page.grid()\n            self.data_page.set_msa_name(msa_name)\n\n    def search_metros_threaded(self, msa_name: str) -> None:\n        \"\"\"Search the given Metropolitan Statistical Area name for housing attributes.\n\n        Args:\n            msa_name (str): Metropolitan Statistical Area name\n        \"\"\"\n        redfin_searcher = RedfinApi()\n        lock = threading.Lock()\n        with lock:\n            threading.Thread(\n                target=redfin_searcher.get_house_attributes_from_metro,\n                args=(msa_name, self.filters_page.get_values()),\n                daemon=True,\n            ).start()\n\n    def change_to_filters_page(self) -> None:\n        \"\"\"Change to filters page.\"\"\"\n        if self.filters_page is not None:\n            self.filters_page.grid(row=0, column=0, sticky=\"news\")\n            self.grid_remove()\n            self.filters_page.grid()\n
    "},{"location":"gui/searchpage/#gui.searchpage.SearchPage.change_to_filters_page","title":"change_to_filters_page()","text":"

    Change to filters page.

    Source code in src\\gui\\searchpage.py
    def change_to_filters_page(self) -> None:\n    \"\"\"Change to filters page.\"\"\"\n    if self.filters_page is not None:\n        self.filters_page.grid(row=0, column=0, sticky=\"news\")\n        self.grid_remove()\n        self.filters_page.grid()\n
    "},{"location":"gui/searchpage/#gui.searchpage.SearchPage.create_widgets","title":"create_widgets()","text":"

    Create widgets.

    Source code in src\\gui\\searchpage.py
    def create_widgets(self) -> None:\n    \"\"\"Create widgets.\"\"\"\n    self.top_text = ctk.CTkLabel(\n        self,\n        text=\"Residential Heating Search For Metropolitan Statistical Areas\",\n        font=self.label_font,\n        wraplength=600,\n    )\n    CTkToolTip(\n        self.top_text,\n        delay=0.25,\n        message=\"An MSA is a census defined region that consists of a city and \\nsurrounding communities that are linked by social and economic factors. \\nThe core city has a population of at least 50,000\",\n    )\n    self.redfin_filters_button = ctk.CTkButton(\n        self,\n        corner_radius=10,\n        height=35,\n        text=\"Add Filters\",\n        command=self.change_to_filters_page,\n    )\n    CTkToolTip(\n        self.redfin_filters_button,\n        delay=0.25,\n        message=\"Select filters for your search.\",\n    )\n    self.search_bar = ctk.CTkEntry(\n        self, height=40, corner_radius=40, placeholder_text=\"Search for an MSA\"\n    )\n    self.suggestion_list_box = CTkListbox(\n        self,\n        text_color=(\"gray10\", \"#DCE4EE\"),  # type: ignore\n        border_width=2,\n        command=lambda x: self.update_entry_on_autocomplete_select(x),\n    )\n    self.search_button = ctk.CTkButton(\n        self,\n        text=\"Search\",\n        fg_color=\"transparent\",\n        height=35,\n        corner_radius=10,\n        border_width=2,\n        text_color=(\"gray10\", \"#DCE4EE\"),\n        command=self.validate_entry_box_and_search,\n    )\n\n    self.columnconfigure((0, 2), weight=1)\n    self.columnconfigure(1, weight=4)\n    self.rowconfigure(0, weight=10)\n    self.rowconfigure(1, weight=4)\n    self.rowconfigure(2, weight=10)\n\n    self.top_text.grid(column=0, row=0, columnspan=3)\n\n    self.redfin_filters_button.grid(column=0, row=1, padx=(0, 40), sticky=\"e\")\n\n    self.search_bar.grid(column=1, row=1, sticky=\"ew\")\n\n    self.suggestion_list_box.grid(column=1, row=2, sticky=\"new\", 
pady=(10, 0))\n\n    self.search_button.grid(column=2, row=1, padx=(40, 0), sticky=\"w\")\n\n    self.suggestion_list_box.grid_remove()\n    self.search_bar.bind(\n        \"<KeyRelease>\", command=lambda x: self.update_suggestions_listbox(x)\n    )\n
    "},{"location":"gui/searchpage/#gui.searchpage.SearchPage.go_to_data_page","title":"go_to_data_page(msa_name)","text":"

    Switch to data page.

    Parameters:

    Name Type Description Default msa_name str

    Metropolitan Statistical Area name

    required Source code in src\\gui\\searchpage.py
    def go_to_data_page(self, msa_name: str) -> None:\n    \"\"\"Switch to data page.\n\n    Args:\n        msa_name (str): Metropolitan Statistical Area name\n    \"\"\"\n    if self.data_page is not None:\n        self.grid_remove()\n        self.data_page.grid()\n        self.data_page.set_msa_name(msa_name)\n
    "},{"location":"gui/searchpage/#gui.searchpage.SearchPage.search_metros_threaded","title":"search_metros_threaded(msa_name)","text":"

    Search the given Metropolitan Statistical Area name for housing attributes.

    Parameters:

    Name Type Description Default msa_name str

    Metropolitan Statistical Area name

    required Source code in src\\gui\\searchpage.py
    def search_metros_threaded(self, msa_name: str) -> None:\n    \"\"\"Search the given Metropolitan Statistical Area name for housing attributes.\n\n    Args:\n        msa_name (str): Metropolitan Statistical Area name\n    \"\"\"\n    redfin_searcher = RedfinApi()\n    lock = threading.Lock()\n    with lock:\n        threading.Thread(\n            target=redfin_searcher.get_house_attributes_from_metro,\n            args=(msa_name, self.filters_page.get_values()),\n            daemon=True,\n        ).start()\n
    "},{"location":"gui/searchpage/#gui.searchpage.SearchPage.update_entry_on_autocomplete_select","title":"update_entry_on_autocomplete_select(x)","text":"

    Suggestions list box callback for when a button in the list box is selected.

    Source code in src\\gui\\searchpage.py
    def update_entry_on_autocomplete_select(self, x: Event) -> None:\n    \"\"\"Suggestions list box callback for when a button in the list box is selected.\"\"\"\n    self.search_bar.delete(0, ctk.END)\n    self.search_bar.insert(0, x)\n    self.update_suggestions_listbox(None)\n
    "},{"location":"gui/searchpage/#gui.searchpage.SearchPage.update_suggestions_listbox","title":"update_suggestions_listbox(x)","text":"

    Update the suggestions box based on the contents of 'self.search_bar'.

    Parameters:

    Name Type Description Default x Event | None

    ignored

    required Source code in src\\gui\\searchpage.py
    def update_suggestions_listbox(self, x: Event | None) -> None:\n    \"\"\"Update the suggestions box based on the contents of 'self.search_bar'.\n\n    Args:\n        x (Event | None): ignored\n    \"\"\"\n    cur_text = re.escape(self.search_bar.get())\n    if cur_text == \"\":\n        # only gets called when all text has been deleted\n        self.current_auto_complete_series = self.auto_complete_series\n        self.suggestion_list_box.grid_remove()\n    else:\n        self.suggestion_list_box.delete(\"all\")\n        if (\n            self.current_auto_complete_series is None\n            or len(cur_text) < self.prev_search_bar_len\n        ):\n            self.current_auto_complete_series = self.auto_complete_series.filter(\n                self.auto_complete_series.str.contains(rf\"(?i)^{cur_text}\")\n            )\n        else:\n            self.current_auto_complete_series = (\n                self.current_auto_complete_series.filter(\n                    self.current_auto_complete_series.str.contains(\n                        rf\"(?i)^{cur_text}\"\n                    )\n                )\n            )\n        self.suggestion_list_box.grid()\n        self.current_auto_complete_series.head(\n            self.MATCHES_TO_DISPLAY\n        ).map_elements(\n            lambda msa: self.suggestion_list_box.insert(\n                \"end\", msa, border_width=2, border_color=\"gray\"\n            ),\n            return_dtype=pl.Utf8,\n        )\n    self.prev_search_bar_len = len(cur_text)\n
    "},{"location":"gui/searchpage/#gui.searchpage.SearchPage.validate_entry_box_and_search","title":"validate_entry_box_and_search()","text":"

    Validate self.search_bar contents and search if the contents are an MSA name.

    Source code in src\\gui\\searchpage.py
    def validate_entry_box_and_search(self) -> None:\n    \"\"\"Validate `self.search_bar` contents and search if the contents are an MSA name.\"\"\"\n    cur_text = self.search_bar.get()\n    if len(cur_text) == 0:\n        cur_text = r\"!^\"\n    if any(self.auto_complete_series.str.contains(rf\"{cur_text}$\")):\n        self.data_page = DataPage(self.master)\n        self.data_page.grid(row=0, column=0, sticky=\"news\")\n        self.go_to_data_page(cur_text)\n        self.search_metros_threaded(cur_text)\n    else:\n        CTkMessagebox(\n            self,\n            title=\"Error\",\n            message=\"Inputted name is not in MSA name list!\",\n            icon=\"warning\",\n        )\n
    "}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Home","text":""},{"location":"#code-documentation","title":"Code Documentation","text":"

    Public functions are documented on the left. Additional comments for private functions are found through looking at the source code. The main.py file is not a part of the scraping library and just serves as a launching point for the GUI application.

    "},{"location":"#program-usage","title":"Program usage","text":"

    A YouTube playlist is being created that walks through how to use the scraping tool. An additional set of videos on how to combine the data collected and import and analyze them in QGIS is being created as well.

    "},{"location":"backend/helper/","title":"Helper","text":""},{"location":"backend/helper/#backend.helper.ASCIIColors","title":"ASCIIColors","text":"

    Bases: StrEnum

    ASCII colors for use in printing colored text to the terminal.

    Source code in src\\backend\\helper.py
    class ASCIIColors(StrEnum):\n    \"\"\"ASCII colors for use in printing colored text to the terminal.\"\"\"\n\n    GREY = \"\\x1b[38;20m\"\n    YELLOW = \"\\x1b[33;20m\"\n    RED = \"\\x1b[31;20m\"\n    BOLD_RED = \"\\x1b[31;1m\"\n    RESET = \"\\x1b[0m\"\n
    "},{"location":"backend/helper/#backend.helper.df_to_file","title":"df_to_file(df)","text":"

    Write a DataFrame to a unique file.

    Parameters:

    Name Type Description Default df DataFrame

    the DataFrame to write

    required Source code in src\\backend\\helper.py
    def df_to_file(df: pl.DataFrame):\n    \"\"\"Write a DataFrame to a unique file.\n\n    Args:\n        df (pl.DataFrame): the DataFrame to write\n    \"\"\"\n    file_path = OUTPUT_DIR / f\"{time.time()}_data_frame.csv\"\n    print(f\"Dataframe saved to {file_path.resolve()}\")\n    df.write_csv(file_path, include_header=True)\n
    "},{"location":"backend/helper/#backend.helper.is_valid_zipcode","title":"is_valid_zipcode(zip)","text":"

    Check if the given ZIP code is valid based on a local file.

    Parameters:

    Name Type Description Default zip int

    the ZIP code to check

    required

    Returns:

    Name Type Description bool bool

    if ZIP code is valid

    Source code in src\\backend\\helper.py
    def is_valid_zipcode(zip: int) -> bool:\n    \"\"\"Check if the given ZIP code is valid based on a local file.\n\n    Args:\n        zip (int): the ZIP code to check\n\n    Returns:\n        bool: if ZIP code is valid\n    \"\"\"\n    if isinstance(zip, str):\n        zip = int(zip)\n    return zip in MASTER_DF[\"ZIP\"]\n
    "},{"location":"backend/helper/#backend.helper.metro_name_to_zip_code_list","title":"metro_name_to_zip_code_list(msa_name)","text":"

    Return the constituent ZIP codes for the given Metropolitan Statistical Area.

    Parameters:

    Name Type Description Default msa_name str

    name of the Metropolitan Statistical Area

    required

    Returns:

    Type Description list[int]

    list[int]: list of ZIP codes found. Is empty if MSA name is invalid

    Source code in src\\backend\\helper.py
    def metro_name_to_zip_code_list(msa_name: str) -> list[int]:\n    \"\"\"Return the constituent ZIP codes for the given Metropolitan Statistical Area.\n\n    Args:\n        msa_name (str): name of the Metropolitan Statistical Area\n\n    Returns:\n        list[int]: list of ZIP codes found. Is empty if MSA name is invalid\n    \"\"\"\n    if msa_name == \"TEST\":\n        # return [20814]  # good and small\n        # return [22067, 55424]  # nulls in sqft\n        return [20015, 20018, 20017]  # nulls in sqft and large\n\n    df = MASTER_DF.select(\"ZIP\", \"METRO_NAME\", \"LSAD\")\n\n    return (\n        df.filter(\n            (pl.col(\"METRO_NAME\").eq(msa_name))\n            & (pl.col(\"LSAD\").eq(\"Metropolitan Statistical Area\"))\n        )\n        .unique()[\"ZIP\"]\n        .to_list()\n    )\n
    "},{"location":"backend/helper/#backend.helper.req_get_to_file","title":"req_get_to_file(request)","text":"

    Write the contents of a request response to a unique file.

    Parameters:

    Name Type Description Default request Response

    the request

    required

    Returns:

    Name Type Description int int

    the status code of the request

    Source code in src\\backend\\helper.py
    def req_get_to_file(request: requests.Response) -> int:\n    \"\"\"Write the contents of a request response to a unique file.\n\n    Args:\n        request (requests.Response): the request\n\n    Returns:\n        int: the status code of the request\n    \"\"\"\n    with open(OUTPUT_DIR / f\"{time.time()}_request.html\", \"w+\", encoding=\"utf-8\") as f:\n        f.write(request.text)\n    return request.status_code\n
    "},{"location":"backend/helper/#backend.helper.state_city_to_zip_df","title":"state_city_to_zip_df(state, city)","text":"

    Take in a state and city and return the ZIP code constituents of that city.

    Parameters:

    Name Type Description Default state str

    the state

    required city str

    the city

    required

    Returns:

    Type Description DataFrame

    pl.DataFrame: DataFrame of ZIP codes

    Source code in src\\backend\\helper.py
    def state_city_to_zip_df(state: str, city: str) -> pl.DataFrame:\n    \"\"\"Take in a state and city and return the ZIP code constituents of that city.\n\n    Args:\n        state (str): the state\n        city (str): the city\n\n    Returns:\n        pl.DataFrame: DataFrame of ZIP codes\n    \"\"\"\n    return (\n        pl.read_csv(\"zip_registry.csv\")\n        .filter((pl.col(\"state\") == state) & (pl.col(\"city\") == city))\n        .select(\"zipcode\")\n    )\n
    "},{"location":"backend/helper/#backend.helper.state_county_to_zip_df","title":"state_county_to_zip_df(state, county)","text":"

    Take in a state and county and return the ZIP code constituents of that county.

    Parameters:

    Name Type Description Default state str

    the state

    required county str

    the county

    required

    Returns:

    Type Description DataFrame

    pl.DataFrame: DataFrame of ZIP codes

    Source code in src\\backend\\helper.py
    def state_county_to_zip_df(state: str, county: str) -> pl.DataFrame:\n    \"\"\"Take in a state and county and return the ZIP code constituents of that county.\n\n    Args:\n        state (str): the state\n        county (str): the county\n\n    Returns:\n        pl.DataFrame: DataFrame of ZIP codes\n    \"\"\"\n    return (\n        pl.read_csv(\"zip_registry.csv\")\n        .filter((pl.col(\"state\") == state) & (pl.col(\"county\") == county))\n        .select(\"zipcode\")\n    )\n
    "},{"location":"backend/helper/#backend.helper.zip_to_metro","title":"zip_to_metro(zip)","text":"

    Find the Metropolitan Statistical Area name for the specified ZIP code.

    Parameters:

    Name Type Description Default zip int

    the ZIP code to look up

    required

    Returns:

    Name Type Description str str

    the Metropolitan name. Is empty if the ZIP code is not a part of a Metropolitan Statistical Area

    Source code in src\\backend\\helper.py
    def zip_to_metro(zip: int) -> str:\n    \"\"\"Find the Metropolitan Statistical Area name for the specified ZIP code.\n\n    Args:\n        zip (int): the ZIP code to look up\n\n    Returns:\n        str: the Metropolitan name. Is empty if the ZIP code is not a part of a Metropolitan Statistical Area\n    \"\"\"\n    result = MASTER_DF.filter(MASTER_DF[\"ZIP\"] == zip)[\"METRO_NAME\"]\n\n    if len(result) > 0:\n        log(\"Zip has multiple codes. Only giving first one\", \"debug\")\n        return result[0]\n    else:\n        return \"\"  # should this be none?\n
    "},{"location":"backend/redfinscraper/","title":"Redfinscraper","text":""},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi","title":"RedfinApi","text":"

    Scrape redfin using their stingray api. Use this class for getting and the iterating over ZIP code level data, creating an object for each new zip code.

    Source code in src\\backend\\redfinscraper.py
    class RedfinApi:\n    \"\"\"Scrape redfin using their stingray api. Use this class for getting and the iterating over ZIP code level data, creating an object for each new zip code.\"\"\"\n\n    class SoldStatus(StrEnum):\n        FOR_SALE = \"For Sale\"\n        SOLD = \"Sold\"\n\n    class HouseType(StrEnum):\n        HOUSE = \"1\"\n        CONDO = \"2\"\n        TOWNHOUSE = \"3\"\n        MULTI_FAMILY = \"4\"\n        LAND = \"5\"\n        OTHER = \"6\"\n\n    class Price(StrEnum):\n        NONE = \"None\"\n        FIFTY_THOU = \"50000\"\n        SEVENTY_FIVE_THOU = \"75000\"\n        ONE_HUN_THOU = \"100000\"\n        ONE_HUN_25_THOU = \"125000\"\n        ONE_HUN_5_THOU = \"150000\"\n        ONE_HUN_75_THOU = \"175000\"\n        TWO_HUN_THOU = \"200000\"\n        TWO_HUN_25_THOU = \"225000\"\n        TWO_HUN_5_THOU = \"250000\"\n        TWO_HUN_75_THOU = \"275000\"\n        THREE_HUN_THOU = \"300000\"\n        THREE_HUN_25_THOU = \"325000\"\n        THREE_HUN_5_THOU = \"350000\"\n        THREE_HUN_75_THOU = \"375000\"\n        FOUR_HUN_THOU = \"400000\"\n        FOUR_HUN_25_THOU = \"425000\"\n        FOUR_HUN_5_THOU = \"450000\"\n        FOUR_HUN_75_THOU = \"475000\"\n        FIVE_HUN_THOU = \"500000\"\n        FIVE_HUN_5_THOU = \"550000\"\n        SIX_HUN_THOU = \"600000\"\n        SIX_HUN_5_THOU = \"650000\"\n        SEVEN_HUN_THOU = \"700000\"\n        SEVEN_HUN_5_THOU = \"750000\"\n        EIGHT_HUN_THOU = \"800000\"\n        EIGHT_HUN_5_THOU = \"850000\"\n        NINE_HUN_THOU = \"900000\"\n        NINE_HUN_5_THOU = \"950000\"\n        ONE_MIL = \"1000000\"\n        ONE_MIL_25_THOU = \"1250000\"\n        ONE_MIL_5_THOU = \"1500000\"\n        ONE_MIL_75_THOU = \"1750000\"\n        TWO_MIL = \"2000000\"\n        TWO_MIL_25_THOU = \"2250000\"\n        TWO_MIL_5_THOU = \"2500000\"\n        TWO_MIL_75_THOU = \"2750000\"\n        THREE_MIL = \"3000000\"\n        THREE_MIL_25_THOU = \"3250000\"\n        THREE_MIL_5_THOU = \"3500000\"\n        THREE_MIL_75_THOU 
= \"3750000\"\n        FOUR_MIL = \"4000000\"\n        FOUR_MIL_25_THOU = \"4250000\"\n        FOUR_MIL_5_THOU = \"4500000\"\n        FOUR_MIL_75_THOU = \"4750000\"\n        FIVE_MIL = \"5000000\"\n        SIX_MIL = \"6000000\"\n        SEVEN_MIL = \"7000000\"\n        EIGHT_MIL = \"8000000\"\n        NINE_MIL = \"9000000\"\n        TEN_MIL = \"10000000\"\n\n    class SortOrder(StrEnum):\n        RECOMMENDED = \"redfin-recommended-asc\"\n        NEWEST = \"days-on-redfin-asc\"\n        MOST_RECENTLY_SOLD = \"last-sale-date-desc\"\n        LOW_HI = \"price-asc\"\n        HI_LOW = \"price-desc\"\n        SQFT = \"square-footage-desc\"\n        LOT_SIZE = \"lot-sq-ft-desc\"\n        SQFT_PRICE = \"dollars-per-sq-ft-asc\"\n\n    class SoldWithinDays(StrEnum):\n        ONE_WEEK = \"7\"\n        ONE_MONTH = \"30\"\n        THREE_MONTHS = \"90\"\n        SIX_MONTHS = \"180\"\n        ONE_YEAR = \"365\"\n        TWO_YEARS = \"730\"\n        THREE_YEARS = \"1095\"\n        FIVE_YEARS = \"1825\"\n\n    class Stories(StrEnum):\n        ONE = \"1\"\n        TWO = \"2\"\n        THREE = \"3\"\n        FOUR = \"4\"\n        FIVE = \"5\"\n        TEN = \"10\"\n        FIFTEEN = \"15\"\n        TWENTY = \"20\"\n\n    class Sqft(StrEnum):\n        NONE = \"None\"\n        SEVEN_FIFTY = \"750\"\n        THOU = \"1000\"\n        THOU_1 = \"1100\"\n        THOU_2 = \"1200\"\n        THOU_3 = \"1300\"\n        THOU_4 = \"1400\"\n        THOU_5 = \"1500\"\n        THOU_6 = \"1600\"\n        THOU_7 = \"1700\"\n        THOU_8 = \"1800\"\n        THOU_9 = \"1900\"\n        TWO_THOU = \"2000\"\n        TWO_THOU_250 = \"2250\"\n        TWO_THOU_500 = \"2500\"\n        TWO_THOU_750 = \"2750\"\n        THREE_THOU = \"3000\"\n        FOUR_THOU = \"4000\"\n        FIVE_THOU = \"5000\"\n        SEVEN_THOU_500 = \"7500\"\n        TEN_THOU = \"10000\"\n\n    def __init__(self) -> None:\n        self.rf = redfin.Redfin()\n        self.DESIRED_CSV_SCHEMA = {\n            \"ADDRESS\": str,\n           
 \"CITY\": str,\n            \"PROPERTY TYPE\": str,\n            \"STATE OR PROVINCE\": str,\n            \"YEAR BUILT\": pl.UInt16,\n            \"ZIP OR POSTAL CODE\": pl.UInt32,\n            \"PRICE\": pl.UInt32,\n            \"SQUARE FEET\": pl.UInt32,\n            \"URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)\": str,\n            \"LATITUDE\": pl.Float32,\n            \"LONGITUDE\": pl.Float32,\n        }\n        self.STRING_ZIP_CSV_SCHEMA = {\n            \"ADDRESS\": str,\n            \"CITY\": str,\n            \"PROPERTY TYPE\": str,\n            \"STATE OR PROVINCE\": str,\n            \"YEAR BUILT\": pl.UInt16,\n            \"ZIP OR POSTAL CODE\": pl.Utf8,\n            \"PRICE\": pl.UInt32,\n            \"SQUARE FEET\": pl.UInt32,\n            \"URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)\": str,\n            \"LATITUDE\": pl.Float32,\n            \"LONGITUDE\": pl.Float32,\n        }\n        self.search_params = None\n        self.column_dict = {key: False for key in CATEGORY_PATTERNS.keys()}\n\n    def set_search_params(self, zip: str, search_filters: dict[str, Any]) -> None:\n        \"\"\"Set the parameters for searching by ZIP code.\n\n        Args:\n            zip (str): the ZIP code\n            search_filters (dict[str, Any]): search filters for appending to a gis-csv path\n        \"\"\"\n        try:\n            region_info = self.get_region_info_from_zipcode(zip)\n        except json.JSONDecodeError:\n            log(f\"Could not decode region info for {zip}.\", \"warn\")\n            return None\n        except HTTPError:\n            log(f\"Could not retrieve region info for {zip}.\", \"warn\")\n            return None\n\n        if search_filters.get(\"for sale sold\") == \"Sold\":\n            sort_order = self.SortOrder.MOST_RECENTLY_SOLD.value\n        else:\n            sort_order = self.SortOrder.NEWEST.value\n        # TODO make sure 
to fix filtering so that its not just \"single family homes\"\n\n        try:\n            market = region_info[\"payload\"][\"rootDefaults\"][\"market\"]\n            region_id = region_info[\"payload\"][\"rootDefaults\"][\"region_id\"]\n            status = str(region_info[\"payload\"][\"rootDefaults\"][\"status\"])\n        except KeyError:\n            log(\"Market, region, or status could not be identified \", \"warn\")\n            return None\n\n        self.search_params = {\n            \"al\": 1,\n            \"has_deal\": \"false\",\n            \"has_dishwasher\": \"false\",\n            \"has_laundry_facility\": \"false\",\n            \"has_laundry_hookups\": \"false\",\n            \"has_parking\": \"false\",\n            \"has_pool\": \"false\",\n            \"has_short_term_lease\": \"false\",\n            \"include_pending_homes\": \"false\",  # probably an \"include\" option\n            \"isRentals\": \"false\",\n            \"is_furnished\": \"false\",\n            \"is_income_restricted\": \"false\",\n            \"is_senior_living\": \"false\",\n            \"max_year_built\": search_filters.get(\"max year built\"),\n            \"min_year_built\": search_filters.get(\"min year built\"),\n            \"market\": market,\n            \"min_stories\": search_filters.get(\"min stories\"),\n            \"num_homes\": 350,\n            \"ord\": sort_order,\n            \"page_number\": \"1\",\n            \"pool\": \"false\",\n            \"region_id\": region_id,\n            \"region_type\": \"2\",\n            \"status\": status,\n            \"travel_with_traffic\": \"false\",\n            \"travel_within_region\": \"false\",\n            \"utilities_included\": \"false\",\n            \"v\": \"8\",\n        }\n        if search_filters.get(\"for sale sold\") == \"Sold\":\n            self.search_params[\"sold_within_days\"] = search_filters.get(\"sold within\")\n            self.search_params[\"status\"] = 9\n        else:\n            
self.search_params[\"sf\"] = \"1, 2, 3, 4, 5, 6, 7\"\n            match [\n                search_filters.get(\"status coming soon\"),\n                search_filters.get(\"status active\"),\n                search_filters.get(\"status pending\"),\n            ]:\n                case [True, False, False]:\n                    status = \"8\"\n                case [False, True, False]:\n                    status = \"1\"\n                case [False, False, True]:\n                    status = \"130\"\n                case [True, True, False]:\n                    status = \"9\"\n                case [False, True, True]:\n                    status = \"139\"\n                case [True, False, True]:\n                    status = \"138\"\n                case [True, True, True]:\n                    status = \"139\"\n\n            self.search_params[\"status\"] = status\n\n        if (max_sqft := search_filters.get(\"max sqft\")) != \"None\":\n            self.search_params[\"max_sqft\"] = max_sqft\n        if (min_sqft := search_filters.get(\"min sqft\")) != \"None\":\n            self.search_params[\"min_sqft\"] = min_sqft\n\n        if (max_price := search_filters.get(\"max price\")) != \"None\":\n            self.search_params[\"max_price\"] = max_price\n        if (min_price := search_filters.get(\"min price\")) != \"None\":\n            self.search_params[\"min_price\"] = min_price\n\n        houses = \"\"  # figure out how to join into comma string\n        if search_filters.get(\"house type house\") is True:\n            houses = houses + \"1\"\n        if search_filters.get(\"house type condo\") is True:\n            houses = houses + \"2\"\n        if search_filters.get(\"house type townhouse\") is True:\n            houses = houses + \"3\"\n        if search_filters.get(\"house type mul fam\") is True:\n            houses = houses + \"4\"\n\n        self.search_params[\"uipt\"] = \",\".join(list(houses))\n\n    # redfin setup\n    def 
meta_request_download(self, url: str, search_params) -> str:\n        \"\"\"Method for downloading objects from Redfin.\n\n        Args:\n            url (str): the Redfin URL\n\n        Returns:\n            str: the unicode text response\n        \"\"\"\n        response = requests.get(\n            self.rf.base + url, params=search_params, headers=self.rf.user_agent_header\n        )\n        log(response.request.url, \"debug\")\n        response.raise_for_status()\n        return response.text\n\n    def working_below_the_fold(self, property_id: str, listing_id: str = \"\") -> Any:\n        \"\"\"A below_the_fold method that accepts a listing ID.\n        Note:\n            If you can get the listing ID, make sure to pass it to this function. You will possibly get incorrect data if you do not pass it\n\n        Args:\n            property_id (str): the property ID\n            listing_id (str): The listing ID. Defaults to False.\n\n        Returns:\n            Any: response\n        \"\"\"\n        if listing_id:\n            params = {\n                \"accessLevel\": 1,\n                \"propertyId\": property_id,\n                \"listingId\": listing_id,\n                \"pageType\": 1,\n            }\n        else:\n            params = {\n                \"accessLevel\": 1,\n                \"propertyId\": property_id,\n                \"pageType\": 1,\n            }\n        return self.rf.meta_request(\"/api/home/details/belowTheFold\", params)\n\n    def get_region_info_from_zipcode(self, zip_code: str) -> Any:\n        \"\"\"Get the region ifo from a ZIP code.\n\n        Args:\n            zip_code (str): the ZIP code\n\n        Returns:\n            Any: response\n        \"\"\"\n        return self.rf.meta_request(\n            \"api/region\", {\"region_id\": zip_code, \"region_type\": 2, \"tz\": True, \"v\": 8}\n        )\n\n    def get_gis_csv(self, params: dict[str, Any]) -> str:\n        \"\"\"Get the gis-csv of an area based on the 
contents of `params`\n\n        Args:\n            params (dict[str, Any]): the parameters\n\n        Returns:\n            str: the CSV file as a unicode string\n        \"\"\"\n        return self.meta_request_download(\"api/gis-csv\", search_params=params)\n\n    def _rate_limit(self) -> None:\n        time.sleep(random.uniform(1, 1.6))\n\n    # calls stuff\n    def get_heating_info_from_super_group(self, super_group: dict) -> list[str]:\n        \"\"\"Extract heating information from a super group\n\n        :\n            Must supply a probable heating group for accurate information\n\n            Format of super group in JSON:\n            {\n                types: []\n                amenityGroups: [\n                    {\n                        groupTitle: \"\"\n                        referenceName : \"\"\n                        amenityEntries : [\n                            {\n                                amenityName : \"\"\n                                referenceName: \"\"\n                                accessLevel : 1\n                                displayLevel : 1\n                                amenityValues : []\n                            },...\n                        ]\n                    }\n                ]\n                titleString: \"\"\n            }\n\n            Format of groupTitle/propertyDetailsHeader on website:\n                Interior -> titleString\n                ...\n                    Heating & Cooling -> groupTitle\n                        Electric -> no amenityName\n                        Ceiling Fan(s), Programmable Thermostat, Refrigeration -> no amenityName\n                        Heating/Cooling Updated In: 2022 -> amenityName = Heating/Cooling Updated In\n\n        Args:\n            super_group (dict): the super group to extract terms from\n\n        Returns:\n            list[str]: list of heating terms\n        \"\"\"\n        amenity_values = []\n        utility_regex = re.compile(\"utilit\", 
re.I)\n        heating_and_cooling_regex = re.compile(\"heat\")\n        for amenity in super_group.get(\"amenityGroups\", \"\"):\n            group_title = amenity.get(\"groupTitle\", \"\")\n            if not any(AMENITY_GROUP_INCLUDE_PATTERNS.findall(group_title)):\n                continue  # this is the name that is bold\n            # these are the bulleted items.\n            for amenity_entry in amenity.get(\"amenityEntries\", \"\"):\n                # if == \"\", then item is dangling (no word before colon). give the same treatment to \"utilities: ...\" as if it were ==\"\"\n                amenity_name = amenity_entry.get(\"amenityName\", \"\")\n\n                if amenity_name and not any(utility_regex.findall(amenity_name)):\n                    # filter the before colon. first if is to have stricter capture rule when amenity item is \"Utilities: Natural gas, heat pump, ...\"\n                    if any(\n                        AMENITY_NAME_INCLUDE_PATTERNS.findall(amenity_name)\n                    ) and not any(AMENITY_NAME_EXCLUDE_PATTERNS.findall(amenity_name)):\n                        amenity_values.extend(\n                            [\n                                value\n                                for value in amenity_entry.get(\"amenityValues\", \"\")\n                                if any(\n                                    regex.findall(value)\n                                    for regex in AFTER_COLON_FUEL_AND_APPLIANCE_INCLUDE_PATTERNS\n                                )\n                                and not any(AFTER_COLON_EXCLUDE_PATTERNS.findall(value))\n                            ]\n                        )\n                elif any(heating_and_cooling_regex.findall(group_title)):\n                    # if we are in \"heating & cooling\" and we are a dangling element\n                    amenity_values.extend(\n                        [\n                            value\n                            for value in 
amenity_entry.get(\"amenityValues\", \"\")\n                            if any(\n                                regex.findall(value)\n                                for regex in AFTER_COLON_FUEL_AND_APPLIANCE_INCLUDE_PATTERNS\n                            )\n                            and not any(AFTER_COLON_EXCLUDE_PATTERNS.findall(value))\n                        ]\n                    )\n                else:\n                    # filter for appliance only if we are a dangling element or in the utilities bullet item\n                    amenity_values.extend(\n                        [\n                            value\n                            for value in amenity_entry.get(\"amenityValues\", \"\")\n                            if any(\n                                regex.findall(value)\n                                for regex in APPLIANCE_HEATING_RELATED_PATTERNS\n                            )\n                        ]\n                    )\n        return amenity_values\n\n    def get_super_groups_from_url(self, listing_url: str) -> list | None:\n        \"\"\"Get super group list from listing url.\n\n        Args:\n            listing_url (str): The path part of the listing URL. This is without the \"redfin.com\" part. Include the first forward slash\n\n        Returns:\n            list | None: List of all super groups from a Redfin Url. 
None if an error is encountered or if no super groups were found\n        \"\"\"\n        if \"redfin\" in listing_url:\n            listing_url = urlparse(listing_url).path\n\n        try:\n            self._rate_limit()\n            initial_info = self.rf.initial_info(listing_url)\n        except json.JSONDecodeError:\n            log(f\"Could not get initial info for {listing_url =}\", \"critical\")\n            return None\n        try:\n            property_id = initial_info[\"payload\"][\"propertyId\"]\n        except KeyError:\n            log(\"Could not find property id\", \"critical\")\n            return None\n        try:\n            listing_id = initial_info[\"payload\"][\"listingId\"]\n        except KeyError:\n            listing_id = None\n            log(\n                \"Could not find listing id. Will try to continue. if errors in final zip csv, this might be the issue\",\n                \"debug\",\n            )\n        try:\n            self._rate_limit()\n            if listing_id is None:\n                mls_data = self.working_below_the_fold(property_id)\n            else:\n                mls_data = self.working_below_the_fold(property_id, listing_id)\n        except json.JSONDecodeError:\n            log(f\"Could not find mls details for {listing_url = }\", \"warn\")\n            return None\n        try:\n            super_groups = mls_data[\"payload\"][\"amenitiesInfo\"][\"superGroups\"]\n        except KeyError:\n            log(f\"Could not find property details for {listing_url = }\", \"warn\")\n            return None\n        return super_groups\n\n    def get_heating_terms_dict_from_listing(\n        self, address_and_url_list: list[str]\n    ) -> dict[str, bool]:\n        \"\"\"Generate a filled out dictionary based on `self.column_dict` and the contents of :meth:get_heating_info_from_super_group(address_url_list).\n\n        TODO:\n            Since addresses can be doubled and it is random which one gets chosen, just 
printing listing url so that we can see which one has been chosen\n\n        Args:\n            address_and_url_list (list[str]): address in the first position, and the listing URL in the second position\n\n        Returns:\n            dict[str, bool]: the filled out `self.column_dict` for the supplied address/listing URL\n        \"\"\"\n        address = address_and_url_list[0]\n        listing_url = address_and_url_list[1]\n        terms = []\n\n        super_groups = self.get_super_groups_from_url(listing_url)\n        if super_groups is None:\n            log(\"No amenities found\", \"info\")\n            return copy.deepcopy(self.column_dict)\n        for super_group in super_groups:  # dict\n            if any(\n                SUPER_GROUP_INCLUDE_PATTERNS.findall(super_group.get(\"titleString\", \"\"))\n            ):\n                terms.extend(self.get_heating_info_from_super_group(super_group))\n        if len(terms) == 0:\n            log(\n                f\"There was no heating information for {urlparse(listing_url).path}\",\n                \"info\",\n            )\n            return copy.deepcopy(self.column_dict)\n\n        # categorize the correct dict and return\n        master_dict = copy.deepcopy(self.column_dict)\n        for input_string in terms:\n            log(f\"{input_string = }\", \"debug\")\n            result = {}\n            for key, pattern in CATEGORY_PATTERNS.items():\n                if bool(re.search(pattern, input_string)):\n                    result[key] = True\n                    log(f\"Pattern matched on {key, pattern = }\", \"debug\")\n                log(f\"Pattern did not match on {key, pattern = }\", \"debug\")\n            for key in result.keys():\n                master_dict[key] = result[key] | master_dict[key]\n\n        # You'll have to df.unnest this for use in a dataframe\n        log(f\"{terms = }\", \"debug\")\n        log(f\"{master_dict = }\", \"debug\")\n        log(f\"Heating amenities found for 
{address}.\", \"info\")\n        return master_dict\n\n    def get_gis_csv_from_zip_with_filters(\n        self,\n    ) -> pl.DataFrame | None:\n        \"\"\"Clean the GIS CSV retrieved from using the `search_params` field into the desired schema.\n\n        Returns:\n            pl.DataFrame | None: returns the DataFrame of cleaned information. None if there was not information in the GIS CSV file.\n        \"\"\"\n        if self.search_params is None:\n            return\n        csv_text = self.get_gis_csv(self.search_params)\n\n        home_types: str = self.search_params.get(\"uipt\", \"\")\n        if \"1\" in home_types:\n            home_types = home_types.replace(\"1\", \"Single Family Residential\")\n        if \"2\" in home_types:\n            home_types = home_types.replace(\"2\", \"Condo/Co-op\")\n        if \"3\" in home_types:\n            home_types = home_types.replace(\"3\", \"Townhouse\")\n        if \"4\" in home_types:\n            home_types = home_types.replace(\"4\", \"Multi-Family (2-4 Unit)\")\n\n        try:\n            df = (\n                pl.read_csv(\n                    io.StringIO(csv_text),\n                    dtypes=self.STRING_ZIP_CSV_SCHEMA,\n                )\n                .with_columns(\n                    pl.col(\"ZIP OR POSTAL CODE\").str.extract(r\"([0-9]{5})\", 1)\n                )\n                .cast({\"ZIP OR POSTAL CODE\": pl.UInt32})\n                .filter(\n                    pl.col(\"PROPERTY TYPE\").str.contains(\n                        \"|\".join(home_types.split(\",\"))\n                    )\n                )\n                .select(\n                    \"ADDRESS\",\n                    \"CITY\",\n                    \"STATE OR PROVINCE\",\n                    \"YEAR BUILT\",\n                    \"ZIP OR POSTAL CODE\",\n                    \"PRICE\",\n                    \"SQUARE FEET\",\n                    \"URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON 
PRICING)\",\n                    \"LATITUDE\",\n                    \"LONGITUDE\",\n                )\n            )\n            if df.height == 0:\n                log(\n                    \"CSV was empty. This can happen if local MLS rules dont allow downloads.\",\n                    \"debug\",\n                )\n                return None\n        except Exception as e:\n            log(f\"Could not read gis csv into dataframe.\\n{csv_text = }\\n{e}\", \"warn\")\n            return None\n        return df\n\n    def get_gis_csv_for_zips_in_metro_with_filters(\n        self, msa_name: str, search_filters: dict[str, Any]\n    ) -> pl.DataFrame | None:\n        \"\"\"Get a DataFrame of all GIS CSVs of a Metropolitan Statistical Area.\n\n        Args:\n            msa_name (str): a Metropolitan Statistical Area\n            search_filters (dict[str, Any]): filters to search with. generate using :meth:\n\n        Returns:\n            pl.DataFrame | None: return a DataFrame of all GIS CSVs retrieved for individual ZIP codes. 
None if there were no CSVs\n        \"\"\"\n        log(f\"Searching {msa_name} with filters {search_filters}.\", \"log\")\n        zip_codes = metro_name_to_zip_code_list(msa_name)\n        formatted_zip_codes = [f\"{zip_code:0{5}}\" for zip_code in zip_codes]\n        log(\n            f\"Estimated search time: {len(formatted_zip_codes) * 4.5}\",\n            \"info\",\n        )\n        list_of_csv_dfs = []\n        for zip in formatted_zip_codes:\n            self._rate_limit()\n            self.set_search_params(zip, search_filters)\n            temp = self.get_gis_csv_from_zip_with_filters()\n            if temp is None:\n                log(f\"Did not find any houses in {zip}.\", \"info\")\n                continue\n            log(f\"Found data for {temp.height} houses in {zip}.\", \"info\")\n            list_of_csv_dfs.append(temp)\n\n        if len(list_of_csv_dfs) == 0:\n            return None\n        return pl.concat(list_of_csv_dfs)\n\n    def get_house_attributes_from_metro(\n        self,\n        msa_name: str,\n        search_filters: dict[str, Any],\n        use_cached_gis_csv_csv: bool = False,\n    ) -> None:\n        \"\"\"Main function. Get the heating attributes of a Metropolitan Statistical Area.\n\n        TODO:\n            statistics on metropolitan\n            Log statistics about the heating outlook of a metro.\n\n        Args:\n            msa_name (str): Metropolitan Statistical Area name\n            search_filters (dict[str, Any]): search filters\n            use_cached_gis_csv_csv (bool, optional): Whether to use an already made GIS CSV DataFrame. 
Defaults to False.\n\n        Returns:\n            None: None if there were no houses found in the metro\n        \"\"\"\n        file_safe_msa_name = msa_name.strip().replace(\", \", \"_\").replace(\" \", \"_\")\n        METRO_OUTPUT_DIR_PATH = OUTPUT_DIR_PATH / file_safe_msa_name\n\n        if use_cached_gis_csv_csv:\n            log(\"Loading csv from cache.\", \"info\")\n            try:\n                search_page_csvs_df = pl.read_csv(\n                    METRO_OUTPUT_DIR_PATH / (file_safe_msa_name + \".csv\"),\n                    dtypes=self.DESIRED_CSV_SCHEMA,\n                )\n                log(\n                    f\"Loading csv from {METRO_OUTPUT_DIR_PATH / (file_safe_msa_name + \".csv\")} is complete.\",\n                    \"info\",\n                )\n            except FileNotFoundError:\n                log(\n                    f\"Loading csv from {METRO_OUTPUT_DIR_PATH / (file_safe_msa_name + \".csv\")} has failed, continuing with API search.\",\n                    \"info\",\n                )\n                search_page_csvs_df = self.get_gis_csv_for_zips_in_metro_with_filters(\n                    msa_name, search_filters\n                )\n        else:\n            search_page_csvs_df = self.get_gis_csv_for_zips_in_metro_with_filters(\n                msa_name, search_filters\n            )\n\n        if search_page_csvs_df is None:\n            log(f\"No houses found within {msa_name}. 
Try relaxing filters.\", \"info\")\n            return None\n\n        url_col_name = \"URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)\"\n        search_page_csvs_df = search_page_csvs_df.filter(\n            (~pl.col(url_col_name).str.contains(\"(?i)unknown\"))\n            .and_(pl.col(\"ADDRESS\").str.len_chars().gt(0))\n            .and_(pl.col(\"SQUARE FEET\").is_not_null())\n            .and_(pl.col(\"YEAR BUILT\").is_not_null())\n        )\n        # doing this twice so that the search page does not have nulls in the year built column.\n        min_year_built = search_filters.get(\"min year built\")\n        max_year_built = search_filters.get(\"max year built\")\n        assert min_year_built is not None and max_year_built is not None\n\n        # max() Acts like a Boolean OR\n        search_page_csvs_df = (\n            search_page_csvs_df.filter(\n                pl.col(\"YEAR BUILT\")\n                .ge(int(min_year_built))\n                .and_(pl.col(\"YEAR BUILT\").le(int(max_year_built)))\n            )\n            .group_by(by=[\"LATITUDE\", \"LONGITUDE\"])\n            .max()\n        )\n\n        log(f\"Found {search_page_csvs_df.height} possible houses in {msa_name}\", \"info\")\n        METRO_OUTPUT_DIR_PATH.mkdir(parents=True, exist_ok=True)\n        log(\n            f\"Writing csv for metro to {METRO_OUTPUT_DIR_PATH / (file_safe_msa_name + \".csv\")}\",\n            \"debug\",\n        )\n        search_page_csvs_df.write_csv(\n            METRO_OUTPUT_DIR_PATH / (file_safe_msa_name + \".csv\")\n        )\n\n        # go through whole csv and get the house attributes for each house. 
then partition the dataframe by ZIP and save files\n\n        log(\"Starting lookups on listing URLS\", \"info\")\n        log(\n            f\"Unique ZIP codes: {search_page_csvs_df[\"ZIP OR POSTAL CODE\"].n_unique()}\",\n            \"info\",\n        )\n        log(\n            f\"Estimated completion time: {search_page_csvs_df.height * 4.5} seconds\",\n            \"info\",\n        )\n\n        list_of_dfs_by_zip = search_page_csvs_df.partition_by(\"ZIP OR POSTAL CODE\")\n\n        for i, _ in enumerate(list_of_dfs_by_zip):\n            list_of_dfs_by_zip[i] = (\n                list_of_dfs_by_zip[i]\n                .with_columns(\n                    pl.concat_list([pl.col(\"ADDRESS\"), pl.col(url_col_name)])\n                    .map_elements(self.get_heating_terms_dict_from_listing)\n                    .alias(\"nest\")\n                )\n                .drop(url_col_name)\n                .unnest(\"nest\")\n            )\n\n            zip = list_of_dfs_by_zip[i].select(\"ZIP OR POSTAL CODE\").item(0, 0)\n            list_of_dfs_by_zip[i].write_csv(f\"{METRO_OUTPUT_DIR_PATH / str(zip)}.csv\")\n\n        if len(list_of_dfs_by_zip) > 0:\n            concat_df = pl.concat(list_of_dfs_by_zip)\n            log(f\"Information on {msa_name}:\", \"info\")\n            log(\n                f\"num entries: {concat_df.height}, avg. 
house price: ${concat_df.get_column(\"PRICE\").mean():,.2f}, electric houses: {concat_df.get_column(\"Electricity\").sum()}, gas houses: {concat_df.get_column(\"Natural Gas\").sum()}, propane houses: {concat_df.get_column(\"Propane\").sum()}, oil-fed houses: {concat_df.get_column(\"Diesel/Heating Oil\").sum()}, wood-fed houses: {concat_df.get_column(\"Wood/Pellet\").sum()}, solar-heated houses: {concat_df.get_column(\"Solar Heating\").sum()}, heat pump houses: {concat_df.get_column(\"Heat Pump\").sum()}, baseboard houses: {concat_df.get_column(\"Baseboard\").sum()}, furnace houses: {concat_df.get_column(\"Furnace\").sum()}, boiler houses: {concat_df.get_column(\"Boiler\").sum()}, radiator houses: {concat_df.get_column(\"Radiator\").sum()}, houses with radiant floors: {concat_df.get_column(\"Radiant Floor\").sum()}\",\n                \"info\",\n            )\n\n            concat_df.write_csv(f\"{METRO_OUTPUT_DIR_PATH}/full_info.csv\")\n\n        log(f\"Done with searching houses in {msa_name}!\", \"info\")\n
    "},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi.get_gis_csv","title":"get_gis_csv(params)","text":"

    Get the gis-csv of an area based on the contents of params

    Parameters:

    Name Type Description Default params dict[str, Any]

    the parameters

    required

    Returns:

    Name Type Description str str

    the CSV file as a unicode string

    Source code in src\\backend\\redfinscraper.py
    def get_gis_csv(self, params: dict[str, Any]) -> str:\n    \"\"\"Get the gis-csv of an area based on the contents of `params`\n\n    Args:\n        params (dict[str, Any]): the parameters\n\n    Returns:\n        str: the CSV file as a unicode string\n    \"\"\"\n    return self.meta_request_download(\"api/gis-csv\", search_params=params)\n
    "},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi.get_gis_csv_for_zips_in_metro_with_filters","title":"get_gis_csv_for_zips_in_metro_with_filters(msa_name, search_filters)","text":"

    Get a DataFrame of all GIS CSVs of a Metropolitan Statistical Area.

    Parameters:

    Name Type Description Default msa_name str

    a Metropolitan Statistical Area

    required search_filters dict[str, Any]

    filters to search with. generate using :meth:

    required

    Returns:

    Type Description DataFrame | None

    pl.DataFrame | None: return a DataFrame of all GIS CSVs retrieved for individual ZIP codes. None if there were no CSVs

    Source code in src\\backend\\redfinscraper.py
    def get_gis_csv_for_zips_in_metro_with_filters(\n    self, msa_name: str, search_filters: dict[str, Any]\n) -> pl.DataFrame | None:\n    \"\"\"Get a DataFrame of all GIS CSVs of a Metropolitan Statistical Area.\n\n    Args:\n        msa_name (str): a Metropolitan Statistical Area\n        search_filters (dict[str, Any]): filters to search with. generate using :meth:\n\n    Returns:\n        pl.DataFrame | None: return a DataFrame of all GIS CSVs retrieved for individual ZIP codes. None if there were no CSVs\n    \"\"\"\n    log(f\"Searching {msa_name} with filters {search_filters}.\", \"log\")\n    zip_codes = metro_name_to_zip_code_list(msa_name)\n    formatted_zip_codes = [f\"{zip_code:0{5}}\" for zip_code in zip_codes]\n    log(\n        f\"Estimated search time: {len(formatted_zip_codes) * 4.5}\",\n        \"info\",\n    )\n    list_of_csv_dfs = []\n    for zip in formatted_zip_codes:\n        self._rate_limit()\n        self.set_search_params(zip, search_filters)\n        temp = self.get_gis_csv_from_zip_with_filters()\n        if temp is None:\n            log(f\"Did not find any houses in {zip}.\", \"info\")\n            continue\n        log(f\"Found data for {temp.height} houses in {zip}.\", \"info\")\n        list_of_csv_dfs.append(temp)\n\n    if len(list_of_csv_dfs) == 0:\n        return None\n    return pl.concat(list_of_csv_dfs)\n
    "},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi.get_gis_csv_from_zip_with_filters","title":"get_gis_csv_from_zip_with_filters()","text":"

    Clean the GIS CSV retrieved using the search_params field into the desired schema.

    Returns:

    Type Description DataFrame | None

    pl.DataFrame | None: returns the DataFrame of cleaned information. None if there was no information in the GIS CSV file.

    Source code in src\\backend\\redfinscraper.py
    def get_gis_csv_from_zip_with_filters(\n    self,\n) -> pl.DataFrame | None:\n    \"\"\"Clean the GIS CSV retrieved from using the `search_params` field into the desired schema.\n\n    Returns:\n        pl.DataFrame | None: returns the DataFrame of cleaned information. None if there was not information in the GIS CSV file.\n    \"\"\"\n    if self.search_params is None:\n        return\n    csv_text = self.get_gis_csv(self.search_params)\n\n    home_types: str = self.search_params.get(\"uipt\", \"\")\n    if \"1\" in home_types:\n        home_types = home_types.replace(\"1\", \"Single Family Residential\")\n    if \"2\" in home_types:\n        home_types = home_types.replace(\"2\", \"Condo/Co-op\")\n    if \"3\" in home_types:\n        home_types = home_types.replace(\"3\", \"Townhouse\")\n    if \"4\" in home_types:\n        home_types = home_types.replace(\"4\", \"Multi-Family (2-4 Unit)\")\n\n    try:\n        df = (\n            pl.read_csv(\n                io.StringIO(csv_text),\n                dtypes=self.STRING_ZIP_CSV_SCHEMA,\n            )\n            .with_columns(\n                pl.col(\"ZIP OR POSTAL CODE\").str.extract(r\"([0-9]{5})\", 1)\n            )\n            .cast({\"ZIP OR POSTAL CODE\": pl.UInt32})\n            .filter(\n                pl.col(\"PROPERTY TYPE\").str.contains(\n                    \"|\".join(home_types.split(\",\"))\n                )\n            )\n            .select(\n                \"ADDRESS\",\n                \"CITY\",\n                \"STATE OR PROVINCE\",\n                \"YEAR BUILT\",\n                \"ZIP OR POSTAL CODE\",\n                \"PRICE\",\n                \"SQUARE FEET\",\n                \"URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)\",\n                \"LATITUDE\",\n                \"LONGITUDE\",\n            )\n        )\n        if df.height == 0:\n            log(\n                \"CSV was empty. 
This can happen if local MLS rules dont allow downloads.\",\n                \"debug\",\n            )\n            return None\n    except Exception as e:\n        log(f\"Could not read gis csv into dataframe.\\n{csv_text = }\\n{e}\", \"warn\")\n        return None\n    return df\n
    "},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi.get_heating_info_from_super_group","title":"get_heating_info_from_super_group(super_group)","text":"

    Extract heating information from a super group

    Note: Must supply a probable heating group for accurate information.

    Format of super group in JSON:\n{\n    types: []\n    amenityGroups: [\n        {\n            groupTitle: \"\"\n            referenceName : \"\"\n            amenityEntries : [\n                {\n                    amenityName : \"\"\n                    referenceName: \"\"\n                    accessLevel : 1\n                    displayLevel : 1\n                    amenityValues : []\n                },...\n            ]\n        }\n    ]\n    titleString: \"\"\n}\n\nFormat of groupTitle/propertyDetailsHeader on website:\n    Interior -> titleString\n    ...\n        Heating & Cooling -> groupTitle\n            Electric -> no amenityName\n            Ceiling Fan(s), Programmable Thermostat, Refrigeration -> no amenityName\n            Heating/Cooling Updated In: 2022 -> amenityName = Heating/Cooling Updated In\n

    Parameters:

    Name Type Description Default super_group dict

    the super group to extract terms from

    required

    Returns:

    Type Description list[str]

    list[str]: list of heating terms

    Source code in src\\backend\\redfinscraper.py
    def get_heating_info_from_super_group(self, super_group: dict) -> list[str]:\n    \"\"\"Extract heating information from a super group\n\n    :\n        Must supply a probable heating group for accurate information\n\n        Format of super group in JSON:\n        {\n            types: []\n            amenityGroups: [\n                {\n                    groupTitle: \"\"\n                    referenceName : \"\"\n                    amenityEntries : [\n                        {\n                            amenityName : \"\"\n                            referenceName: \"\"\n                            accessLevel : 1\n                            displayLevel : 1\n                            amenityValues : []\n                        },...\n                    ]\n                }\n            ]\n            titleString: \"\"\n        }\n\n        Format of groupTitle/propertyDetailsHeader on website:\n            Interior -> titleString\n            ...\n                Heating & Cooling -> groupTitle\n                    Electric -> no amenityName\n                    Ceiling Fan(s), Programmable Thermostat, Refrigeration -> no amenityName\n                    Heating/Cooling Updated In: 2022 -> amenityName = Heating/Cooling Updated In\n\n    Args:\n        super_group (dict): the super group to extract terms from\n\n    Returns:\n        list[str]: list of heating terms\n    \"\"\"\n    amenity_values = []\n    utility_regex = re.compile(\"utilit\", re.I)\n    heating_and_cooling_regex = re.compile(\"heat\")\n    for amenity in super_group.get(\"amenityGroups\", \"\"):\n        group_title = amenity.get(\"groupTitle\", \"\")\n        if not any(AMENITY_GROUP_INCLUDE_PATTERNS.findall(group_title)):\n            continue  # this is the name that is bold\n        # these are the bulleted items.\n        for amenity_entry in amenity.get(\"amenityEntries\", \"\"):\n            # if == \"\", then item is dangling (no word before colon). 
give the same treatment to \"utilities: ...\" as if it were ==\"\"\n            amenity_name = amenity_entry.get(\"amenityName\", \"\")\n\n            if amenity_name and not any(utility_regex.findall(amenity_name)):\n                # filter the before colon. first if is to have stricter capture rule when amenity item is \"Utilities: Natural gas, heat pump, ...\"\n                if any(\n                    AMENITY_NAME_INCLUDE_PATTERNS.findall(amenity_name)\n                ) and not any(AMENITY_NAME_EXCLUDE_PATTERNS.findall(amenity_name)):\n                    amenity_values.extend(\n                        [\n                            value\n                            for value in amenity_entry.get(\"amenityValues\", \"\")\n                            if any(\n                                regex.findall(value)\n                                for regex in AFTER_COLON_FUEL_AND_APPLIANCE_INCLUDE_PATTERNS\n                            )\n                            and not any(AFTER_COLON_EXCLUDE_PATTERNS.findall(value))\n                        ]\n                    )\n            elif any(heating_and_cooling_regex.findall(group_title)):\n                # if we are in \"heating & cooling\" and we are a dangling element\n                amenity_values.extend(\n                    [\n                        value\n                        for value in amenity_entry.get(\"amenityValues\", \"\")\n                        if any(\n                            regex.findall(value)\n                            for regex in AFTER_COLON_FUEL_AND_APPLIANCE_INCLUDE_PATTERNS\n                        )\n                        and not any(AFTER_COLON_EXCLUDE_PATTERNS.findall(value))\n                    ]\n                )\n            else:\n                # filter for appliance only if we are a dangling element or in the utilities bullet item\n                amenity_values.extend(\n                    [\n                        value\n                        for 
value in amenity_entry.get(\"amenityValues\", \"\")\n                        if any(\n                            regex.findall(value)\n                            for regex in APPLIANCE_HEATING_RELATED_PATTERNS\n                        )\n                    ]\n                )\n    return amenity_values\n
    "},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi.get_heating_terms_dict_from_listing","title":"get_heating_terms_dict_from_listing(address_and_url_list)","text":"

    Generate a filled out dictionary based on self.column_dict and the contents of :meth:get_heating_info_from_super_group(address_url_list).

    TODO

    Since addresses can be doubled and it is random which one gets chosen, just printing listing url so that we can see which one has been chosen

    Parameters:

    Name Type Description Default address_and_url_list list[str]

    address in the first position, and the listing URL in the second position

    required

    Returns:

    Type Description dict[str, bool]

    dict[str, bool]: the filled out self.column_dict for the supplied address/listing URL

    Source code in src\\backend\\redfinscraper.py
    def get_heating_terms_dict_from_listing(\n    self, address_and_url_list: list[str]\n) -> dict[str, bool]:\n    \"\"\"Generate a filled out dictionary based on `self.column_dict` and the contents of :meth:get_heating_info_from_super_group(address_url_list).\n\n    TODO:\n        Since addresses can be doubled and it is random which one gets chosen, just printing listing url so that we can see which one has been chosen\n\n    Args:\n        address_and_url_list (list[str]): address in the first position, and the listing URL in the second position\n\n    Returns:\n        dict[str, bool]: the filled out `self.column_dict` for the supplied address/listing URL\n    \"\"\"\n    address = address_and_url_list[0]\n    listing_url = address_and_url_list[1]\n    terms = []\n\n    super_groups = self.get_super_groups_from_url(listing_url)\n    if super_groups is None:\n        log(\"No amenities found\", \"info\")\n        return copy.deepcopy(self.column_dict)\n    for super_group in super_groups:  # dict\n        if any(\n            SUPER_GROUP_INCLUDE_PATTERNS.findall(super_group.get(\"titleString\", \"\"))\n        ):\n            terms.extend(self.get_heating_info_from_super_group(super_group))\n    if len(terms) == 0:\n        log(\n            f\"There was no heating information for {urlparse(listing_url).path}\",\n            \"info\",\n        )\n        return copy.deepcopy(self.column_dict)\n\n    # categorize the correct dict and return\n    master_dict = copy.deepcopy(self.column_dict)\n    for input_string in terms:\n        log(f\"{input_string = }\", \"debug\")\n        result = {}\n        for key, pattern in CATEGORY_PATTERNS.items():\n            if bool(re.search(pattern, input_string)):\n                result[key] = True\n                log(f\"Pattern matched on {key, pattern = }\", \"debug\")\n            log(f\"Pattern did not match on {key, pattern = }\", \"debug\")\n        for key in result.keys():\n            master_dict[key] = result[key] 
| master_dict[key]\n\n    # You'll have to df.unnest this for use in a dataframe\n    log(f\"{terms = }\", \"debug\")\n    log(f\"{master_dict = }\", \"debug\")\n    log(f\"Heating amenities found for {address}.\", \"info\")\n    return master_dict\n
    "},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi.get_house_attributes_from_metro","title":"get_house_attributes_from_metro(msa_name, search_filters, use_cached_gis_csv_csv=False)","text":"

    Main function. Get the heating attributes of a Metropolitan Statistical Area.

    TODO

    Statistics on the metropolitan area: log statistics about the heating outlook of a metro.

    Parameters:

    Name Type Description Default msa_name str

    Metropolitan Statistical Area name

    required search_filters dict[str, Any]

    search filters

    required use_cached_gis_csv_csv bool

    Whether to use an already made GIS CSV DataFrame. Defaults to False.

    False

    Returns:

    Name Type Description None None

    None if there were no houses found in the metro

    Source code in src\\backend\\redfinscraper.py
    def get_house_attributes_from_metro(\n    self,\n    msa_name: str,\n    search_filters: dict[str, Any],\n    use_cached_gis_csv_csv: bool = False,\n) -> None:\n    \"\"\"Main function. Get the heating attributes of a Metropolitan Statistical Area.\n\n    TODO:\n        statistics on metropolitan\n        Log statistics about the heating outlook of a metro.\n\n    Args:\n        msa_name (str): Metropolitan Statistical Area name\n        search_filters (dict[str, Any]): search filters\n        use_cached_gis_csv_csv (bool, optional): Whether to use an already made GIS CSV DataFrame. Defaults to False.\n\n    Returns:\n        None: None if there were no houses found in the metro\n    \"\"\"\n    file_safe_msa_name = msa_name.strip().replace(\", \", \"_\").replace(\" \", \"_\")\n    METRO_OUTPUT_DIR_PATH = OUTPUT_DIR_PATH / file_safe_msa_name\n\n    if use_cached_gis_csv_csv:\n        log(\"Loading csv from cache.\", \"info\")\n        try:\n            search_page_csvs_df = pl.read_csv(\n                METRO_OUTPUT_DIR_PATH / (file_safe_msa_name + \".csv\"),\n                dtypes=self.DESIRED_CSV_SCHEMA,\n            )\n            log(\n                f\"Loading csv from {METRO_OUTPUT_DIR_PATH / (file_safe_msa_name + \".csv\")} is complete.\",\n                \"info\",\n            )\n        except FileNotFoundError:\n            log(\n                f\"Loading csv from {METRO_OUTPUT_DIR_PATH / (file_safe_msa_name + \".csv\")} has failed, continuing with API search.\",\n                \"info\",\n            )\n            search_page_csvs_df = self.get_gis_csv_for_zips_in_metro_with_filters(\n                msa_name, search_filters\n            )\n    else:\n        search_page_csvs_df = self.get_gis_csv_for_zips_in_metro_with_filters(\n            msa_name, search_filters\n        )\n\n    if search_page_csvs_df is None:\n        log(f\"No houses found within {msa_name}. 
Try relaxing filters.\", \"info\")\n        return None\n\n    url_col_name = \"URL (SEE https://www.redfin.com/buy-a-home/comparative-market-analysis FOR INFO ON PRICING)\"\n    search_page_csvs_df = search_page_csvs_df.filter(\n        (~pl.col(url_col_name).str.contains(\"(?i)unknown\"))\n        .and_(pl.col(\"ADDRESS\").str.len_chars().gt(0))\n        .and_(pl.col(\"SQUARE FEET\").is_not_null())\n        .and_(pl.col(\"YEAR BUILT\").is_not_null())\n    )\n    # doing this twice so that the search page does not have nulls in the year built column.\n    min_year_built = search_filters.get(\"min year built\")\n    max_year_built = search_filters.get(\"max year built\")\n    assert min_year_built is not None and max_year_built is not None\n\n    # max() Acts like a Boolean OR\n    search_page_csvs_df = (\n        search_page_csvs_df.filter(\n            pl.col(\"YEAR BUILT\")\n            .ge(int(min_year_built))\n            .and_(pl.col(\"YEAR BUILT\").le(int(max_year_built)))\n        )\n        .group_by(by=[\"LATITUDE\", \"LONGITUDE\"])\n        .max()\n    )\n\n    log(f\"Found {search_page_csvs_df.height} possible houses in {msa_name}\", \"info\")\n    METRO_OUTPUT_DIR_PATH.mkdir(parents=True, exist_ok=True)\n    log(\n        f\"Writing csv for metro to {METRO_OUTPUT_DIR_PATH / (file_safe_msa_name + \".csv\")}\",\n        \"debug\",\n    )\n    search_page_csvs_df.write_csv(\n        METRO_OUTPUT_DIR_PATH / (file_safe_msa_name + \".csv\")\n    )\n\n    # go through whole csv and get the house attributes for each house. 
then partition the dataframe by ZIP and save files\n\n    log(\"Starting lookups on listing URLS\", \"info\")\n    log(\n        f\"Unique ZIP codes: {search_page_csvs_df[\"ZIP OR POSTAL CODE\"].n_unique()}\",\n        \"info\",\n    )\n    log(\n        f\"Estimated completion time: {search_page_csvs_df.height * 4.5} seconds\",\n        \"info\",\n    )\n\n    list_of_dfs_by_zip = search_page_csvs_df.partition_by(\"ZIP OR POSTAL CODE\")\n\n    for i, _ in enumerate(list_of_dfs_by_zip):\n        list_of_dfs_by_zip[i] = (\n            list_of_dfs_by_zip[i]\n            .with_columns(\n                pl.concat_list([pl.col(\"ADDRESS\"), pl.col(url_col_name)])\n                .map_elements(self.get_heating_terms_dict_from_listing)\n                .alias(\"nest\")\n            )\n            .drop(url_col_name)\n            .unnest(\"nest\")\n        )\n\n        zip = list_of_dfs_by_zip[i].select(\"ZIP OR POSTAL CODE\").item(0, 0)\n        list_of_dfs_by_zip[i].write_csv(f\"{METRO_OUTPUT_DIR_PATH / str(zip)}.csv\")\n\n    if len(list_of_dfs_by_zip) > 0:\n        concat_df = pl.concat(list_of_dfs_by_zip)\n        log(f\"Information on {msa_name}:\", \"info\")\n        log(\n            f\"num entries: {concat_df.height}, avg. 
house price: ${concat_df.get_column(\"PRICE\").mean():,.2f}, electric houses: {concat_df.get_column(\"Electricity\").sum()}, gas houses: {concat_df.get_column(\"Natural Gas\").sum()}, propane houses: {concat_df.get_column(\"Propane\").sum()}, oil-fed houses: {concat_df.get_column(\"Diesel/Heating Oil\").sum()}, wood-fed houses: {concat_df.get_column(\"Wood/Pellet\").sum()}, solar-heated houses: {concat_df.get_column(\"Solar Heating\").sum()}, heat pump houses: {concat_df.get_column(\"Heat Pump\").sum()}, baseboard houses: {concat_df.get_column(\"Baseboard\").sum()}, furnace houses: {concat_df.get_column(\"Furnace\").sum()}, boiler houses: {concat_df.get_column(\"Boiler\").sum()}, radiator houses: {concat_df.get_column(\"Radiator\").sum()}, houses with radiant floors: {concat_df.get_column(\"Radiant Floor\").sum()}\",\n            \"info\",\n        )\n\n        concat_df.write_csv(f\"{METRO_OUTPUT_DIR_PATH}/full_info.csv\")\n\n    log(f\"Done with searching houses in {msa_name}!\", \"info\")\n
    "},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi.get_region_info_from_zipcode","title":"get_region_info_from_zipcode(zip_code)","text":"

    Get the region info from a ZIP code.

    Parameters:

    Name Type Description Default zip_code str

    the ZIP code

    required

    Returns:

    Name Type Description Any Any

    response

    Source code in src\\backend\\redfinscraper.py
    def get_region_info_from_zipcode(self, zip_code: str) -> Any:\n    \"\"\"Get the region ifo from a ZIP code.\n\n    Args:\n        zip_code (str): the ZIP code\n\n    Returns:\n        Any: response\n    \"\"\"\n    return self.rf.meta_request(\n        \"api/region\", {\"region_id\": zip_code, \"region_type\": 2, \"tz\": True, \"v\": 8}\n    )\n
    "},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi.get_super_groups_from_url","title":"get_super_groups_from_url(listing_url)","text":"

    Get super group list from listing url.

    Parameters:

    Name Type Description Default listing_url str

    The path part of the listing URL. This is without the \"redfin.com\" part. Include the first forward slash

    required

    Returns:

    Type Description list | None

    list | None: List of all super groups from a Redfin Url. None if an error is encountered or if no super groups were found

    Source code in src\\backend\\redfinscraper.py
    def get_super_groups_from_url(self, listing_url: str) -> list | None:\n    \"\"\"Get super group list from listing url.\n\n    Args:\n        listing_url (str): The path part of the listing URL. This is without the \"redfin.com\" part. Include the first forward slash\n\n    Returns:\n        list | None: List of all super groups from a Redfin Url. None if an error is encountered or if no super groups were found\n    \"\"\"\n    if \"redfin\" in listing_url:\n        listing_url = urlparse(listing_url).path\n\n    try:\n        self._rate_limit()\n        initial_info = self.rf.initial_info(listing_url)\n    except json.JSONDecodeError:\n        log(f\"Could not get initial info for {listing_url =}\", \"critical\")\n        return None\n    try:\n        property_id = initial_info[\"payload\"][\"propertyId\"]\n    except KeyError:\n        log(\"Could not find property id\", \"critical\")\n        return None\n    try:\n        listing_id = initial_info[\"payload\"][\"listingId\"]\n    except KeyError:\n        listing_id = None\n        log(\n            \"Could not find listing id. Will try to continue. if errors in final zip csv, this might be the issue\",\n            \"debug\",\n        )\n    try:\n        self._rate_limit()\n        if listing_id is None:\n            mls_data = self.working_below_the_fold(property_id)\n        else:\n            mls_data = self.working_below_the_fold(property_id, listing_id)\n    except json.JSONDecodeError:\n        log(f\"Could not find mls details for {listing_url = }\", \"warn\")\n        return None\n    try:\n        super_groups = mls_data[\"payload\"][\"amenitiesInfo\"][\"superGroups\"]\n    except KeyError:\n        log(f\"Could not find property details for {listing_url = }\", \"warn\")\n        return None\n    return super_groups\n
    "},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi.meta_request_download","title":"meta_request_download(url, search_params)","text":"

    Method for downloading objects from Redfin.

    Parameters:

    Name Type Description Default url str

    the Redfin URL

    required

    Returns:

    Name Type Description str str

    the unicode text response

    Source code in src\\backend\\redfinscraper.py
    def meta_request_download(self, url: str, search_params) -> str:\n    \"\"\"Method for downloading objects from Redfin.\n\n    Args:\n        url (str): the Redfin URL\n\n    Returns:\n        str: the unicode text response\n    \"\"\"\n    response = requests.get(\n        self.rf.base + url, params=search_params, headers=self.rf.user_agent_header\n    )\n    log(response.request.url, \"debug\")\n    response.raise_for_status()\n    return response.text\n
    "},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi.set_search_params","title":"set_search_params(zip, search_filters)","text":"

    Set the parameters for searching by ZIP code.

    Parameters:

    Name Type Description Default zip str

    the ZIP code

    required search_filters dict[str, Any]

    search filters for appending to a gis-csv path

    required Source code in src\\backend\\redfinscraper.py
    def set_search_params(self, zip: str, search_filters: dict[str, Any]) -> None:\n    \"\"\"Set the parameters for searching by ZIP code.\n\n    Args:\n        zip (str): the ZIP code\n        search_filters (dict[str, Any]): search filters for appending to a gis-csv path\n    \"\"\"\n    try:\n        region_info = self.get_region_info_from_zipcode(zip)\n    except json.JSONDecodeError:\n        log(f\"Could not decode region info for {zip}.\", \"warn\")\n        return None\n    except HTTPError:\n        log(f\"Could not retrieve region info for {zip}.\", \"warn\")\n        return None\n\n    if search_filters.get(\"for sale sold\") == \"Sold\":\n        sort_order = self.SortOrder.MOST_RECENTLY_SOLD.value\n    else:\n        sort_order = self.SortOrder.NEWEST.value\n    # TODO make sure to fix filtering so that its not just \"single family homes\"\n\n    try:\n        market = region_info[\"payload\"][\"rootDefaults\"][\"market\"]\n        region_id = region_info[\"payload\"][\"rootDefaults\"][\"region_id\"]\n        status = str(region_info[\"payload\"][\"rootDefaults\"][\"status\"])\n    except KeyError:\n        log(\"Market, region, or status could not be identified \", \"warn\")\n        return None\n\n    self.search_params = {\n        \"al\": 1,\n        \"has_deal\": \"false\",\n        \"has_dishwasher\": \"false\",\n        \"has_laundry_facility\": \"false\",\n        \"has_laundry_hookups\": \"false\",\n        \"has_parking\": \"false\",\n        \"has_pool\": \"false\",\n        \"has_short_term_lease\": \"false\",\n        \"include_pending_homes\": \"false\",  # probably an \"include\" option\n        \"isRentals\": \"false\",\n        \"is_furnished\": \"false\",\n        \"is_income_restricted\": \"false\",\n        \"is_senior_living\": \"false\",\n        \"max_year_built\": search_filters.get(\"max year built\"),\n        \"min_year_built\": search_filters.get(\"min year built\"),\n        \"market\": market,\n        \"min_stories\": 
search_filters.get(\"min stories\"),\n        \"num_homes\": 350,\n        \"ord\": sort_order,\n        \"page_number\": \"1\",\n        \"pool\": \"false\",\n        \"region_id\": region_id,\n        \"region_type\": \"2\",\n        \"status\": status,\n        \"travel_with_traffic\": \"false\",\n        \"travel_within_region\": \"false\",\n        \"utilities_included\": \"false\",\n        \"v\": \"8\",\n    }\n    if search_filters.get(\"for sale sold\") == \"Sold\":\n        self.search_params[\"sold_within_days\"] = search_filters.get(\"sold within\")\n        self.search_params[\"status\"] = 9\n    else:\n        self.search_params[\"sf\"] = \"1, 2, 3, 4, 5, 6, 7\"\n        match [\n            search_filters.get(\"status coming soon\"),\n            search_filters.get(\"status active\"),\n            search_filters.get(\"status pending\"),\n        ]:\n            case [True, False, False]:\n                status = \"8\"\n            case [False, True, False]:\n                status = \"1\"\n            case [False, False, True]:\n                status = \"130\"\n            case [True, True, False]:\n                status = \"9\"\n            case [False, True, True]:\n                status = \"139\"\n            case [True, False, True]:\n                status = \"138\"\n            case [True, True, True]:\n                status = \"139\"\n\n        self.search_params[\"status\"] = status\n\n    if (max_sqft := search_filters.get(\"max sqft\")) != \"None\":\n        self.search_params[\"max_sqft\"] = max_sqft\n    if (min_sqft := search_filters.get(\"min sqft\")) != \"None\":\n        self.search_params[\"min_sqft\"] = min_sqft\n\n    if (max_price := search_filters.get(\"max price\")) != \"None\":\n        self.search_params[\"max_price\"] = max_price\n    if (min_price := search_filters.get(\"min price\")) != \"None\":\n        self.search_params[\"min_price\"] = min_price\n\n    houses = \"\"  # figure out how to join into comma string\n    
if search_filters.get(\"house type house\") is True:\n        houses = houses + \"1\"\n    if search_filters.get(\"house type condo\") is True:\n        houses = houses + \"2\"\n    if search_filters.get(\"house type townhouse\") is True:\n        houses = houses + \"3\"\n    if search_filters.get(\"house type mul fam\") is True:\n        houses = houses + \"4\"\n\n    self.search_params[\"uipt\"] = \",\".join(list(houses))\n
    "},{"location":"backend/redfinscraper/#backend.redfinscraper.RedfinApi.working_below_the_fold","title":"working_below_the_fold(property_id, listing_id='')","text":"

    A below_the_fold method that accepts a listing ID. Note: If you can get the listing ID, make sure to pass it to this function. You may get incorrect data if you do not pass it.

    Parameters:

    Name Type Description Default property_id str

    the property ID

    required listing_id str

    The listing ID. Defaults to an empty string.

    ''

    Returns:

    Name Type Description Any Any

    response

    Source code in src\\backend\\redfinscraper.py
    def working_below_the_fold(self, property_id: str, listing_id: str = \"\") -> Any:\n    \"\"\"A below_the_fold method that accepts a listing ID.\n    Note:\n        If you can get the listing ID, make sure to pass it to this function. You will possibly get incorrect data if you do not pass it\n\n    Args:\n        property_id (str): the property ID\n        listing_id (str): The listing ID. Defaults to False.\n\n    Returns:\n        Any: response\n    \"\"\"\n    if listing_id:\n        params = {\n            \"accessLevel\": 1,\n            \"propertyId\": property_id,\n            \"listingId\": listing_id,\n            \"pageType\": 1,\n        }\n    else:\n        params = {\n            \"accessLevel\": 1,\n            \"propertyId\": property_id,\n            \"pageType\": 1,\n        }\n    return self.rf.meta_request(\"/api/home/details/belowTheFold\", params)\n
    "},{"location":"backend/secondarydata/","title":"Secondarydata","text":""},{"location":"backend/secondarydata/#backend.secondarydata.CensusDataRetriever","title":"CensusDataRetriever","text":"

    Interact with the Census data API.

    Note

    ACS5 paths can be found here: https://api.census.gov/data/2019/acs/acs5.html

    Source code in src\\backend\\secondarydata.py
    class CensusDataRetriever:\n    \"\"\"Interact with the Census data API.\n\n    Note:\n        ACS5 paths can be found here: https://api.census.gov/data/2019/acs/acs5.html\"\"\"\n\n    def __init__(self) -> None:\n        self.base_url = \"https://data.census.gov/\"\n        # https://api.census.gov/data/2021/acs/acs5/profile/variables.html\n        self.api_key = os.getenv(\"CENSUS_API_KEY\")\n        if self.api_key is None:\n            log(\n                \"No Census API key found in a .env file in project directory. please request a key at https://api.census.gov/data/key_signup.html\",\n                \"critical\",\n            )\n            exit()\n        self.MAX_COL_NAME_LENGTH = 80\n\n    def _get(self, url: str) -> requests.Response | None:\n        r = requests.get(url, timeout=65)\n        if r.status_code == 400:\n            log(f\"Unknown variable {r.text.split(\"variable \")[-1]}\", \"info\")\n            return None\n        return r\n\n    def get_and_cache_data(\n        self, file_name: str, url_to_lookup_on_miss: str\n    ) -> dict[str, str] | bool:\n        \"\"\"Cache files.\n\n        Args:\n            file_name (str): file name to save/lookup\n            url_to_lookup_on_miss (str): the Census url to lookup\n\n        Returns:\n            bool | dict[str, str] | None | Any: the dict of `tablename: label` or\n        \"\"\"\n        CENSUS_DATA_CACHE_PATH.mkdir(parents=True, exist_ok=True)\n\n        my_json = None\n\n        try:\n            with open(CENSUS_DATA_CACHE_PATH / file_name, mode=\"r\") as f:\n                log(f\"Reading {file_name}\", \"debug\")\n                try:\n                    my_json = json.load(f)\n                except json.JSONDecodeError:\n                    log(\"Could not decode cached census file\", \"error\")\n                    return False\n        except FileNotFoundError:\n            req = self._get(url_to_lookup_on_miss)\n            log(f\"Getting {url_to_lookup_on_miss}...\", 
\"info\")\n            if req is None:\n                log(f\"Could not get census file {file_name}.\", \"error\")\n                return False\n            req.raise_for_status()\n            my_json = req.json()\n            with open(CENSUS_DATA_CACHE_PATH / file_name, \"w\") as f:\n                json.dump(my_json, f)\n\n        return my_json\n\n    def get_race_makeup_by_zcta(self, zcta: str) -> str | None:\n        \"\"\"Get race make up by zcta from. DO NOT USE\n\n        Note:\n            use `get_table_group_for_zcta_by_state_by_year`\n\n        Args:\n            zcta (str): zcta\n\n        Returns:\n            str | None: text or none\n        \"\"\"\n        # get white, black, american indian/native alaskan, asian, NH/PI, other. note that these are estimates, margin of error can be had with \"M\"\n        req = self._get(\n            f\"https://api.census.gov/data/2021/acs/acs5/profile?get=DP05_0064E,DP05_0065E,DP05_0066E,DP05_0067E,DP05_0068E,DP05_0069E&for=zip%20code%20tabulation%20area:{zcta}&key={self.api_key}\"\n        )\n        if req is None:\n            return None\n        return req.text\n\n    def _get_acs5_profile_table_to_group_name(\n        self, table: str, year: str\n    ) -> dict[str, Any] | None:\n        \"\"\"Get a JSON representation of a table's attributes.\n\n        Note:\n            Tables must be:\n                * DP02\n                * DP02PR\n                * DP03\n                * DP04\n                * DP05\n\n            Returned object will have entries similar to:\n            ```json\n            \"DP05_0037M\": {\n                \"label\": \"Margin of Error!!RACE!!Total population!!One race!!White\",\n                \"concept\": \"ACS DEMOGRAPHIC AND HOUSING ESTIMATES\",\n                \"predicateType\": \"int\",\n                \"group\": \"DP05\",\n                \"limit\": 0,\n                \"predicateOnly\": true\n            }\n            ```\n\n        Args:\n            table (str): 
the table to lookup\n            year (str): which acs5 year to look up\n\n        Returns:\n            str | Any: json object\n        \"\"\"\n        file_name = f\"{year}-acs5-profile-groups-{table}.json\"\n        groups_url = (\n            f\"https://api.census.gov/data/{year}/acs/acs5/profile/groups/{table}.json\"\n        )\n        groups_to_label_translation = self.get_and_cache_data(file_name, groups_url)\n        if groups_to_label_translation is False:\n            log(\"Something is wrong with groups label dict\", \"warn\")\n            return None\n        return groups_to_label_translation[\"variables\"]  # type: ignore\n\n    def _translate_and_truncate_unique_acs5_profile_groups_to_labels_for_header_list(\n        self, headers: list[str], table: str, year: str\n    ) -> None:\n        \"\"\"Get the label name for a table and row for the acs5 profile surveys.\n\n        Args:\n            headers (list[str]): header row\n            table (str): have to look again\n            year (str): the year\n\n        Returns:\n            None: translates the list of table_row_selector to its english label\n        \"\"\"\n        # is going to read the file multiple times, save last req as {\"table\": req_json[table]...} for this?\n        groups_to_label_translation_dict = self._get_acs5_profile_table_to_group_name(\n            table, year\n        )\n        if groups_to_label_translation_dict is None:\n            log(\"Could not translate headers\", \"warn\")\n            return groups_to_label_translation_dict\n\n        for idx, header in enumerate(headers):\n            new_col_name_dict = groups_to_label_translation_dict.get(header)\n            if new_col_name_dict is None:\n                # returns none if not in dict, means we have custom name and can continue\n                continue\n            new_col_name = new_col_name_dict[\"label\"]\n            # qgis doesnt allow field names of 80+ chars. 
massage into form, then cut off\n            # delimiter for table subsection\n            new_col_name = new_col_name.replace(\"$\", \"D\")\n            new_col_name = new_col_name.replace(\",\", \"\")\n            new_col_name = new_col_name.replace(\"'\", \"\")\n            new_col_name = re.sub(r\"\\s+\", \" \", new_col_name)\n            new_col_name = new_col_name.replace(\"!!\", \" \")\n            # easier to read\n            new_col_name_parts = new_col_name.split(\" \")\n            for idy, no_format in enumerate(new_col_name_parts):\n                new_col_name_parts[idy] = no_format.capitalize()\n            new_col_name = \"\".join(new_col_name_parts)\n            # shortenings to fit length requirement\n            for key, value in REPLACEMENT_DICT.items():\n                new_col_name = re.sub(key, value, new_col_name)\n            # limiter\n            new_col_name = new_col_name[\n                : min(len(new_col_name), self.MAX_COL_NAME_LENGTH)\n            ]\n\n            if new_col_name not in headers[:idx]:\n                headers[idx] = new_col_name\n\n    def generate_acs5_profile_table_group_for_zcta_by_year(\n        self, table: str, year: str\n    ) -> str:\n        \"\"\"CSV output of an acs 5 year profile survey table.\n\n        TODO:\n            Update func name\n\n        Args:\n            table (str): census demo acs5 table\n            year (str): year to search\n\n        Returns:\n            str: file path where output is saved\n        \"\"\"\n        file_name = f\"{year}-acs-profile-table-{table}.json\"\n        url = f\"https://api.census.gov/data/{year}/acs/acs5/profile?get=group({table})&for=zip%20code%20tabulation%20area:*\"\n        list_of_list_table_json = self.get_and_cache_data(file_name, url)\n\n        if list_of_list_table_json is False:\n            log(\n                f\"Could not load table {table}. 
Perhaps the api is down or there was an error saving/reading the file.\",\n                \"warn\",\n            )\n            return \"\"\n\n        self._translate_and_truncate_unique_acs5_profile_groups_to_labels_for_header_list(\n            list_of_list_table_json[0],  # type: ignore\n            table,\n            year,  # type: ignore\n        )\n\n        df = pl.DataFrame(list_of_list_table_json, orient=\"row\")\n        # funky stuff to get the first list to be the name of the columns\n        df = (\n            df.rename(df.head(1).to_dicts().pop())\n            .slice(1)  # type: ignore\n            .drop(\"NAME\", cs.matches(\"(?i)^(ann)\"), cs.matches(f\"(?i){table}\"))\n            .rename({\"zip code tabulation area\": \"ZCTA\"})\n            .cast(\n                {\n                    \"ZCTA\": pl.Int32,\n                }\n            )\n        )\n        table_file_name = CENSUS_DATA_DIR_PATH / f\"acs5-profile-group-{table}-zcta.csv\"\n        df.write_csv(table_file_name)\n        return str(table_file_name)\n\n    def _get_acs5_subject_table_to_group_name(\n        self, table: str, year: str\n    ) -> dict[str, Any] | None:\n        \"\"\"Get a JSON representation of a table's attributes.\n\n        Note:\n            Tables can be found at: https://www.census.gov/acs/www/data/data-tables-and-tools/subject-tables/\n\n            Returned object will have entries similar to:\n            ```json\n            \"DP05_0037M\": {\n                \"label\": \"Margin of Error!!RACE!!Total population!!One race!!White\",\n                \"concept\": \"ACS DEMOGRAPHIC AND HOUSING ESTIMATES\",\n                \"predicateType\": \"int\",\n                \"group\": \"DP05\",\n                \"limit\": 0,\n                \"predicateOnly\": true\n            }\n            ```\n\n        Args:\n            table (str): the table to lookup\n            year (str): which acs5 year to look up\n\n        Returns:\n            str | Any: variables\n 
       \"\"\"\n        file_name = f\"{year}-acs5-subject-groups-{table}.json\"\n        groups_url = (\n            f\"https://api.census.gov/data/{year}/acs/acs5/subject/groups/{table}.json\"\n        )\n        groups_to_label_translation = self.get_and_cache_data(file_name, groups_url)\n        if groups_to_label_translation is False:\n            log(\"Something is wrong with groups label dict\", \"warn\")\n            return None\n        return groups_to_label_translation[\"variables\"]  # type: ignore\n\n    def _translate_and_truncate_unique_acs5_subject_groups_to_labels_for_header_list(\n        self, headers: list[str], table: str, year: str\n    ) -> None:\n        \"\"\"Gets the label name for a table and row for the acs5 profile surveys.\n\n        Args:\n            headers (list[str]): headers\n            table (str): table\n            year (str): year\n        \"\"\"\n        # is going to read the file multiple times, save last req as {\"table\": req_json[table]...} for this?\n        groups_to_label_translation_dict = self._get_acs5_subject_table_to_group_name(\n            table, year\n        )\n        if groups_to_label_translation_dict is None:\n            log(\"Could not translate headers\", \"warn\")\n            return groups_to_label_translation_dict\n\n        for idx, header in enumerate(headers):\n            new_col_name_dict = groups_to_label_translation_dict.get(header)\n            if new_col_name_dict is None:\n                # returns none if not in dict, means we have custom name and can continue\n                continue\n            new_col_name = new_col_name_dict[\"label\"]\n            # qgis doesnt allow field names of 80+ chars. 
massage into form, then cut off\n            # delimiter for table subsection\n            new_col_name = new_col_name.replace(\"$\", \"D\")\n            new_col_name = new_col_name.replace(\",\", \"\")\n            new_col_name = new_col_name.replace(\"'\", \"\")\n            new_col_name = re.sub(r\"\\s+\", \" \", new_col_name)\n            new_col_name = new_col_name.replace(\"!!\", \" \")\n            # easier to read\n            new_col_name_parts = new_col_name.split(\" \")\n            for idy, no_format in enumerate(new_col_name_parts):\n                new_col_name_parts[idy] = no_format.capitalize()\n            new_col_name = \"\".join(new_col_name_parts)\n            # shortenings to fit length requirement\n            for key, value in REPLACEMENT_DICT.items():\n                new_col_name = re.sub(key, value, new_col_name)\n            # limiter\n            new_col_name = new_col_name[\n                : min(len(new_col_name), self.MAX_COL_NAME_LENGTH)\n            ]\n\n            if new_col_name not in headers[:idx]:\n                headers[idx] = new_col_name\n\n    def generate_acs5_subject_table_group_for_zcta_by_year(\n        self, table: str, year: str\n    ) -> str:\n        \"\"\"CSV output of a acs 5 year subject survey table\n\n        Args:\n            table (str): census acs5 table\n            year (str): year to search\n        \"\"\"\n        file_name = f\"{year}-acs-subject-table-{table}.json\"\n        url = f\"https://api.census.gov/data/{year}/acs/acs5/subject?get=group({table})&for=zip%20code%20tabulation%20area:*\"\n        list_of_list_table_json = self.get_and_cache_data(file_name, url)\n        if list_of_list_table_json is False:\n            log(\n                f\"Could not load table {table}. 
Perhaps the api is down or there was an error saving/reading the file.\",\n                \"warn\",\n            )\n            return \"\"\n\n        self._translate_and_truncate_unique_acs5_subject_groups_to_labels_for_header_list(\n            list_of_list_table_json[0],  # type: ignore\n            table,\n            year,  # type: ignore\n        )\n\n        df = pl.DataFrame(list_of_list_table_json, orient=\"row\")\n        # funky stuff to get the first list to be the name of the columns\n        df = (\n            df.rename(df.head(1).to_dicts().pop())\n            .slice(1)  # type: ignore\n            .drop(\"NAME\", cs.matches(\"(?i)^(ann)\"), cs.matches(f\"(?i){table}\"))\n            .rename({\"zip code tabulation area\": \"ZCTA\"})\n            .cast(\n                {\n                    \"ZCTA\": pl.Int32,\n                }\n            )\n        )\n        table_file_name = CENSUS_DATA_DIR_PATH / f\"acs5-subject-group-{table}-zcta.csv\"\n        # may not have to write. but cache func doesn't return whether it hits or not\n        df.write_csv(table_file_name)\n        return str(table_file_name)\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.CensusDataRetriever.generate_acs5_profile_table_group_for_zcta_by_year","title":"generate_acs5_profile_table_group_for_zcta_by_year(table, year)","text":"

    CSV output of an acs 5 year profile survey table.

    TODO

    Update func name

    Parameters:

    Name Type Description Default table str

    census demo acs5 table

    required year str

    year to search

    required

    Returns:

    Name Type Description str str

    file path where output is saved

    Source code in src\\backend\\secondarydata.py
    def generate_acs5_profile_table_group_for_zcta_by_year(\n    self, table: str, year: str\n) -> str:\n    \"\"\"CSV output of an acs 5 year profile survey table.\n\n    TODO:\n        Update func name\n\n    Args:\n        table (str): census demo acs5 table\n        year (str): year to search\n\n    Returns:\n        str: file path where output is saved\n    \"\"\"\n    file_name = f\"{year}-acs-profile-table-{table}.json\"\n    url = f\"https://api.census.gov/data/{year}/acs/acs5/profile?get=group({table})&for=zip%20code%20tabulation%20area:*\"\n    list_of_list_table_json = self.get_and_cache_data(file_name, url)\n\n    if list_of_list_table_json is False:\n        log(\n            f\"Could not load table {table}. Perhaps the api is down or there was an error saving/reading the file.\",\n            \"warn\",\n        )\n        return \"\"\n\n    self._translate_and_truncate_unique_acs5_profile_groups_to_labels_for_header_list(\n        list_of_list_table_json[0],  # type: ignore\n        table,\n        year,  # type: ignore\n    )\n\n    df = pl.DataFrame(list_of_list_table_json, orient=\"row\")\n    # funky stuff to get the first list to be the name of the columns\n    df = (\n        df.rename(df.head(1).to_dicts().pop())\n        .slice(1)  # type: ignore\n        .drop(\"NAME\", cs.matches(\"(?i)^(ann)\"), cs.matches(f\"(?i){table}\"))\n        .rename({\"zip code tabulation area\": \"ZCTA\"})\n        .cast(\n            {\n                \"ZCTA\": pl.Int32,\n            }\n        )\n    )\n    table_file_name = CENSUS_DATA_DIR_PATH / f\"acs5-profile-group-{table}-zcta.csv\"\n    df.write_csv(table_file_name)\n    return str(table_file_name)\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.CensusDataRetriever.generate_acs5_subject_table_group_for_zcta_by_year","title":"generate_acs5_subject_table_group_for_zcta_by_year(table, year)","text":"

    CSV output of an acs 5 year subject survey table.

    Parameters:

    Name Type Description Default table str

    census acs5 table

    required year str

    year to search

    required Source code in src\\backend\\secondarydata.py
    def generate_acs5_subject_table_group_for_zcta_by_year(\n    self, table: str, year: str\n) -> str:\n    \"\"\"CSV output of a acs 5 year subject survey table\n\n    Args:\n        table (str): census acs5 table\n        year (str): year to search\n    \"\"\"\n    file_name = f\"{year}-acs-subject-table-{table}.json\"\n    url = f\"https://api.census.gov/data/{year}/acs/acs5/subject?get=group({table})&for=zip%20code%20tabulation%20area:*\"\n    list_of_list_table_json = self.get_and_cache_data(file_name, url)\n    if list_of_list_table_json is False:\n        log(\n            f\"Could not load table {table}. Perhaps the api is down or there was an error saving/reading the file.\",\n            \"warn\",\n        )\n        return \"\"\n\n    self._translate_and_truncate_unique_acs5_subject_groups_to_labels_for_header_list(\n        list_of_list_table_json[0],  # type: ignore\n        table,\n        year,  # type: ignore\n    )\n\n    df = pl.DataFrame(list_of_list_table_json, orient=\"row\")\n    # funky stuff to get the first list to be the name of the columns\n    df = (\n        df.rename(df.head(1).to_dicts().pop())\n        .slice(1)  # type: ignore\n        .drop(\"NAME\", cs.matches(\"(?i)^(ann)\"), cs.matches(f\"(?i){table}\"))\n        .rename({\"zip code tabulation area\": \"ZCTA\"})\n        .cast(\n            {\n                \"ZCTA\": pl.Int32,\n            }\n        )\n    )\n    table_file_name = CENSUS_DATA_DIR_PATH / f\"acs5-subject-group-{table}-zcta.csv\"\n    # may not have to write. but cache func doesn't return whether it hits or not\n    df.write_csv(table_file_name)\n    return str(table_file_name)\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.CensusDataRetriever.get_and_cache_data","title":"get_and_cache_data(file_name, url_to_lookup_on_miss)","text":"

    Cache files.

    Parameters:

    Name Type Description Default file_name str

    file name to save/lookup

    required url_to_lookup_on_miss str

    the Census url to lookup

    required

    Returns:

    Type Description dict[str, str] | bool

    dict[str, str] | bool: the dict of tablename: label, or False if the cached file could not be decoded or the Census request failed

    Source code in src\\backend\\secondarydata.py
    def get_and_cache_data(\n    self, file_name: str, url_to_lookup_on_miss: str\n) -> dict[str, str] | bool:\n    \"\"\"Cache files.\n\n    Args:\n        file_name (str): file name to save/lookup\n        url_to_lookup_on_miss (str): the Census url to lookup\n\n    Returns:\n        bool | dict[str, str] | None | Any: the dict of `tablename: label` or\n    \"\"\"\n    CENSUS_DATA_CACHE_PATH.mkdir(parents=True, exist_ok=True)\n\n    my_json = None\n\n    try:\n        with open(CENSUS_DATA_CACHE_PATH / file_name, mode=\"r\") as f:\n            log(f\"Reading {file_name}\", \"debug\")\n            try:\n                my_json = json.load(f)\n            except json.JSONDecodeError:\n                log(\"Could not decode cached census file\", \"error\")\n                return False\n    except FileNotFoundError:\n        req = self._get(url_to_lookup_on_miss)\n        log(f\"Getting {url_to_lookup_on_miss}...\", \"info\")\n        if req is None:\n            log(f\"Could not get census file {file_name}.\", \"error\")\n            return False\n        req.raise_for_status()\n        my_json = req.json()\n        with open(CENSUS_DATA_CACHE_PATH / file_name, \"w\") as f:\n            json.dump(my_json, f)\n\n    return my_json\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.CensusDataRetriever.get_race_makeup_by_zcta","title":"get_race_makeup_by_zcta(zcta)","text":"

    Get race makeup by ZCTA from the Census ACS5 profile API. DO NOT USE

    Note

    use get_table_group_for_zcta_by_state_by_year

    Parameters:

    Name Type Description Default zcta str

    zcta

    required

    Returns:

    Type Description str | None

    str | None: text or none

    Source code in src\\backend\\secondarydata.py
    def get_race_makeup_by_zcta(self, zcta: str) -> str | None:\n    \"\"\"Get race make up by zcta from. DO NOT USE\n\n    Note:\n        use `get_table_group_for_zcta_by_state_by_year`\n\n    Args:\n        zcta (str): zcta\n\n    Returns:\n        str | None: text or none\n    \"\"\"\n    # get white, black, american indian/native alaskan, asian, NH/PI, other. note that these are estimates, margin of error can be had with \"M\"\n    req = self._get(\n        f\"https://api.census.gov/data/2021/acs/acs5/profile?get=DP05_0064E,DP05_0065E,DP05_0066E,DP05_0067E,DP05_0068E,DP05_0069E&for=zip%20code%20tabulation%20area:{zcta}&key={self.api_key}\"\n    )\n    if req is None:\n        return None\n    return req.text\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever","title":"EIADataRetriever","text":"

    Interact with the EIA open data API.

    Note

    This is the \"manual\" for this API: https://www.eia.gov/opendata/pdf/EIA-APIv2-HandsOn-Webinar-11-Jan-23.pdf

    Source code in src\\backend\\secondarydata.py
    class EIADataRetriever:\n    \"\"\"Interact with the EIA open data API.\n\n    Note:\n        This is the \"manual\" for this API:\n        https://www.eia.gov/opendata/pdf/EIA-APIv2-HandsOn-Webinar-11-Jan-23.pdf\n    \"\"\"\n\n    HEATING_OIL_STATES_ABBR = {\n        sts.CT.abbr,\n        sts.DC.abbr,\n        sts.DE.abbr,\n        sts.IA.abbr,\n        sts.IL.abbr,\n        sts.IN.abbr,\n        sts.KS.abbr,\n        sts.KY.abbr,\n        sts.MA.abbr,\n        sts.MD.abbr,\n        sts.ME.abbr,\n        sts.MI.abbr,\n        sts.MN.abbr,\n        sts.MO.abbr,\n        sts.NC.abbr,\n        sts.ND.abbr,\n        sts.NE.abbr,\n        sts.NH.abbr,\n        sts.NJ.abbr,\n        sts.NY.abbr,\n        sts.OH.abbr,\n        sts.PA.abbr,\n        sts.RI.abbr,\n        sts.SD.abbr,\n        sts.VA.abbr,\n        sts.VT.abbr,\n        sts.WI.abbr,\n    }\n\n    PROPANE_STATES_ABBR = {\n        sts.AL.abbr,\n        sts.AR.abbr,\n        sts.CO.abbr,\n        sts.CT.abbr,\n        sts.DE.abbr,\n        sts.FL.abbr,\n        sts.GA.abbr,\n        sts.IL.abbr,\n        sts.IN.abbr,\n        sts.KS.abbr,\n        sts.KY.abbr,\n        sts.KY.abbr,\n        sts.MA.abbr,\n        sts.MD.abbr,\n        sts.ME.abbr,\n        sts.MI.abbr,\n        sts.MN.abbr,\n        sts.MO.abbr,\n        sts.MS.abbr,\n        sts.MT.abbr,\n        sts.NC.abbr,\n        sts.ND.abbr,\n        sts.NE.abbr,\n        sts.NH.abbr,\n        sts.NJ.abbr,\n        sts.NY.abbr,\n        sts.OH.abbr,\n        sts.OK.abbr,\n        sts.PA.abbr,\n        sts.RI.abbr,\n        sts.SD.abbr,\n        sts.TN.abbr,\n        sts.TX.abbr,\n        sts.UT.abbr,\n        sts.VA.abbr,\n        sts.VT.abbr,\n        sts.WI.abbr,\n    }\n\n    class HeaterEfficiencies(Enum):\n        \"\"\"Combination of system efficiency and distribution efficiency.\n\n        Note:\n            Numbers taken from https://www.efficiencymaine.com/at-home/heating-cost-comparison/\n        \"\"\"\n\n        HEAT_PUMP_GEOTHERMAL = 
3.69\n        HEAT_PUMP_DUCTLESS = 2.7  # mini split\n        HEAT_PUMP_DUCTED = 2.16\n        BASEBOARD = 1\n        KEROSENE_ROOM_HEATER = 0.87\n        PROPANE_BOILER = 0.837\n        NG_BOILER = 0.828\n        NG_ROOM_HEATER = 0.81\n        PROPANE_ROOM_HEATER = 0.81\n        OIL_BOILER = 0.783\n        WOOD_STOVE = 0.75\n        PELLET_STOVE = 0.75\n        NG_FURNACE = 0.744  #! double check this value\n        PROPANE_FURNACE = 0.744\n        OIL_FURNACE = 0.704\n        PELLET_BOILER = 0.639\n\n    class EnergyType(Enum):\n        PROPANE = 1\n        HEATING_OIL = 2\n        NATURAL_GAS = 3\n        ELECTRICITY = 4\n\n    class PetroleumProductTypes(StrEnum):\n        NATURAL_GAS = \"EPG0\"\n        PROPANE = \"EPLLPA\"\n        HEATING_OIL = \"EPD2F\"\n\n    class FuelBTUConversion(Enum):\n        # https://www.edf.org/sites/default/files/10071_EDF_BottomBarrel_Ch3.pdf\n        # https://www.eia.gov/energyexplained/units-and-calculators/british-thermal-units.php\n        # https://www.eia.gov/energyexplained/units-and-calculators/\n        NO1_OIL_BTU_PER_GAL = 135_000\n        NO2_OIL_BTU_PER_GAL = 140_000\n        NO4_OIL_BTU_PER_GAL = 146_000\n        NO5_OIL_BTU_PER_GAL = 144_500\n        NO6_OIL_BTU_PER_GAL = 150_000\n        HEATING_OIL_BTU_PER_GAL = 138_500\n        ELECTRICITY_BTU_PER_KWH = 3_412.14\n        NG_BTU_PER_MCT = 1_036_000  # 1000 cubic feet of gas\n        NG_BTU_PER_THERM = 100_000\n        PROPANE_BTU_PER_GAL = 91_452\n        WOOD_BTU_PER_CORD = 20_000_000\n\n    def __init__(self):\n        self.eia_base_url = \"https://api.eia.gov/v2\"\n        self.api_key = os.getenv(\"EIA_API_KEY\")\n        if self.api_key is None:\n            log(\n                \"No Census API key found in a .env file in project directory. 
please request a key at https://www.eia.gov/opendata/register.php\",\n                \"critical\",\n            )\n            exit()\n\n    def price_per_mbtu_with_efficiency(\n        self, energy_price_dict: dict\n    ) -> dict[str, str | EnergyType | float]:\n        \"\"\"Convert an energy source's price per quantity into price per BTU with an efficiency.\n\n        Note:\n            Efficiency data taken from https://portfoliomanager.energystar.gov/pdf/reference/Thermal%20Conversions.pdf\n\n        See also:\n            `EIADataRetriever.HeaterEfficiencies`\n\n        Args:\n            energy_price_dict (dict): energy source json\n\n        Returns:\n            dict: new dictionary with btu centric pricing\n        \"\"\"\n        #! make new function based on burner type/ end usage type\n        CENTS_IN_DOLLAR = 100\n        match energy_price_dict.get(\"type\"):\n            case self.EnergyType.PROPANE.value:\n                # for loop is done for every case since i dont want to use `eval` or parse a string of division to keep PEMDAS. 
this is why i dont have an efficiency func yet\n                for key, value in energy_price_dict.items():\n                    if (\n                        key in [\"type\", \"state\", None]\n                        or energy_price_dict.get(key) is None\n                    ):\n                        continue\n                    energy_price_dict[key] = (\n                        value\n                        / (\n                            self.FuelBTUConversion.PROPANE_BTU_PER_GAL.value\n                            * self.HeaterEfficiencies.PROPANE_FURNACE.value\n                        )\n                        * 1_000\n                    )\n            case self.EnergyType.NATURAL_GAS.value:\n                for key, value in energy_price_dict.items():\n                    if (\n                        key in [\"type\", \"state\", None]\n                        or energy_price_dict.get(key) is None\n                    ):\n                        continue\n                    energy_price_dict[key] = (\n                        value\n                        / (\n                            self.FuelBTUConversion.NG_BTU_PER_MCT.value\n                            * self.HeaterEfficiencies.NG_FURNACE.value\n                        )\n                        * 1_000\n                    )\n            case self.EnergyType.ELECTRICITY.value:\n                for key, value in energy_price_dict.items():\n                    if (\n                        key in [\"type\", \"state\", None]\n                        or energy_price_dict.get(key) is None\n                    ):\n                        continue\n                    energy_price_dict[key] = (\n                        value\n                        / CENTS_IN_DOLLAR\n                        / (\n                            self.FuelBTUConversion.ELECTRICITY_BTU_PER_KWH.value\n                            * self.HeaterEfficiencies.HEAT_PUMP_DUCTED.value\n                        )\n                   
     * 1_000\n                    )\n            case self.EnergyType.HEATING_OIL.value:\n                for key, value in energy_price_dict.items():\n                    if (\n                        key in [\"type\", \"state\", None]\n                        or energy_price_dict.get(key) is None\n                    ):\n                        continue\n                    energy_price_dict[key] = (\n                        value\n                        / (\n                            self.FuelBTUConversion.HEATING_OIL_BTU_PER_GAL.value\n                            * self.HeaterEfficiencies.OIL_BOILER.value\n                        )\n                        * 1_000\n                    )\n            case _:\n                log(\"Could not translate dict to btu per price.\", \"warn\")\n\n        return energy_price_dict\n\n    # api to dict handler Helpers\n    def price_dict_to_clean_dict(\n        self, eia_json: dict, energy_type: EnergyType, state: str\n    ) -> dict[str, str | EnergyType | float]:\n        \"\"\"Clean JSON data returned by EIA's API.\n\n        Args:\n            eia_json (dict): the response JSON\n            energy_type (EnergyType): the energy type\n            state (str): the state\n\n        Returns:\n            dict[str, str | EnergyType | float]: cleaned JSON\n        \"\"\"\n        # price key is different for electricity\n        accessor = \"value\"\n        if \"product\" not in eia_json[\"response\"][\"data\"][0]:\n            accessor = \"price\"\n\n        result_dict = {\n            entry[\"period\"]: entry[f\"{accessor}\"]\n            for entry in eia_json[\"response\"][\"data\"]\n        }\n        result_dict[\"type\"] = energy_type.value\n        result_dict[\"state\"] = state\n\n        return result_dict\n\n    def price_df_to_clean_dict(\n        self, eia_df: pl.DataFrame, energy_type: EnergyType, state: str\n    ) -> dict[str, str | EnergyType | float]:\n        \"\"\"Clean DataFrame data consisting of EIA 
API data.\n\n        Args:\n            eia_df (pl.DataFrame): the DataFrame to clean\n            energy_type (EnergyType): the energy type\n            state (str): the state\n\n        Returns:\n            dict[str, str|EnergyType|float]: the dict\n        \"\"\"\n        result_dict = {}\n        for row in eia_df.rows(named=True):\n            year_month = f\"{row.get(\"year\")}-{row.get(\"month\"):02}\"\n            if row.get(\"monthly_avg_price\") is not None:\n                result_dict[year_month] = round(row.get(\"monthly_avg_price\"), 3)  # type: ignore\n        result_dict[\"type\"] = energy_type.value\n        result_dict[\"state\"] = state\n        return result_dict\n\n    # api to dict handler\n    def price_to_clean_dict(\n        self, price_struct: dict | pl.DataFrame, energy_type: EnergyType, state: str\n    ) -> dict[str, str | EnergyType | float]:\n        \"\"\"Handle the different data types that EIA data could be stored in.\n\n        Args:\n            price_struct (dict | pl.DataFrame): a data structure containing the year, month, and price info\n            energy_type (EnergyType): the energy type\n            state (str): the state\n\n        Raises:\n            TypeError: raised if the type of `price_struct` is not supported\n\n        Returns:\n            dict[str, str|EnergyType|float]: the normalized and structured data in dict form\n        \"\"\"\n        match price_struct:\n            case dict():\n                return self.price_dict_to_clean_dict(price_struct, energy_type, state)\n            case pl.DataFrame():\n                return self.price_df_to_clean_dict(price_struct, energy_type, state)\n            case _:\n                raise TypeError(f\"Type not supported: {type(energy_type)}\")\n\n    # api interaction\n    def monthly_electricity_price_per_kwh(\n        self, state: str, start_date: datetime.date, end_date: datetime.date\n    ) -> dict[str, Any]:\n        \"\"\"Get a state's average monthly energy 
price.\n\n        Note:\n            Data is returned in cents/KWh.\n\n        Args:\n            state (str): the 2 character postal code of a state\n            start_date (datetime.date): the start date, inclusive\n            end_date (datetime.date): the end date, non inclusive\n\n        Returns:\n            dict: the dictionary in `year-month: price` form\n        \"\"\"\n        url = f\"{self.eia_base_url}/electricity/retail-sales/data/?frequency=monthly&data[0]=price&facets[stateid][]={state}&facets[sectorid][]=RES&start={start_date.year}-{start_date.month:02}&end={end_date.year}-{end_date.month:02}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}\"\n\n        eia_request = req_get_wrapper(url)\n        eia_request.raise_for_status()\n\n        return eia_request.json()\n\n    def monthly_ng_price_per_mcf(\n        self, state: str, start_date: datetime.date, end_date: datetime.date\n    ) -> dict[str, Any]:\n        \"\"\"Get a state's average natural gas price.\n\n        Note:\n            Data is returned in dollars per mega cubic feet.\n\n        Args:\n            state (str): the 2 character postal code of a state\n            start_date (datetime.date): the start date, inclusive\n            end_date (datetime.date): the end date, non inclusive\n\n        Returns:\n            dict: _description_\n        \"\"\"\n        # $/mcf\n        url = f\"https://api.eia.gov/v2/natural-gas/pri/sum/data/?frequency=monthly&data[0]=value&facets[duoarea][]=S{state}&facets[process][]=PRS&start={start_date.year}-{start_date.month:02}&end={end_date.year}-{end_date.month:02}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}\"\n\n        eia_request = req_get_wrapper(url)\n        eia_request.raise_for_status()\n\n        return eia_request.json()\n\n    def monthly_heating_season_heating_oil_price_per_gal(\n        self, state: str, start_date: datetime.date, end_date: datetime.date\n    ) -> pl.DataFrame:\n        \"\"\"Get 
a state's average heating oil price.\n\n        Note:\n            Data returned is in dollars per gallon.\n\n            Only these states are tracked, and only for the months October through March:\n                * CT\n                * DC\n                * DE\n                * IA\n                * IL\n                * IN\n                * KS\n                * KY\n                * MA\n                * MD\n                * ME\n                * MI\n                * MN\n                * MO\n                * NC\n                * ND\n                * NE\n                * NH\n                * NJ\n                * NY\n                * OH\n                * PA\n                * RI\n                * SD\n                * VA\n                * VT\n                * WI\n        Args:\n            state (str): 2 char postal code\n            start_date (datetime.date): the start date, inclusive\n            end_date (datetime.date): the end date, non inclusive\n\n        Returns:\n            dict: _description_\n        \"\"\"\n        # heating season is Oct - march, $/gal\n        url = f\"https://api.eia.gov/v2/petroleum/pri/wfr/data/?frequency=weekly&data[0]=value&facets[duoarea][]=S{state}&facets[product][]=EPD2F&start={start_date}&end={end_date}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}\"\n\n        eia_request = req_get_wrapper(url)\n        eia_request.raise_for_status()\n\n        json = eia_request.json()\n        df = pl.DataFrame(json[\"response\"][\"data\"])\n        # becomes int, so months are sig figs\n        df = df.with_columns(pl.col(\"period\").str.strptime(pl.Date))\n        df = df.with_columns(\n            pl.col(\"period\").dt.year().alias(\"year\"),\n            pl.col(\"period\").dt.month().alias(\"month\"),\n        )\n\n        monthly_avg_price = (\n            df.group_by([\"year\", \"month\"])\n            .agg(pl.col(\"value\").mean().alias(\"monthly_avg_price\"))\n            
.sort(\"year\", \"month\")\n        )\n\n        return monthly_avg_price\n\n    def monthly_heating_season_propane_price_per_gal(\n        self, state: str, start_date: datetime.date, end_date: datetime.date\n    ) -> pl.DataFrame:\n        \"\"\"Get a state's average propane price in dollars per gal.\n\n        Note:\n            Only these states are tracked, and only for the months October through Marc:\n                * AL\n                * AR\n                * CO\n                * CT\n                * DE\n                * FL\n                * GA\n                * IL\n                * IN\n                * KS\n                * KY\n                * KY\n                * MA\n                * MD\n                * ME\n                * MI\n                * MN\n                * MO\n                * MS\n                * MT\n                * NC\n                * ND\n                * NE\n                * NH\n                * NJ\n                * NY\n                * OH\n                * OK\n                * PA\n                * RI\n                * SD\n                * TN\n                * TX\n                * UT\n                * VA\n                * VT\n                * WI\n\n        Args:\n            state (str): 2 character postal code\n            start_date (datetime.date): the start date, inclusive\n            end_date (datetime.date): the end date, non inclusive\n\n        Returns:\n            dict: _description_\n        \"\"\"\n        # heating season is Oct - march, $/gal\n        url = f\"https://api.eia.gov/v2/petroleum/pri/wfr/data/?frequency=weekly&data[0]=value&facets[process][]=PRS&facets[duoarea][]=S{state}&facets[product][]=EPLLPA&start={start_date}&end={end_date}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}\"\n\n        eia_request = req_get_wrapper(url)\n        eia_request.raise_for_status()\n\n        json = eia_request.json()\n        # return 
self.price_json_to_dict(eia_request.json())\n        df = pl.DataFrame(json[\"response\"][\"data\"])\n        # df = df.with_columns(pl.col(\"period\").str.to_date().alias(\"period\"))\n        df = df.with_columns(pl.col(\"period\").str.strptime(pl.Date))\n        df = df.with_columns(\n            pl.col(\"period\").dt.year().alias(\"year\"),\n            pl.col(\"period\").dt.month().alias(\"month\"),\n        )\n\n        monthly_avg_price = (\n            df.group_by([\"year\", \"month\"])\n            .agg(pl.col(\"value\").mean().alias(\"monthly_avg_price\"))\n            .sort(\"year\", \"month\")\n        )\n\n        return monthly_avg_price\n\n    def monthly_price_per_mbtu_by_energy_type(\n        self,\n        energy_type: EnergyType,\n        state: str,\n        start_date: datetime.date,\n        end_date: datetime.date,\n    ) -> dict[str, str | EnergyType | float]:\n        \"\"\"Get the cost per MBTU for the given energy type for the state, over the given period of time. Refer to EIA's documentation\n        for changes to data collection during certain years.\n\n        Args:\n            energy_type (EnergyType): The energy type\n            state (str): the 2 character postal abbreviation. Note that for heating oil, only certain states have this information collected\n            start_date (datetime.date): the date for which to start the search. Inclusive. Not that for heating oil, only heating months will be returned\n            end_date (datetime.date): the date for which to end the search. 
Non inclusive\n\n        Raises:\n            NotImplementedError: Invalid energy type\n\n        Returns:\n            dict: year-month: price in USD to BTU\n        \"\"\"\n        if len(state) > 2:\n            state = sts.lookup(state).abbr  # type: ignore\n        match energy_type:\n            case self.EnergyType.PROPANE:\n                return self.price_per_mbtu_with_efficiency(\n                    self.price_to_clean_dict(\n                        self.monthly_heating_season_propane_price_per_gal(\n                            state, start_date, end_date\n                        ),\n                        energy_type,\n                        state,\n                    )\n                )\n            case self.EnergyType.NATURAL_GAS:\n                return self.price_per_mbtu_with_efficiency(\n                    self.price_to_clean_dict(\n                        self.monthly_ng_price_per_mcf(state, start_date, end_date),\n                        energy_type,\n                        state,\n                    )\n                )\n            case self.EnergyType.ELECTRICITY:\n                return self.price_per_mbtu_with_efficiency(\n                    self.price_to_clean_dict(\n                        self.monthly_electricity_price_per_kwh(\n                            state, start_date, end_date\n                        ),\n                        energy_type,\n                        state,\n                    )\n                )\n            case self.EnergyType.HEATING_OIL:\n                return self.price_per_mbtu_with_efficiency(\n                    self.price_to_clean_dict(\n                        self.monthly_heating_season_heating_oil_price_per_gal(\n                            state, start_date, end_date\n                        ),\n                        energy_type,\n                        state,\n                    )\n                )\n            case _:\n                raise NotImplementedError(f\"Unsupported 
energy type: {energy_type}\")\n\n    def monthly_price_per_mbtu_by_energy_type_by_state(\n        self, state: str, start_date: datetime.date, end_date: datetime.date\n    ) -> list[Any]:\n        \"\"\"Get all available energy prices per MBTU, taking efficiency into account, for a state.\n\n        Note:\n            Please keep times to within a year. For the non oil and propane, you have to go a month past.\n\n        Args:\n            state (str): 2 character postal code\n            start_date (datetime.date): start date\n            end_date (datetime.date): end date\n\n        Returns:\n            list[Any]: list of price dicts for available energy types for a state\n        \"\"\"\n        if len(state) > 2:\n            state = sts.lookup(state).abbr  # type: ignore\n\n        dicts_to_return = []\n        if state in self.HEATING_OIL_STATES_ABBR:\n            dicts_to_return.append(\n                self.monthly_price_per_mbtu_by_energy_type(\n                    self.EnergyType.HEATING_OIL, state, start_date, end_date\n                )\n            )\n        if state in self.PROPANE_STATES_ABBR:\n            dicts_to_return.append(\n                self.monthly_price_per_mbtu_by_energy_type(\n                    self.EnergyType.PROPANE, state, start_date, end_date\n                )\n            )\n        dicts_to_return.append(\n            self.monthly_price_per_mbtu_by_energy_type(\n                self.EnergyType.NATURAL_GAS, state, start_date, end_date\n            )\n        )\n        dicts_to_return.append(\n            self.monthly_price_per_mbtu_by_energy_type(\n                self.EnergyType.ELECTRICITY, state, start_date, end_date\n            )\n        )\n        log(f\"{dicts_to_return = }\", \"debug\")\n        return dicts_to_return\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever.HeaterEfficiencies","title":"HeaterEfficiencies","text":"

    Bases: Enum

    Combination of system efficiency and distribution efficiency.

    Note

    Numbers taken from https://www.efficiencymaine.com/at-home/heating-cost-comparison/

    Source code in src\\backend\\secondarydata.py
    class HeaterEfficiencies(Enum):\n    \"\"\"Combination of system efficiency and distribution efficiency.\n\n    Note:\n        Numbers taken from https://www.efficiencymaine.com/at-home/heating-cost-comparison/\n    \"\"\"\n\n    HEAT_PUMP_GEOTHERMAL = 3.69\n    HEAT_PUMP_DUCTLESS = 2.7  # mini split\n    HEAT_PUMP_DUCTED = 2.16\n    BASEBOARD = 1\n    KEROSENE_ROOM_HEATER = 0.87\n    PROPANE_BOILER = 0.837\n    NG_BOILER = 0.828\n    NG_ROOM_HEATER = 0.81\n    PROPANE_ROOM_HEATER = 0.81\n    OIL_BOILER = 0.783\n    WOOD_STOVE = 0.75\n    PELLET_STOVE = 0.75\n    NG_FURNACE = 0.744  #! double check this value\n    PROPANE_FURNACE = 0.744\n    OIL_FURNACE = 0.704\n    PELLET_BOILER = 0.639\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever.monthly_electricity_price_per_kwh","title":"monthly_electricity_price_per_kwh(state, start_date, end_date)","text":"

    Get a state's average monthly residential electricity price.

    Note

    Data is returned in cents/KWh.

    Parameters:

    Name Type Description Default state str

    the 2 character postal code of a state

    required start_date date

    the start date, inclusive

    required end_date date

    the end date, non inclusive

    required

    Returns:

    Name Type Description dict dict[str, Any]

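    the parsed JSON response from the EIA API; the monthly entries (period and price) are under response → data and are converted to year-month: price form by price_to_clean_dict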

    Source code in src\\backend\\secondarydata.py
    def monthly_electricity_price_per_kwh(\n    self, state: str, start_date: datetime.date, end_date: datetime.date\n) -> dict[str, Any]:\n    \"\"\"Get a state's average monthly energy price.\n\n    Note:\n        Data is returned in cents/KWh.\n\n    Args:\n        state (str): the 2 character postal code of a state\n        start_date (datetime.date): the start date, inclusive\n        end_date (datetime.date): the end date, non inclusive\n\n    Returns:\n        dict: the dictionary in `year-month: price` form\n    \"\"\"\n    url = f\"{self.eia_base_url}/electricity/retail-sales/data/?frequency=monthly&data[0]=price&facets[stateid][]={state}&facets[sectorid][]=RES&start={start_date.year}-{start_date.month:02}&end={end_date.year}-{end_date.month:02}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}\"\n\n    eia_request = req_get_wrapper(url)\n    eia_request.raise_for_status()\n\n    return eia_request.json()\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever.monthly_heating_season_heating_oil_price_per_gal","title":"monthly_heating_season_heating_oil_price_per_gal(state, start_date, end_date)","text":"

    Get a state's average heating oil price.

    Note

    Data returned is in dollars per gallon.

    Only these states are tracked, and only for the months October through March: * CT * DC * DE * IA * IL * IN * KS * KY * MA * MD * ME * MI * MN * MO * NC * ND * NE * NH * NJ * NY * OH * PA * RI * SD * VA * VT * WI

    Args: state (str): 2 character postal code; start_date (datetime.date): the start date, inclusive; end_date (datetime.date): the end date, non inclusive.

    Returns:

    Name Type Description dict DataFrame

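    a DataFrame of monthly average heating oil prices with year, month, and monthly_avg_price columns, sorted by year and month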

    Source code in src\\backend\\secondarydata.py
    def monthly_heating_season_heating_oil_price_per_gal(\n    self, state: str, start_date: datetime.date, end_date: datetime.date\n) -> pl.DataFrame:\n    \"\"\"Get a state's average heating oil price.\n\n    Note:\n        Data returned is in dollars per gallon.\n\n        Only these states are tracked, and only for the months October through March:\n            * CT\n            * DC\n            * DE\n            * IA\n            * IL\n            * IN\n            * KS\n            * KY\n            * MA\n            * MD\n            * ME\n            * MI\n            * MN\n            * MO\n            * NC\n            * ND\n            * NE\n            * NH\n            * NJ\n            * NY\n            * OH\n            * PA\n            * RI\n            * SD\n            * VA\n            * VT\n            * WI\n    Args:\n        state (str): 2 char postal code\n        start_date (datetime.date): the start date, inclusive\n        end_date (datetime.date): the end date, non inclusive\n\n    Returns:\n        dict: _description_\n    \"\"\"\n    # heating season is Oct - march, $/gal\n    url = f\"https://api.eia.gov/v2/petroleum/pri/wfr/data/?frequency=weekly&data[0]=value&facets[duoarea][]=S{state}&facets[product][]=EPD2F&start={start_date}&end={end_date}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}\"\n\n    eia_request = req_get_wrapper(url)\n    eia_request.raise_for_status()\n\n    json = eia_request.json()\n    df = pl.DataFrame(json[\"response\"][\"data\"])\n    # becomes int, so months are sig figs\n    df = df.with_columns(pl.col(\"period\").str.strptime(pl.Date))\n    df = df.with_columns(\n        pl.col(\"period\").dt.year().alias(\"year\"),\n        pl.col(\"period\").dt.month().alias(\"month\"),\n    )\n\n    monthly_avg_price = (\n        df.group_by([\"year\", \"month\"])\n        .agg(pl.col(\"value\").mean().alias(\"monthly_avg_price\"))\n        .sort(\"year\", \"month\")\n    )\n\n    return 
monthly_avg_price\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever.monthly_heating_season_propane_price_per_gal","title":"monthly_heating_season_propane_price_per_gal(state, start_date, end_date)","text":"

    Get a state's average propane price in dollars per gal.

    Note

    Only these states are tracked, and only for the months October through March: * AL * AR * CO * CT * DE * FL * GA * IL * IN * KS * KY * MA * MD * ME * MI * MN * MO * MS * MT * NC * ND * NE * NH * NJ * NY * OH * OK * PA * RI * SD * TN * TX * UT * VA * VT * WI

    Parameters:

    Name Type Description Default state str

    2 character postal code

    required start_date date

    the start date, inclusive

    required end_date date

    the end date, non inclusive

    required

    Returns:

    Name Type Description dict DataFrame

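    a DataFrame of monthly average propane prices with year, month, and monthly_avg_price columns, sorted by year and month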

    Source code in src\\backend\\secondarydata.py
    def monthly_heating_season_propane_price_per_gal(\n    self, state: str, start_date: datetime.date, end_date: datetime.date\n) -> pl.DataFrame:\n    \"\"\"Get a state's average propane price in dollars per gal.\n\n    Note:\n        Only these states are tracked, and only for the months October through Marc:\n            * AL\n            * AR\n            * CO\n            * CT\n            * DE\n            * FL\n            * GA\n            * IL\n            * IN\n            * KS\n            * KY\n            * KY\n            * MA\n            * MD\n            * ME\n            * MI\n            * MN\n            * MO\n            * MS\n            * MT\n            * NC\n            * ND\n            * NE\n            * NH\n            * NJ\n            * NY\n            * OH\n            * OK\n            * PA\n            * RI\n            * SD\n            * TN\n            * TX\n            * UT\n            * VA\n            * VT\n            * WI\n\n    Args:\n        state (str): 2 character postal code\n        start_date (datetime.date): the start date, inclusive\n        end_date (datetime.date): the end date, non inclusive\n\n    Returns:\n        dict: _description_\n    \"\"\"\n    # heating season is Oct - march, $/gal\n    url = f\"https://api.eia.gov/v2/petroleum/pri/wfr/data/?frequency=weekly&data[0]=value&facets[process][]=PRS&facets[duoarea][]=S{state}&facets[product][]=EPLLPA&start={start_date}&end={end_date}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}\"\n\n    eia_request = req_get_wrapper(url)\n    eia_request.raise_for_status()\n\n    json = eia_request.json()\n    # return self.price_json_to_dict(eia_request.json())\n    df = pl.DataFrame(json[\"response\"][\"data\"])\n    # df = df.with_columns(pl.col(\"period\").str.to_date().alias(\"period\"))\n    df = df.with_columns(pl.col(\"period\").str.strptime(pl.Date))\n    df = df.with_columns(\n        pl.col(\"period\").dt.year().alias(\"year\"),\n        
pl.col(\"period\").dt.month().alias(\"month\"),\n    )\n\n    monthly_avg_price = (\n        df.group_by([\"year\", \"month\"])\n        .agg(pl.col(\"value\").mean().alias(\"monthly_avg_price\"))\n        .sort(\"year\", \"month\")\n    )\n\n    return monthly_avg_price\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever.monthly_ng_price_per_mcf","title":"monthly_ng_price_per_mcf(state, start_date, end_date)","text":"

    Get a state's average natural gas price.

    Note

    Data is returned in dollars per thousand cubic feet (Mcf).

    Parameters:

    Name Type Description Default state str

    the 2 character postal code of a state

    required start_date date

    the start date, inclusive

    required end_date date

    the end date, non inclusive

    required

    Returns:

    Name Type Description dict dict[str, Any]

    the parsed JSON response from the EIA API; the monthly entries (period and value) are under response → data

    Source code in src\\backend\\secondarydata.py
    def monthly_ng_price_per_mcf(\n    self, state: str, start_date: datetime.date, end_date: datetime.date\n) -> dict[str, Any]:\n    \"\"\"Get a state's average natural gas price.\n\n    Note:\n        Data is returned in dollars per mega cubic feet.\n\n    Args:\n        state (str): the 2 character postal code of a state\n        start_date (datetime.date): the start date, inclusive\n        end_date (datetime.date): the end date, non inclusive\n\n    Returns:\n        dict: _description_\n    \"\"\"\n    # $/mcf\n    url = f\"https://api.eia.gov/v2/natural-gas/pri/sum/data/?frequency=monthly&data[0]=value&facets[duoarea][]=S{state}&facets[process][]=PRS&start={start_date.year}-{start_date.month:02}&end={end_date.year}-{end_date.month:02}&sort[0][column]=period&sort[0][direction]=asc&api_key={self.api_key}\"\n\n    eia_request = req_get_wrapper(url)\n    eia_request.raise_for_status()\n\n    return eia_request.json()\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever.monthly_price_per_mbtu_by_energy_type","title":"monthly_price_per_mbtu_by_energy_type(energy_type, state, start_date, end_date)","text":"

    Get the cost per MBTU for the given energy type for the state, over the given period of time. Refer to EIA's documentation for changes to data collection during certain years.

    Parameters:

    Name Type Description Default energy_type EnergyType

    The energy type

    required state str

    the 2 character postal abbreviation. Note that for heating oil, only certain states have this information collected

    required start_date date

    the date for which to start the search. Inclusive. Note that for heating oil, only heating months will be returned

    required end_date date

    the date for which to end the search. Non inclusive

    required

    Raises:

    Type Description NotImplementedError

    Invalid energy type

    Returns:

    Name Type Description dict dict[str, str | EnergyType | float]

    year-month: price in USD per thousand BTU (MBTU), adjusted for heater efficiency, plus the type and state entries

    Source code in src\\backend\\secondarydata.py
    def monthly_price_per_mbtu_by_energy_type(\n    self,\n    energy_type: EnergyType,\n    state: str,\n    start_date: datetime.date,\n    end_date: datetime.date,\n) -> dict[str, str | EnergyType | float]:\n    \"\"\"Get the cost per MBTU for the given energy type for the state, over the given period of time. Refer to EIA's documentation\n    for changes to data collection during certain years.\n\n    Args:\n        energy_type (EnergyType): The energy type\n        state (str): the 2 character postal abbreviation. Note that for heating oil, only certain states have this information collected\n        start_date (datetime.date): the date for which to start the search. Inclusive. Not that for heating oil, only heating months will be returned\n        end_date (datetime.date): the date for which to end the search. Non inclusive\n\n    Raises:\n        NotImplementedError: Invalid energy type\n\n    Returns:\n        dict: year-month: price in USD to BTU\n    \"\"\"\n    if len(state) > 2:\n        state = sts.lookup(state).abbr  # type: ignore\n    match energy_type:\n        case self.EnergyType.PROPANE:\n            return self.price_per_mbtu_with_efficiency(\n                self.price_to_clean_dict(\n                    self.monthly_heating_season_propane_price_per_gal(\n                        state, start_date, end_date\n                    ),\n                    energy_type,\n                    state,\n                )\n            )\n        case self.EnergyType.NATURAL_GAS:\n            return self.price_per_mbtu_with_efficiency(\n                self.price_to_clean_dict(\n                    self.monthly_ng_price_per_mcf(state, start_date, end_date),\n                    energy_type,\n                    state,\n                )\n            )\n        case self.EnergyType.ELECTRICITY:\n            return self.price_per_mbtu_with_efficiency(\n                self.price_to_clean_dict(\n                    self.monthly_electricity_price_per_kwh(\n     
                   state, start_date, end_date\n                    ),\n                    energy_type,\n                    state,\n                )\n            )\n        case self.EnergyType.HEATING_OIL:\n            return self.price_per_mbtu_with_efficiency(\n                self.price_to_clean_dict(\n                    self.monthly_heating_season_heating_oil_price_per_gal(\n                        state, start_date, end_date\n                    ),\n                    energy_type,\n                    state,\n                )\n            )\n        case _:\n            raise NotImplementedError(f\"Unsupported energy type: {energy_type}\")\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever.monthly_price_per_mbtu_by_energy_type_by_state","title":"monthly_price_per_mbtu_by_energy_type_by_state(state, start_date, end_date)","text":"

    Get all available energy prices per MBTU, taking efficiency into account, for a state.

    Note

    Please keep the date range to within a year. For energy types other than heating oil and propane, you have to go a month past.

    Parameters:

    Name Type Description Default state str

    2 character postal code

    required start_date date

    start date

    required end_date date

    end date

    required

    Returns:

    Type Description list[Any]

    list[Any]: list of price dicts for available energy types for a state

    Source code in src\\backend\\secondarydata.py
    def monthly_price_per_mbtu_by_energy_type_by_state(\n    self, state: str, start_date: datetime.date, end_date: datetime.date\n) -> list[Any]:\n    \"\"\"Get all available energy prices per MBTU, taking efficiency into account, for a state.\n\n    Note:\n        Please keep times to within a year. For the non oil and propane, you have to go a month past.\n\n    Args:\n        state (str): 2 character postal code\n        start_date (datetime.date): start date\n        end_date (datetime.date): end date\n\n    Returns:\n        list[Any]: list of price dicts for available energy types for a state\n    \"\"\"\n    if len(state) > 2:\n        state = sts.lookup(state).abbr  # type: ignore\n\n    dicts_to_return = []\n    if state in self.HEATING_OIL_STATES_ABBR:\n        dicts_to_return.append(\n            self.monthly_price_per_mbtu_by_energy_type(\n                self.EnergyType.HEATING_OIL, state, start_date, end_date\n            )\n        )\n    if state in self.PROPANE_STATES_ABBR:\n        dicts_to_return.append(\n            self.monthly_price_per_mbtu_by_energy_type(\n                self.EnergyType.PROPANE, state, start_date, end_date\n            )\n        )\n    dicts_to_return.append(\n        self.monthly_price_per_mbtu_by_energy_type(\n            self.EnergyType.NATURAL_GAS, state, start_date, end_date\n        )\n    )\n    dicts_to_return.append(\n        self.monthly_price_per_mbtu_by_energy_type(\n            self.EnergyType.ELECTRICITY, state, start_date, end_date\n        )\n    )\n    log(f\"{dicts_to_return = }\", \"debug\")\n    return dicts_to_return\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever.price_df_to_clean_dict","title":"price_df_to_clean_dict(eia_df, energy_type, state)","text":"

    Clean DataFrame data consisting of EIA API data.

    Parameters:

    Name Type Description Default eia_df DataFrame

    the DataFrame to clean

    required energy_type EnergyType

    the energy type

    required state str

    the state

    required

    Returns:

    Type Description dict[str, str | EnergyType | float]

    dict[str, str|EnergyType|float]: the dict

    Source code in src\\backend\\secondarydata.py
    def price_df_to_clean_dict(\n    self, eia_df: pl.DataFrame, energy_type: EnergyType, state: str\n) -> dict[str, str | EnergyType | float]:\n    \"\"\"Clean DataFrame data consisting of EIA API data.\n\n    Args:\n        eia_df (pl.DataFrame): the DataFrame to clean\n        energy_type (EnergyType): the energy type\n        state (str): the state\n\n    Returns:\n        dict[str, str|EnergyType|float]: the dict\n    \"\"\"\n    result_dict = {}\n    for row in eia_df.rows(named=True):\n        year_month = f\"{row.get(\"year\")}-{row.get(\"month\"):02}\"\n        if row.get(\"monthly_avg_price\") is not None:\n            result_dict[year_month] = round(row.get(\"monthly_avg_price\"), 3)  # type: ignore\n    result_dict[\"type\"] = energy_type.value\n    result_dict[\"state\"] = state\n    return result_dict\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever.price_dict_to_clean_dict","title":"price_dict_to_clean_dict(eia_json, energy_type, state)","text":"

    Clean JSON data returned by EIA's API.

    Parameters:

    Name Type Description Default eia_json dict

    the response JSON

    required energy_type EnergyType

    the energy type

    required state str

    the state

    required

    Returns:

    Type Description dict[str, str | EnergyType | float]

    dict[str, str | EnergyType | float]: cleaned JSON

    Source code in src\\backend\\secondarydata.py
    def price_dict_to_clean_dict(\n    self, eia_json: dict, energy_type: EnergyType, state: str\n) -> dict[str, str | EnergyType | float]:\n    \"\"\"Clean JSON data returned by EIA's API.\n\n    Args:\n        eia_json (dict): the response JSON\n        energy_type (EnergyType): the energy type\n        state (str): the state\n\n    Returns:\n        dict[str, str | EnergyType | float]: cleaned JSON\n    \"\"\"\n    # price key is different for electricity\n    accessor = \"value\"\n    if \"product\" not in eia_json[\"response\"][\"data\"][0]:\n        accessor = \"price\"\n\n    result_dict = {\n        entry[\"period\"]: entry[f\"{accessor}\"]\n        for entry in eia_json[\"response\"][\"data\"]\n    }\n    result_dict[\"type\"] = energy_type.value\n    result_dict[\"state\"] = state\n\n    return result_dict\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever.price_per_mbtu_with_efficiency","title":"price_per_mbtu_with_efficiency(energy_price_dict)","text":"

    Convert an energy source's price per quantity into price per MBTU, taking heater efficiency into account.

    Note

    Efficiency data taken from https://portfoliomanager.energystar.gov/pdf/reference/Thermal%20Conversions.pdf

    See also

    EIADataRetriever.HeaterEfficiencies

    Parameters:

    Name Type Description Default energy_price_dict dict

    energy source json

    required

    Returns:

    Name Type Description dict dict[str, str | EnergyType | float]

    the same dictionary, updated in place with BTU-centric pricing

    Source code in src\\backend\\secondarydata.py
    def price_per_mbtu_with_efficiency(\n    self, energy_price_dict: dict\n) -> dict[str, str | EnergyType | float]:\n    \"\"\"Convert an energy source's price per quantity into price per BTU with an efficiency.\n\n    Note:\n        Efficiency data taken from https://portfoliomanager.energystar.gov/pdf/reference/Thermal%20Conversions.pdf\n\n    See also:\n        `EIADataRetriever.HeaterEfficiencies`\n\n    Args:\n        energy_price_dict (dict): energy source json\n\n    Returns:\n        dict: new dictionary with btu centric pricing\n    \"\"\"\n    #! make new function based on burner type/ end usage type\n    CENTS_IN_DOLLAR = 100\n    match energy_price_dict.get(\"type\"):\n        case self.EnergyType.PROPANE.value:\n            # for loop is done for every case since i dont want to use `eval` or parse a string of division to keep PEMDAS. this is why i dont have an efficiency func yet\n            for key, value in energy_price_dict.items():\n                if (\n                    key in [\"type\", \"state\", None]\n                    or energy_price_dict.get(key) is None\n                ):\n                    continue\n                energy_price_dict[key] = (\n                    value\n                    / (\n                        self.FuelBTUConversion.PROPANE_BTU_PER_GAL.value\n                        * self.HeaterEfficiencies.PROPANE_FURNACE.value\n                    )\n                    * 1_000\n                )\n        case self.EnergyType.NATURAL_GAS.value:\n            for key, value in energy_price_dict.items():\n                if (\n                    key in [\"type\", \"state\", None]\n                    or energy_price_dict.get(key) is None\n                ):\n                    continue\n                energy_price_dict[key] = (\n                    value\n                    / (\n                        self.FuelBTUConversion.NG_BTU_PER_MCT.value\n                        * self.HeaterEfficiencies.NG_FURNACE.value\n     
               )\n                    * 1_000\n                )\n        case self.EnergyType.ELECTRICITY.value:\n            for key, value in energy_price_dict.items():\n                if (\n                    key in [\"type\", \"state\", None]\n                    or energy_price_dict.get(key) is None\n                ):\n                    continue\n                energy_price_dict[key] = (\n                    value\n                    / CENTS_IN_DOLLAR\n                    / (\n                        self.FuelBTUConversion.ELECTRICITY_BTU_PER_KWH.value\n                        * self.HeaterEfficiencies.HEAT_PUMP_DUCTED.value\n                    )\n                    * 1_000\n                )\n        case self.EnergyType.HEATING_OIL.value:\n            for key, value in energy_price_dict.items():\n                if (\n                    key in [\"type\", \"state\", None]\n                    or energy_price_dict.get(key) is None\n                ):\n                    continue\n                energy_price_dict[key] = (\n                    value\n                    / (\n                        self.FuelBTUConversion.HEATING_OIL_BTU_PER_GAL.value\n                        * self.HeaterEfficiencies.OIL_BOILER.value\n                    )\n                    * 1_000\n                )\n        case _:\n            log(\"Could not translate dict to btu per price.\", \"warn\")\n\n    return energy_price_dict\n
    "},{"location":"backend/secondarydata/#backend.secondarydata.EIADataRetriever.price_to_clean_dict","title":"price_to_clean_dict(price_struct, energy_type, state)","text":"

    Handle the different data types that EIA data could be stored in.

    Parameters:

    Name Type Description Default price_struct dict | DataFrame

    a data structure containing the year, month, and price info

    required energy_type EnergyType

    the energy type

    required state str

    the state

    required

    Raises:

    Type Description TypeError

    raised if the type of price_struct is not supported

    Returns:

    Type Description dict[str, str | EnergyType | float]

    dict[str, str|EnergyType|float]: the normalized and structured data in dict form

    Source code in src\\backend\\secondarydata.py
    def price_to_clean_dict(\n    self, price_struct: dict | pl.DataFrame, energy_type: EnergyType, state: str\n) -> dict[str, str | EnergyType | float]:\n    \"\"\"Handle the different data types that EIA data could be stored in.\n\n    Args:\n        price_struct (dict | pl.DataFrame): a data structure containing the year, month, and price info\n        energy_type (EnergyType): the energy type\n        state (str): the state\n\n    Raises:\n        TypeError: raised if the type of `price_struct` is not supported\n\n    Returns:\n        dict[str, str|EnergyType|float]: the normalized and structured data in dict form\n    \"\"\"\n    match price_struct:\n        case dict():\n            return self.price_dict_to_clean_dict(price_struct, energy_type, state)\n        case pl.DataFrame():\n            return self.price_df_to_clean_dict(price_struct, energy_type, state)\n        case _:\n            raise TypeError(f\"Type not supported: {type(energy_type)}\")\n
    "},{"location":"backend/us/states/","title":"States","text":""},{"location":"backend/us/states/#backend.us.states.State","title":"State","text":"Source code in src\\backend\\us\\states.py
    class State:\n    abbr: str\n    ap_abbr: Optional[str]\n    capital: Optional[str]\n    capital_tz: Optional[str]\n    fips: Optional[str]\n    is_territory: bool\n    is_obsolete: bool\n    is_contiguous: bool\n    is_continental: bool\n    name: str\n    name_metaphone: str\n    statehood_year: Optional[int]\n    time_zones: List[str]\n\n    def __init__(self, **kwargs):\n        for k, v in kwargs.items():\n            setattr(self, k, v)\n\n    def __repr__(self) -> str:\n        return f\"<State:{self.name}>\"\n\n    def __str__(self) -> str:\n        return self.name\n\n    def shapefile_urls(self) -> Optional[Dict[str, str]]:\n        \"\"\"Shapefiles are available directly from the US Census Bureau:\n        https://www.census.gov/cgi-bin/geo/shapefiles/index.php\n        \"\"\"\n\n        fips = self.fips\n\n        if not fips:\n            return None\n\n        base = \"https://www2.census.gov/geo/tiger/TIGER2010/\"\n        urls = {\n            \"tract\": urljoin(base, f\"TRACT/2010/tl_2010_{fips}_tract10.zip\"),\n            \"cd\": urljoin(base, f\"CD/111/tl_2010_{fips}_cd111.zip\"),\n            \"county\": urljoin(base, f\"COUNTY/2010/tl_2010_{fips}_county10.zip\"),\n            \"state\": urljoin(base, f\"STATE/2010/tl_2010_{fips}_state10.zip\"),\n            \"zcta\": urljoin(base, f\"ZCTA5/2010/tl_2010_{fips}_zcta510.zip\"),\n            \"block\": urljoin(base, f\"TABBLOCK/2010/tl_2010_{fips}_tabblock10.zip\"),\n            \"blockgroup\": urljoin(base, f\"BG/2010/tl_2010_{fips}_bg10.zip\"),\n        }\n\n        return urls\n
    "},{"location":"backend/us/states/#backend.us.states.State.shapefile_urls","title":"shapefile_urls()","text":"

    Shapefiles are available directly from the US Census Bureau: https://www.census.gov/cgi-bin/geo/shapefiles/index.php

    Source code in src\\backend\\us\\states.py
    def shapefile_urls(self) -> Optional[Dict[str, str]]:\n    \"\"\"Shapefiles are available directly from the US Census Bureau:\n    https://www.census.gov/cgi-bin/geo/shapefiles/index.php\n    \"\"\"\n\n    fips = self.fips\n\n    if not fips:\n        return None\n\n    base = \"https://www2.census.gov/geo/tiger/TIGER2010/\"\n    urls = {\n        \"tract\": urljoin(base, f\"TRACT/2010/tl_2010_{fips}_tract10.zip\"),\n        \"cd\": urljoin(base, f\"CD/111/tl_2010_{fips}_cd111.zip\"),\n        \"county\": urljoin(base, f\"COUNTY/2010/tl_2010_{fips}_county10.zip\"),\n        \"state\": urljoin(base, f\"STATE/2010/tl_2010_{fips}_state10.zip\"),\n        \"zcta\": urljoin(base, f\"ZCTA5/2010/tl_2010_{fips}_zcta510.zip\"),\n        \"block\": urljoin(base, f\"TABBLOCK/2010/tl_2010_{fips}_tabblock10.zip\"),\n        \"blockgroup\": urljoin(base, f\"BG/2010/tl_2010_{fips}_bg10.zip\"),\n    }\n\n    return urls\n
    "},{"location":"backend/us/states/#backend.us.states.lookup","title":"lookup(val, field=None, use_cache=True)","text":"

    State lookup. This method will make a best effort attempt at finding the state based on the lookup value provided.

    • two digits will search for FIPS code
    • two letters will search for state abbreviation

    Exact matches can be done on any attribute on State objects by passing the field argument. This does an exact, case-sensitive comparison against the specified field.

    This method caches non-None results, but the cache can be bypassed with the use_cache=False argument.

    Source code in src\\backend\\us\\states.py
    def lookup(val, field: Optional[str] = None, use_cache: bool = True) -> Optional[State]:\n    \"\"\"State lookup. This method will make a best effort\n    attempt at finding the state based on the lookup value provided.\n\n      * two digits will search for FIPS code\n      * two letters will search for state abbreviation\n\n    Exact matches can be done on any attribute on State objects by passing\n    the `field` argument. This does an exact, case-sensitive comparison against\n    the specified field.\n\n    This method caches non-None results, but can the cache can be bypassed\n    with the `use_cache=False` argument.\n    \"\"\"\n\n    matched_state = None\n\n    if field is None:\n        if FIPS_RE.match(val):\n            field = \"fips\"\n        elif ABBR_RE.match(val):\n            val = val.upper()\n            field = \"abbr\"\n        else:\n            val = val.title()\n            field = \"name\"\n\n    # see if result is in cache\n    cache_key = f\"{field}:{val}\"\n    if use_cache and cache_key in _lookup_cache:\n        matched_state = _lookup_cache[cache_key]\n\n    for state in STATES_AND_TERRITORIES:\n        if val == getattr(state, field):\n            matched_state = state\n            if use_cache:\n                _lookup_cache[cache_key] = state\n\n    return matched_state\n
    "},{"location":"backend/us/unitedstatesofamerica/","title":"Unitedstatesofamerica","text":""},{"location":"gui/app/","title":"App","text":""},{"location":"gui/datapage/","title":"Datapage","text":""},{"location":"gui/datapage/#gui.datapage.DataPage","title":"DataPage","text":"

    Bases: CTkFrame

    Create page for displaying energy data and links to censusreporter.org for census level data

    Source code in src\\gui\\datapage.py
    class DataPage(ctk.CTkFrame):\n    \"\"\"Crate page for displaying energy data and links to censusreporter.org for census level data\"\"\"\n\n    def __init__(self, master, **kwargs):\n        super().__init__(master, **kwargs)\n        self.msa_name = None\n        self.income_df = None\n        self.demog_df = None\n        self.states_in_msa = None\n        self.state_demog_dfs = None\n        self.state_income_dfs = None\n        self.cur_year = datetime.datetime.now().year\n        self.years = [\n            str(self.cur_year),\n            str(self.cur_year - 1),\n            str(self.cur_year - 2),\n            str(self.cur_year - 3),\n            str(self.cur_year - 4),\n        ]\n        self.roboto_font = ctk.CTkFont(family=\"Roboto\")\n        self.roboto_header_font = ctk.CTkFont(family=\"Roboto\", size=28)\n        self.roboto_link_font = ctk.CTkFont(family=\"Roboto\", underline=True, size=20)\n        self.create_widgets()\n\n    def create_widgets(self) -> None:\n        \"\"\"Create widgets.\"\"\"\n        # bug in sockets library wont allow you to raise keyboardinterrupt, so stopping\n        # Content frame will have 4 rows. 
first will be header, 2nd is energy graph, 3rd will contain a frame that has censusreport.org links, 4th will have progress bar frame\n        self.content_frame = ctk.CTkFrame(self)\n        self.content_banner_frame = ctk.CTkFrame(self.content_frame)\n        self.state_and_year_content_banner_dropdown_frame = ctk.CTkFrame(\n            self.content_banner_frame\n        )\n        self.census_reporter_frame = ctk.CTkFrame(self.content_frame)\n        self.log_frame = ctk.CTkFrame(self.content_frame)\n\n        self.content_banner_main_text = ctk.CTkLabel(\n            self.content_banner_frame,\n            text=\"Census and Energy Data:\",\n            font=self.roboto_header_font,\n        )\n        self.content_banner_main_text.bind(\n            \"<Configure>\",\n            command=lambda x: self.content_banner_main_text.configure(\n                wraplength=self.content_banner_main_text._current_width\n                - 40  # random padding\n            ),\n        )\n        # nested frame for holding filters and text inside banner frame\n\n        self.select_state_label = ctk.CTkLabel(\n            self.state_and_year_content_banner_dropdown_frame,\n            text=\"Select State\",\n            font=self.roboto_font,\n        )\n        self.select_state_dropdown = ctk.CTkOptionMenu(\n            self.state_and_year_content_banner_dropdown_frame,\n            values=None,\n            command=self.state_dropdown_callback,\n        )\n\n        self.select_year_label = ctk.CTkLabel(\n            self.state_and_year_content_banner_dropdown_frame,\n            text=\"Select Year\",\n            font=self.roboto_font,\n        )\n        self.select_year_dropdown = ctk.CTkOptionMenu(\n            self.state_and_year_content_banner_dropdown_frame,\n            values=self.years,\n            command=self.year_dropdown_callback,\n        )\n\n        self.energy_graph_frame = ctk.CTkFrame(self.content_frame)\n\n        self.census_reporter_state_label = 
ctk.CTkLabel(\n            self.census_reporter_frame,\n            text=\"Census Reporter: State Report\",\n            font=self.roboto_link_font,\n            cursor=\"hand2\",\n            text_color=\"blue\",\n        )\n\n        self.log_button = ctk.CTkButton(\n            self.log_frame, text=\"Open Log File\", command=self.open_log_file\n        )\n        self.census_button = ctk.CTkButton(\n            self.log_frame,\n            text=\"Generate Census data\",\n            command=self.generate_census_reports,\n        )\n        self.census_reporter_state_label.bind(\n            \"<Button-1>\", lambda x: self.open_census_reporter_state()\n        )\n        self.census_reporter_metro_label = ctk.CTkLabel(\n            self.census_reporter_frame,\n            text=\"Census Reporter: Metro Report\",\n            font=self.roboto_link_font,\n            cursor=\"hand2\",\n            text_color=\"blue\",\n        )\n        self.census_reporter_metro_label.bind(\n            \"<Button-1>\", lambda x: self.open_census_reporter_metro()\n        )\n        # create grid\n        # col\n        self.columnconfigure(0, weight=1)\n        self.content_frame.columnconfigure(0, weight=1)\n        self.content_banner_frame.columnconfigure((0, 1), weight=1)\n        self.state_and_year_content_banner_dropdown_frame.columnconfigure(\n            (0, 1), weight=1\n        )\n        self.energy_graph_frame.columnconfigure(0, weight=1)\n        self.census_reporter_frame.columnconfigure(0, weight=1)\n        self.log_frame.columnconfigure((0, 1), weight=1)\n\n        # row\n        self.rowconfigure(0, weight=1)\n\n        self.content_frame.rowconfigure(0, weight=1)  # banner\n        self.content_frame.rowconfigure(1, weight=5)  # energy graph\n        self.content_frame.rowconfigure(2, weight=2)  # census reporter frame\n        self.content_frame.rowconfigure(3, weight=1)\n\n        self.content_banner_frame.rowconfigure(0, weight=1)\n\n        
self.state_and_year_content_banner_dropdown_frame.rowconfigure((0, 1), weight=1)\n\n        self.energy_graph_frame.rowconfigure(0, weight=1)\n\n        self.census_reporter_frame.rowconfigure((0, 1), weight=1)\n\n        self.log_frame.rowconfigure(0, weight=1)\n\n        # placement\n        self.content_frame.grid(column=0, row=0, sticky=\"news\")\n\n        self.content_banner_frame.grid(column=0, row=0, sticky=\"news\")\n\n        self.content_banner_main_text.grid(column=0, row=0, sticky=\"nsew\")\n\n        self.state_and_year_content_banner_dropdown_frame.grid(\n            column=1, row=0, sticky=\"news\"\n        )\n\n        self.select_state_label.grid(column=0, row=0, sticky=\"news\")\n        self.select_year_label.grid(column=1, row=0, sticky=\"news\")\n        self.select_state_dropdown.grid(column=0, row=1)\n        self.select_year_dropdown.grid(column=1, row=1)\n\n        self.energy_graph_frame.grid(column=0, row=1, sticky=\"news\")\n\n        self.census_reporter_frame.grid(column=0, row=2, sticky=\"news\")\n        self.census_reporter_state_label.grid(column=0, row=0)\n        self.census_reporter_metro_label.grid(column=0, row=1)\n\n        self.log_frame.grid(column=0, row=3, sticky=\"news\")\n        self.census_button.grid(column=0, row=0, pady=10, padx=(0, 10))\n        self.log_button.grid(column=1, row=0, pady=10, padx=(10, 0))\n\n    def set_msa_name(self, msa_name: str) -> None:\n        \"\"\"Set the msa name and update objects that rely on the msa name. Includes drop downs and and generating the energy plot.\n\n        Args:\n            msa_name (str): Metropolitan Statistical Area name. 
This must be validated\n        \"\"\"\n        self.msa_name = msa_name\n        self.states_in_msa = helper.get_states_in_msa(self.msa_name)\n\n        if len(self.states_in_msa) > 0:\n            self.select_state_dropdown.configure()\n            self.select_state_dropdown.set(self.states_in_msa[0])\n\n        self.select_state_dropdown.configure(values=self.states_in_msa)\n        self.content_banner_main_text.configure(\n            text=f\"Census and Energy Data: {self.msa_name}\"\n        )\n        self.zip_list = helper.metro_name_to_zip_code_list(msa_name)\n        self.zip_list = [str(zip) for zip in self.zip_list]\n\n        threading.Thread(\n            target=self.generate_energy_plot,\n            args=(\n                int(self.select_year_dropdown.get()),\n                self.select_state_dropdown.get(),\n            ),\n            daemon=True,\n        ).start()\n\n    def generate_energy_plot(self, year: int, state: str) -> None:\n        \"\"\"Call the EIA API and generate a plot with the received data.\n\n        Note:\n            Call this in a thread so that it doesn't freeze the GUI\n            Update: might want to just get the data and plot on the main thread\n        \"\"\"\n        eia = EIADataRetriever()\n        energy_price_per_mbtu_by_type_for_state = (\n            eia.monthly_price_per_mbtu_by_energy_type_by_state(\n                state, datetime.date(year, 1, 1), datetime.date(year + 1, 1, 1)\n            )\n        )\n\n        fig = Figure(layout=\"compressed\", facecolor=\"#dbdbdb\")\n        ax = fig.add_subplot()\n        ax.set_xlabel(\"Time (Months)\")\n        ax.set_ylabel(\"Cost per Effective MBTU ($/MBTU)\")\n        ax.set_title(\n            f\"Avg. 
Energy Prices by Appliance for {state}, {year}\",\n            loc=\"center\",\n            wrap=True,\n        )\n        months = [i for i in range(1, 13)]\n        month_names = [\n            \"Jan\",\n            \"Feb\",\n            \"Mar\",\n            \"Apr\",\n            \"May\",\n            \"Jun\",\n            \"Jul\",\n            \"Aug\",\n            \"Sep\",\n            \"Oct\",\n            \"Nov\",\n            \"Dec\",\n        ]\n        ax.set_xticks(months)\n        labels = [item.get_text() for item in ax.get_xticklabels()]\n\n        # Modify specific labels, keeping offset\n        for i in range(0, 12):\n            labels[i] = month_names[i]\n        ax.set_xticklabels(labels)\n\n        for energy_dict in energy_price_per_mbtu_by_type_for_state:\n            if len(energy_dict) < 3:\n                log(\n                    f\"Issue with energy type {energy_dict.get(\"type\")} for state {energy_dict.get(\"state\")}\",\n                    \"debug\",\n                )\n                continue\n            match energy_dict.get(\"type\"):\n                case EIADataRetriever.EnergyType.PROPANE.value:\n                    result_list = []\n                    for month in months:\n                        key = f\"{year}-{month:02}\"\n                        val = energy_dict.get(key, float(\"NaN\"))\n                        if val is None:\n                            val = float(\"NaN\")\n                        result_list.append(val)\n                    ax.plot(months, result_list, label=\"Propane Furnace\")\n                case EIADataRetriever.EnergyType.HEATING_OIL.value:\n                    result_list = []\n                    for month in months:\n                        key = f\"{year}-{month:02}\"\n                        val = energy_dict.get(key, float(\"NaN\"))\n                        if val is None:\n                            val = float(\"NaN\")\n                        result_list.append(val)\n               
     ax.plot(months, result_list, label=\"Heating Oil Boiler\")\n                case EIADataRetriever.EnergyType.NATURAL_GAS.value:\n                    result_list = []\n                    for month in months:\n                        key = f\"{year}-{month:02}\"\n                        val = energy_dict.get(key, float(\"NaN\"))\n                        if val is None:\n                            val = float(\"NaN\")\n                        result_list.append(val)\n                    ax.plot(months, result_list, label=\"Natural Gas Furnace\")\n                case EIADataRetriever.EnergyType.ELECTRICITY.value:\n                    result_list = []\n                    for month in months:\n                        key = f\"{year}-{month:02}\"\n                        val = energy_dict.get(key, float(\"NaN\"))\n                        if val is None:\n                            val = float(\"NaN\")\n                        result_list.append(val)\n                    ax.plot(months, result_list, label=\"Ducted Heat Pump\")\n        ax.legend()\n        with threading.Lock():\n            canvas = FigureCanvasTkAgg(fig, master=self.energy_graph_frame)\n            canvas.draw()\n\n            # toolbar = NavigationToolbar2Tk(canvas, window=self.energy_graph_frame, pack_toolbar=False)\n            # toolbar.update()\n            # canvas.mpl_connect(\"key_press_event\", key_press_handler)\n\n            # toolbar.grid(column=0, row=1, sticky=\"news\")\n            canvas.get_tk_widget().grid(column=0, row=0)\n\n    def open_census_reporter_state(self) -> None:\n        \"\"\"Census reporter state label callback\"\"\"\n        state_link = helper.get_census_report_url_page(\n            sts.lookup(self.select_state_dropdown.get()).name  # type: ignore\n        )\n        webbrowser.open_new_tab(state_link)\n\n    def open_census_reporter_metro(self) -> None:\n        \"\"\"Census reporter metro label callback\"\"\"\n        metro_link = 
helper.get_census_report_url_page(f\"{self.msa_name} metro area\")  # type: ignore\n        webbrowser.open_new_tab(metro_link)\n\n    def state_dropdown_callback(self, state: str) -> None:\n        \"\"\"Banner state callback.\n        TODO:\n            check if thread is running with given name, and if so join it and start the new thread\n\n        Args:\n            state (str): the state after the change\n        \"\"\"\n\n        threading.Thread(\n            target=self.generate_energy_plot,\n            args=(\n                int(self.select_year_dropdown.get()),\n                state,\n            ),\n            name=\"energy_thread\",\n            daemon=True,\n        ).start()\n\n    def year_dropdown_callback(self, year: str) -> None:\n        \"\"\"Banner year callback.\n        TODO:\n            Check if thread is running with given name, and if so join it and start the new thread\n\n        Args:\n            year (str): the year after the change\n        \"\"\"\n        threading.Thread(\n            target=self.generate_energy_plot,\n            args=(\n                int(year),\n                self.select_state_dropdown.get(),\n            ),\n            name=\"energy_thread\",\n            daemon=True,\n        ).start()\n\n    def open_log_file(self) -> None:\n        \"\"\"Open logging file.\n\n        Note:\n            Haven't tested this on mac/linux. 
\"darwin\" doesn't exist in `system.platform` on windows, so cant say for sure if this works\n        \"\"\"\n        try:\n            if sys.platform == \"win32\":\n                from os import startfile\n\n                startfile(helper.LOGGING_FILE_PATH)\n            else:\n                opener = \"open\" if sys.platform == \"darwin\" else \"xdg-open\"\n                subprocess.call([opener, helper.LOGGING_FILE_PATH])\n        except FileNotFoundError:\n            CTkMessagebox(\n                self,\n                title=\"Error\",\n                message=\"Logging file doesn't exist! Try rerunning the program or creating a logger.log file in /output/logging/\",\n                icon=\"warning\",\n            )\n\n    def generate_census_reports(self) -> None:\n        log(\"Fetching census reports...\", \"info\")\n        c = CensusDataRetriever()\n        threading.Thread(\n            target=c.generate_acs5_subject_table_group_for_zcta_by_year,\n            args=(\n                \"S1901\",\n                \"2019\",\n            ),\n        ).start()\n        threading.Thread(\n            target=c.generate_acs5_profile_table_group_for_zcta_by_year,\n            args=(\n                \"DP05\",\n                \"2019\",\n            ),\n        ).start()\n
    "},{"location":"gui/datapage/#gui.datapage.DataPage.create_widgets","title":"create_widgets()","text":"

    Create widgets.

    Source code in src\\gui\\datapage.py
    def create_widgets(self) -> None:\n    \"\"\"Create widgets.\"\"\"\n    # bug in sockets library wont allow you to raise keyboardinterrupt, so stopping\n    # Content frame will have 4 rows. first will be header, 2nd is energy graph, 3rd will contain a frame that has censusreport.org links, 4th will have progress bar frame\n    self.content_frame = ctk.CTkFrame(self)\n    self.content_banner_frame = ctk.CTkFrame(self.content_frame)\n    self.state_and_year_content_banner_dropdown_frame = ctk.CTkFrame(\n        self.content_banner_frame\n    )\n    self.census_reporter_frame = ctk.CTkFrame(self.content_frame)\n    self.log_frame = ctk.CTkFrame(self.content_frame)\n\n    self.content_banner_main_text = ctk.CTkLabel(\n        self.content_banner_frame,\n        text=\"Census and Energy Data:\",\n        font=self.roboto_header_font,\n    )\n    self.content_banner_main_text.bind(\n        \"<Configure>\",\n        command=lambda x: self.content_banner_main_text.configure(\n            wraplength=self.content_banner_main_text._current_width\n            - 40  # random padding\n        ),\n    )\n    # nested frame for holding filters and text inside banner frame\n\n    self.select_state_label = ctk.CTkLabel(\n        self.state_and_year_content_banner_dropdown_frame,\n        text=\"Select State\",\n        font=self.roboto_font,\n    )\n    self.select_state_dropdown = ctk.CTkOptionMenu(\n        self.state_and_year_content_banner_dropdown_frame,\n        values=None,\n        command=self.state_dropdown_callback,\n    )\n\n    self.select_year_label = ctk.CTkLabel(\n        self.state_and_year_content_banner_dropdown_frame,\n        text=\"Select Year\",\n        font=self.roboto_font,\n    )\n    self.select_year_dropdown = ctk.CTkOptionMenu(\n        self.state_and_year_content_banner_dropdown_frame,\n        values=self.years,\n        command=self.year_dropdown_callback,\n    )\n\n    self.energy_graph_frame = ctk.CTkFrame(self.content_frame)\n\n    
self.census_reporter_state_label = ctk.CTkLabel(\n        self.census_reporter_frame,\n        text=\"Census Reporter: State Report\",\n        font=self.roboto_link_font,\n        cursor=\"hand2\",\n        text_color=\"blue\",\n    )\n\n    self.log_button = ctk.CTkButton(\n        self.log_frame, text=\"Open Log File\", command=self.open_log_file\n    )\n    self.census_button = ctk.CTkButton(\n        self.log_frame,\n        text=\"Generate Census data\",\n        command=self.generate_census_reports,\n    )\n    self.census_reporter_state_label.bind(\n        \"<Button-1>\", lambda x: self.open_census_reporter_state()\n    )\n    self.census_reporter_metro_label = ctk.CTkLabel(\n        self.census_reporter_frame,\n        text=\"Census Reporter: Metro Report\",\n        font=self.roboto_link_font,\n        cursor=\"hand2\",\n        text_color=\"blue\",\n    )\n    self.census_reporter_metro_label.bind(\n        \"<Button-1>\", lambda x: self.open_census_reporter_metro()\n    )\n    # create grid\n    # col\n    self.columnconfigure(0, weight=1)\n    self.content_frame.columnconfigure(0, weight=1)\n    self.content_banner_frame.columnconfigure((0, 1), weight=1)\n    self.state_and_year_content_banner_dropdown_frame.columnconfigure(\n        (0, 1), weight=1\n    )\n    self.energy_graph_frame.columnconfigure(0, weight=1)\n    self.census_reporter_frame.columnconfigure(0, weight=1)\n    self.log_frame.columnconfigure((0, 1), weight=1)\n\n    # row\n    self.rowconfigure(0, weight=1)\n\n    self.content_frame.rowconfigure(0, weight=1)  # banner\n    self.content_frame.rowconfigure(1, weight=5)  # energy graph\n    self.content_frame.rowconfigure(2, weight=2)  # census reporter frame\n    self.content_frame.rowconfigure(3, weight=1)\n\n    self.content_banner_frame.rowconfigure(0, weight=1)\n\n    self.state_and_year_content_banner_dropdown_frame.rowconfigure((0, 1), weight=1)\n\n    self.energy_graph_frame.rowconfigure(0, weight=1)\n\n    
self.census_reporter_frame.rowconfigure((0, 1), weight=1)\n\n    self.log_frame.rowconfigure(0, weight=1)\n\n    # placement\n    self.content_frame.grid(column=0, row=0, sticky=\"news\")\n\n    self.content_banner_frame.grid(column=0, row=0, sticky=\"news\")\n\n    self.content_banner_main_text.grid(column=0, row=0, sticky=\"nsew\")\n\n    self.state_and_year_content_banner_dropdown_frame.grid(\n        column=1, row=0, sticky=\"news\"\n    )\n\n    self.select_state_label.grid(column=0, row=0, sticky=\"news\")\n    self.select_year_label.grid(column=1, row=0, sticky=\"news\")\n    self.select_state_dropdown.grid(column=0, row=1)\n    self.select_year_dropdown.grid(column=1, row=1)\n\n    self.energy_graph_frame.grid(column=0, row=1, sticky=\"news\")\n\n    self.census_reporter_frame.grid(column=0, row=2, sticky=\"news\")\n    self.census_reporter_state_label.grid(column=0, row=0)\n    self.census_reporter_metro_label.grid(column=0, row=1)\n\n    self.log_frame.grid(column=0, row=3, sticky=\"news\")\n    self.census_button.grid(column=0, row=0, pady=10, padx=(0, 10))\n    self.log_button.grid(column=1, row=0, pady=10, padx=(10, 0))\n
    "},{"location":"gui/datapage/#gui.datapage.DataPage.generate_energy_plot","title":"generate_energy_plot(year, state)","text":"

    Call the EIA API and generate a plot with the received data.

    Note

    Call this in a thread so that it doesn't freeze the GUI. Update: might want to just get the data and plot on the main thread.

    Source code in src\\gui\\datapage.py
    def generate_energy_plot(self, year: int, state: str) -> None:\n    \"\"\"Call the EIA API and generate a plot with the received data.\n\n    Note:\n        Call this in a thread so that it doesn't freeze the GUI\n        Update: might want to just get the data and plot on the main thread\n    \"\"\"\n    eia = EIADataRetriever()\n    energy_price_per_mbtu_by_type_for_state = (\n        eia.monthly_price_per_mbtu_by_energy_type_by_state(\n            state, datetime.date(year, 1, 1), datetime.date(year + 1, 1, 1)\n        )\n    )\n\n    fig = Figure(layout=\"compressed\", facecolor=\"#dbdbdb\")\n    ax = fig.add_subplot()\n    ax.set_xlabel(\"Time (Months)\")\n    ax.set_ylabel(\"Cost per Effective MBTU ($/MBTU)\")\n    ax.set_title(\n        f\"Avg. Energy Prices by Appliance for {state}, {year}\",\n        loc=\"center\",\n        wrap=True,\n    )\n    months = [i for i in range(1, 13)]\n    month_names = [\n        \"Jan\",\n        \"Feb\",\n        \"Mar\",\n        \"Apr\",\n        \"May\",\n        \"Jun\",\n        \"Jul\",\n        \"Aug\",\n        \"Sep\",\n        \"Oct\",\n        \"Nov\",\n        \"Dec\",\n    ]\n    ax.set_xticks(months)\n    labels = [item.get_text() for item in ax.get_xticklabels()]\n\n    # Modify specific labels, keeping offset\n    for i in range(0, 12):\n        labels[i] = month_names[i]\n    ax.set_xticklabels(labels)\n\n    for energy_dict in energy_price_per_mbtu_by_type_for_state:\n        if len(energy_dict) < 3:\n            log(\n                f\"Issue with energy type {energy_dict.get(\"type\")} for state {energy_dict.get(\"state\")}\",\n                \"debug\",\n            )\n            continue\n        match energy_dict.get(\"type\"):\n            case EIADataRetriever.EnergyType.PROPANE.value:\n                result_list = []\n                for month in months:\n                    key = f\"{year}-{month:02}\"\n                    val = energy_dict.get(key, float(\"NaN\"))\n                    if 
val is None:\n                        val = float(\"NaN\")\n                    result_list.append(val)\n                ax.plot(months, result_list, label=\"Propane Furnace\")\n            case EIADataRetriever.EnergyType.HEATING_OIL.value:\n                result_list = []\n                for month in months:\n                    key = f\"{year}-{month:02}\"\n                    val = energy_dict.get(key, float(\"NaN\"))\n                    if val is None:\n                        val = float(\"NaN\")\n                    result_list.append(val)\n                ax.plot(months, result_list, label=\"Heating Oil Boiler\")\n            case EIADataRetriever.EnergyType.NATURAL_GAS.value:\n                result_list = []\n                for month in months:\n                    key = f\"{year}-{month:02}\"\n                    val = energy_dict.get(key, float(\"NaN\"))\n                    if val is None:\n                        val = float(\"NaN\")\n                    result_list.append(val)\n                ax.plot(months, result_list, label=\"Natural Gas Furnace\")\n            case EIADataRetriever.EnergyType.ELECTRICITY.value:\n                result_list = []\n                for month in months:\n                    key = f\"{year}-{month:02}\"\n                    val = energy_dict.get(key, float(\"NaN\"))\n                    if val is None:\n                        val = float(\"NaN\")\n                    result_list.append(val)\n                ax.plot(months, result_list, label=\"Ducted Heat Pump\")\n    ax.legend()\n    with threading.Lock():\n        canvas = FigureCanvasTkAgg(fig, master=self.energy_graph_frame)\n        canvas.draw()\n\n        # toolbar = NavigationToolbar2Tk(canvas, window=self.energy_graph_frame, pack_toolbar=False)\n        # toolbar.update()\n        # canvas.mpl_connect(\"key_press_event\", key_press_handler)\n\n        # toolbar.grid(column=0, row=1, sticky=\"news\")\n        canvas.get_tk_widget().grid(column=0, row=0)\n
    "},{"location":"gui/datapage/#gui.datapage.DataPage.open_census_reporter_metro","title":"open_census_reporter_metro()","text":"

    Census reporter metro label callback

    Source code in src\\gui\\datapage.py
    def open_census_reporter_metro(self) -> None:\n    \"\"\"Census reporter metro label callback\"\"\"\n    metro_link = helper.get_census_report_url_page(f\"{self.msa_name} metro area\")  # type: ignore\n    webbrowser.open_new_tab(metro_link)\n
    "},{"location":"gui/datapage/#gui.datapage.DataPage.open_census_reporter_state","title":"open_census_reporter_state()","text":"

    Census reporter state label callback

    Source code in src\\gui\\datapage.py
    def open_census_reporter_state(self) -> None:\n    \"\"\"Census reporter state label callback\"\"\"\n    state_link = helper.get_census_report_url_page(\n        sts.lookup(self.select_state_dropdown.get()).name  # type: ignore\n    )\n    webbrowser.open_new_tab(state_link)\n
    "},{"location":"gui/datapage/#gui.datapage.DataPage.open_log_file","title":"open_log_file()","text":"

    Open logging file.

    Note

    Haven't tested this on macOS/Linux. \"darwin\" doesn't exist in sys.platform on Windows, so can't say for sure if this works.

    Source code in src\\gui\\datapage.py
    def open_log_file(self) -> None:\n    \"\"\"Open logging file.\n\n    Note:\n        Haven't tested this on mac/linux. \"darwin\" doesn't exist in `system.platform` on windows, so cant say for sure if this works\n    \"\"\"\n    try:\n        if sys.platform == \"win32\":\n            from os import startfile\n\n            startfile(helper.LOGGING_FILE_PATH)\n        else:\n            opener = \"open\" if sys.platform == \"darwin\" else \"xdg-open\"\n            subprocess.call([opener, helper.LOGGING_FILE_PATH])\n    except FileNotFoundError:\n        CTkMessagebox(\n            self,\n            title=\"Error\",\n            message=\"Logging file doesn't exist! Try rerunning the program or creating a logger.log file in /output/logging/\",\n            icon=\"warning\",\n        )\n
    "},{"location":"gui/datapage/#gui.datapage.DataPage.set_msa_name","title":"set_msa_name(msa_name)","text":"

    Set the msa name and update objects that rely on the msa name. This includes updating the drop-downs and generating the energy plot.

    Parameters:

    Name Type Description Default msa_name str

    Metropolitan Statistical Area name. This must be validated

    required Source code in src\\gui\\datapage.py
    def set_msa_name(self, msa_name: str) -> None:\n    \"\"\"Set the msa name and update objects that rely on the msa name. Includes drop downs and and generating the energy plot.\n\n    Args:\n        msa_name (str): Metropolitan Statistical Area name. This must be validated\n    \"\"\"\n    self.msa_name = msa_name\n    self.states_in_msa = helper.get_states_in_msa(self.msa_name)\n\n    if len(self.states_in_msa) > 0:\n        self.select_state_dropdown.configure()\n        self.select_state_dropdown.set(self.states_in_msa[0])\n\n    self.select_state_dropdown.configure(values=self.states_in_msa)\n    self.content_banner_main_text.configure(\n        text=f\"Census and Energy Data: {self.msa_name}\"\n    )\n    self.zip_list = helper.metro_name_to_zip_code_list(msa_name)\n    self.zip_list = [str(zip) for zip in self.zip_list]\n\n    threading.Thread(\n        target=self.generate_energy_plot,\n        args=(\n            int(self.select_year_dropdown.get()),\n            self.select_state_dropdown.get(),\n        ),\n        daemon=True,\n    ).start()\n
    "},{"location":"gui/datapage/#gui.datapage.DataPage.state_dropdown_callback","title":"state_dropdown_callback(state)","text":"

    Banner state callback. TODO: check if thread is running with given name, and if so join it and start the new thread

    Parameters:

    Name Type Description Default state str

    the state after the change

    required Source code in src\\gui\\datapage.py
    def state_dropdown_callback(self, state: str) -> None:\n    \"\"\"Banner state callback.\n    TODO:\n        check if thread is running with given name, and if so join it and start the new thread\n\n    Args:\n        state (str): the state after the change\n    \"\"\"\n\n    threading.Thread(\n        target=self.generate_energy_plot,\n        args=(\n            int(self.select_year_dropdown.get()),\n            state,\n        ),\n        name=\"energy_thread\",\n        daemon=True,\n    ).start()\n
    "},{"location":"gui/datapage/#gui.datapage.DataPage.year_dropdown_callback","title":"year_dropdown_callback(year)","text":"

    Banner year callback. TODO: Check if thread is running with given name, and if so join it and start the new thread

    Parameters:

    Name Type Description Default year str

    the year after the change

    required Source code in src\\gui\\datapage.py
    def year_dropdown_callback(self, year: str) -> None:\n    \"\"\"Banner year callback.\n    TODO:\n        Check if thread is running with given name, and if so join it and start the new thread\n\n    Args:\n        year (str): the year after the change\n    \"\"\"\n    threading.Thread(\n        target=self.generate_energy_plot,\n        args=(\n            int(year),\n            self.select_state_dropdown.get(),\n        ),\n        name=\"energy_thread\",\n        daemon=True,\n    ).start()\n
    "},{"location":"gui/filterspage/","title":"Filterspage","text":""},{"location":"gui/filterspage/#gui.filterspage.FiltersPage","title":"FiltersPage","text":"

    Bases: CTkFrame

    Source code in src\\gui\\filterspage.py
    class FiltersPage(ctk.CTkFrame):\n    def __init__(self, master: ctk.CTk, search_page: ctk.CTkFrame, **kwargs):\n        # main setup\n        super().__init__(master, **kwargs)\n        self.root = master\n        self.search_page = search_page\n        self.cur_year = datetime.datetime.now().year\n        self.year_list = [str(x) for x in range(2010, self.cur_year + 1)]\n        list.reverse(self.year_list)\n        self.sqft_list = [sqft.value for sqft in RedfinApi.Sqft]\n        list.reverse(self.sqft_list)\n        self.sold_within_list = [\n            \"Last 1 week\",\n            \"Last 1 month\",\n            \"Last 3 months\",\n            \"Last 6 months\",\n            \"Last 1 year\",\n            \"Last 2 years\",\n            \"Last 3 years\",\n            \"Last 5 years\",\n        ]\n        self.price_list = [price.value for price in RedfinApi.Price]\n        list.reverse(self.price_list)\n        self.create_widgets()\n        self.set_default_values()\n\n    def create_widgets(self) -> None:\n        \"\"\"Create widgets.\"\"\"\n        # frames\n        self.content_frame = ctk.CTkFrame(self)\n        self.for_sale_sold_frame = ctk.CTkFrame(\n            self.content_frame, width=300, height=100, fg_color=\"transparent\"\n        )\n        self.stories_frame = ctk.CTkFrame(self.content_frame, corner_radius=0)\n        self.year_built_frame = ctk.CTkFrame(\n            self.content_frame, corner_radius=0, fg_color=\"transparent\"\n        )\n        self.home_type_frame = ctk.CTkFrame(self.content_frame, corner_radius=0)\n        self.square_feet_frame = ctk.CTkFrame(\n            self.content_frame, corner_radius=0, fg_color=\"transparent\"\n        )\n        self.status_frame = ctk.CTkFrame(self.content_frame, corner_radius=0)\n        self.sold_within_frame = ctk.CTkFrame(\n            self.content_frame, fg_color=\"transparent\", corner_radius=0\n        )\n        self.price_range_frame = ctk.CTkFrame(self.content_frame, 
corner_radius=0)\n        self.reset_apply_frame = ctk.CTkFrame(\n            self.content_frame, fg_color=\"transparent\", corner_radius=0\n        )\n\n        # make more grid\n        self.columnconfigure((0, 2), weight=1)\n        self.columnconfigure(1, weight=30)\n        self.content_frame.columnconfigure(0, weight=1, uniform=\"a\")  # uniform\n        self.for_sale_sold_frame.columnconfigure((0, 1), weight=1)\n        self.stories_frame.columnconfigure((0, 1), weight=1)\n        self.year_built_frame.columnconfigure((0, 1, 2, 3), weight=1)\n        self.home_type_frame.columnconfigure((0, 1, 2, 3), weight=1)\n        self.square_feet_frame.columnconfigure((0, 1, 2, 3), weight=1)\n        self.status_frame.columnconfigure((0, 1, 2), weight=1)\n        self.sold_within_frame.columnconfigure((0, 1), weight=1)\n        self.price_range_frame.columnconfigure((0, 1, 2, 3), weight=1)\n        self.reset_apply_frame.columnconfigure((0, 1), weight=1)\n\n        self.rowconfigure(0, weight=1)\n        self.content_frame.rowconfigure(\n            (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), weight=1, uniform=\"a\"\n        )\n        self.for_sale_sold_frame.rowconfigure(0, weight=1)\n        self.stories_frame.rowconfigure(0, weight=1)\n        self.year_built_frame.rowconfigure((0, 1), weight=1)\n        self.home_type_frame.rowconfigure((0, 1, 2), weight=1)\n        self.square_feet_frame.rowconfigure((0, 1), weight=1)\n        self.status_frame.rowconfigure((0, 1), weight=1)\n        self.sold_within_frame.rowconfigure(0, weight=1)\n        self.price_range_frame.rowconfigure((0, 1), weight=1)\n        self.reset_apply_frame.rowconfigure(0, weight=1)\n\n        # placing the frames\n        self.content_frame.grid(row=0, column=1, sticky=\"ns\")\n        self.for_sale_sold_frame.grid(row=0, column=0, sticky=\"nsew\")\n        self.stories_frame.grid(row=1, column=0, sticky=\"nesw\")\n        self.year_built_frame.grid(row=2, column=0, sticky=\"nesw\")\n        
self.home_type_frame.grid(row=3, column=0, rowspan=2, sticky=\"nesw\")\n        self.square_feet_frame.grid(row=5, column=0, sticky=\"nesw\")\n        self.status_frame.grid(row=6, column=0)\n        self.sold_within_frame.grid(row=7, column=0, sticky=\"nesw\")\n        self.price_range_frame.grid(row=8, column=0, rowspan=2, sticky=\"nesw\")\n        self.reset_apply_frame.grid(row=10, column=0)\n\n        # Create the labels\n        self.for_sale_sold_label = ctk.CTkLabel(\n            self.for_sale_sold_frame, text=\"For Sale/Sold\"\n        )\n        self.stories_label = ctk.CTkLabel(self.stories_frame, text=\"Stories\")\n        self.year_built_label = ctk.CTkLabel(self.year_built_frame, text=\"Year Built\")\n        self.home_type_label = ctk.CTkLabel(self.home_type_frame, text=\"Home Type\")\n        self.sqft_label = ctk.CTkLabel(self.square_feet_frame, text=\"Square Feet\")\n        self.sale_status_label = ctk.CTkLabel(self.status_frame, text=\"Status\")\n        self.price_range_label = ctk.CTkLabel(\n            self.price_range_frame, text=\"Price Range\"\n        )\n        self.price_range_from_label = ctk.CTkLabel(self.price_range_frame, text=\"From\")\n        self.price_range_to_label = ctk.CTkLabel(self.price_range_frame, text=\"To\")\n        self.year_built_from_label = ctk.CTkLabel(self.year_built_frame, text=\"From\")\n        self.year_built_to_label = ctk.CTkLabel(self.year_built_frame, text=\"To\")\n        self.sold_within_label = ctk.CTkLabel(\n            self.sold_within_frame, text=\"Sold Within\"\n        )\n        self.sold_within_from_label = ctk.CTkLabel(self.square_feet_frame, text=\"From\")\n        self.sold_within_to_label = ctk.CTkLabel(self.square_feet_frame, text=\"To\")\n\n        # Create the Buttons\n        self.for_sale_sold_om = ctk.CTkOptionMenu(\n            master=self.for_sale_sold_frame,\n            values=[status.value for status in RedfinApi.SoldStatus],\n            command=lambda x: 
self.status_within_activate_deactivate(x),\n        )\n\n        self.min_stories_om = ctk.CTkOptionMenu(\n            self.stories_frame, values=[story.value for story in RedfinApi.Stories]\n        )\n\n        self.min_year_built_om = ctk.CTkOptionMenu(\n            self.year_built_frame,\n            values=self.year_list,\n            command=lambda x: self.year_validation(),\n        )\n\n        self.max_year_built_om = ctk.CTkOptionMenu(\n            self.year_built_frame,\n            values=self.year_list,\n            command=lambda x: self.year_validation(),\n        )\n\n        self.house_type_house_switch = ctk.CTkSwitch(\n            self.home_type_frame,\n            text=\"House\",\n            command=self.house_type_validation,\n        )\n        self.house_type_townhouse_switch = ctk.CTkSwitch(\n            self.home_type_frame,\n            text=\"Townhouse\",\n            command=self.house_type_validation,\n        )\n        self.house_type_condo_switch = ctk.CTkSwitch(\n            self.home_type_frame,\n            text=\"Condo\",\n            command=self.house_type_validation,\n        )\n        self.house_type_mul_fam_switch = ctk.CTkSwitch(\n            self.home_type_frame,\n            text=\"Multi-Family\",\n            command=self.house_type_validation,\n        )\n\n        self.min_sqft_om = ctk.CTkOptionMenu(\n            self.square_feet_frame,\n            values=self.sqft_list,\n            command=lambda x: self.sqft_validation(),\n        )\n        self.max_sqft_om = ctk.CTkOptionMenu(\n            self.square_feet_frame,\n            values=self.sqft_list,\n            command=lambda x: self.sqft_validation(),\n        )\n        self.status_coming_soon_chb = ctk.CTkCheckBox(\n            self.status_frame, text=\"Coming soon\"\n        )\n        self.status_active_chb = ctk.CTkCheckBox(self.status_frame, text=\"Active\")\n        self.status_pending_chb = ctk.CTkCheckBox(\n            self.status_frame, text=\"Under 
contract/Pending\"\n        )  # missing one i think\n        self.sold_within_om = ctk.CTkOptionMenu(\n            self.sold_within_frame, values=self.sold_within_list\n        )\n\n        self.min_price_om = ctk.CTkOptionMenu(\n            self.price_range_frame,\n            values=self.price_list,\n            command=lambda x: self.price_validation(),\n        )\n        self.max_price_om = ctk.CTkOptionMenu(\n            self.price_range_frame,\n            values=self.price_list,\n            command=lambda x: self.price_validation(),\n        )\n\n        self.reset_filters_button = ctk.CTkButton(\n            self.reset_apply_frame,\n            text=\"Reset Filters\",\n            command=self.set_default_values,\n        )\n        self.apply_filters_button = ctk.CTkButton(\n            self.reset_apply_frame,\n            text=\"Apply Filters\",\n            command=self.change_to_search_page,\n        )\n\n        # Placing the widgets\n        self.for_sale_sold_label.grid(row=0, column=0)\n        self.stories_label.grid(row=0, column=0)\n        self.year_built_label.grid(row=0, column=0)\n        self.home_type_label.grid(row=0, column=0)\n        self.sqft_label.grid(row=0, column=0)\n        self.sale_status_label.grid(row=0, column=0)\n        self.price_range_label.grid(row=0, column=0)\n        self.year_built_from_label.grid(row=1, column=0)\n        self.year_built_to_label.grid(row=1, column=2)\n        self.price_range_from_label.grid(row=1, column=0)\n        self.price_range_to_label.grid(row=1, column=2)\n        self.sold_within_label.grid(row=0, column=0)\n        self.sold_within_from_label.grid(row=1, column=0)\n        self.sold_within_to_label.grid(row=1, column=2)\n\n        self.for_sale_sold_om.grid(row=0, column=1)\n        self.min_stories_om.grid(row=0, column=1)\n        self.min_year_built_om.grid(row=1, column=1)\n        self.max_year_built_om.grid(row=1, column=3)\n        self.min_sqft_om.grid(row=1, column=1)\n       
 self.max_sqft_om.grid(row=1, column=3)\n        self.sold_within_om.grid(row=0, column=1)\n        self.min_price_om.grid(row=1, column=1)\n        self.max_price_om.grid(row=1, column=3)\n        self.house_type_house_switch.grid(row=1, column=0)\n        self.house_type_townhouse_switch.grid(row=1, column=1)\n        self.house_type_condo_switch.grid(row=2, column=0)\n        self.house_type_mul_fam_switch.grid(row=2, column=1)\n        self.status_coming_soon_chb.grid(row=1, column=0)\n        self.status_active_chb.grid(row=1, column=1)\n        self.status_pending_chb.grid(row=1, column=2)\n        self.reset_filters_button.grid(row=0, column=0, sticky=\"w\")\n        self.apply_filters_button.grid(row=0, column=1, sticky=\"e\")\n\n    def set_default_values(self) -> None:\n        \"\"\"Set the default values for all widgets.\n        Note:\n            Should be called after init and when clicking reset button.\n        \"\"\"\n        self.for_sale_sold_om.set(RedfinApi.SoldStatus.SOLD.value)\n        self.min_stories_om.set(RedfinApi.Stories.ONE.value)\n        self.min_year_built_om.set(str(self.cur_year - 1))\n        self.max_year_built_om.set(str(self.cur_year - 1))\n        self.sold_within_om.set(self.sold_within_list[-1])\n        self.max_price_om.set(RedfinApi.Price.NONE.value)\n        self.min_price_om.set(RedfinApi.Price.NONE.value)\n        self.max_sqft_om.set(RedfinApi.Sqft.NONE.value)\n        self.min_sqft_om.set(RedfinApi.Sqft.NONE.value)\n        self.status_active_chb.deselect()\n        self.status_pending_chb.deselect()\n        self.status_coming_soon_chb.deselect()\n        self.house_type_house_switch.select()\n        self.house_type_condo_switch.deselect()\n        self.house_type_townhouse_switch.deselect()\n        self.house_type_mul_fam_switch.deselect()\n        self.status_within_activate_deactivate(self.for_sale_sold_om.get())\n\n    def status_within_activate_deactivate(self, status) -> None:\n        \"\"\"Deactivate or 
activate the status and sold within sections, since they depend on what type of sale a house is being searched with.\n\n        Args:\n            status (Event): ignored\n        \"\"\"\n        match self.for_sale_sold_om.get():\n            case RedfinApi.SoldStatus.FOR_SALE.value:\n                self.sale_status_label.configure(state=\"normal\")\n                self.status_active_chb.configure(state=\"normal\")\n                self.status_coming_soon_chb.configure(state=\"normal\")\n                self.status_pending_chb.configure(state=\"normal\")\n                self.sold_within_label.configure(state=\"disabled\")\n                self.sold_within_om.configure(state=\"disabled\")\n            case RedfinApi.SoldStatus.SOLD.value:\n                self.sale_status_label.configure(state=\"disabled\")\n                self.status_active_chb.configure(state=\"disabled\")\n                self.status_coming_soon_chb.configure(state=\"disabled\")\n                self.status_pending_chb.configure(state=\"disabled\")\n                self.sold_within_label.configure(state=\"normal\")\n                self.sold_within_om.configure(state=\"normal\")\n                self.status_active_chb.deselect()\n                self.status_pending_chb.deselect()\n                self.status_coming_soon_chb.deselect()\n\n    def change_to_search_page(self) -> None:\n        \"\"\"Change to search page.\"\"\"\n        self.grid_remove()\n        self.search_page.grid()\n\n    def price_validation(self):\n        \"\"\"Called when price range min om gets changed\"\"\"\n        if (\n            self.max_price_om.get() == RedfinApi.Price.NONE.value\n            or self.min_price_om.get() == RedfinApi.Price.NONE.value\n        ):\n            return\n        if int(self.max_price_om.get()) < int(self.min_price_om.get()):\n            self.max_price_om.set(self.min_price_om.get())\n\n    def year_validation(self) -> None:\n        \"\"\"Year drop down callback\"\"\"\n        if 
int(self.max_year_built_om.get()) < int(self.min_year_built_om.get()):\n            self.max_year_built_om.set(self.min_year_built_om.get())\n\n    def sqft_validation(self) -> None:\n        \"\"\"Sqft dropdown callback\"\"\"\n        if (\n            self.max_sqft_om.get() == RedfinApi.Sqft.NONE.value\n            or self.min_sqft_om.get() == RedfinApi.Sqft.NONE.value\n        ):\n            return\n        if int(self.max_sqft_om.get()) < int(self.min_sqft_om.get()):\n            self.max_sqft_om.set(self.min_sqft_om.get())\n\n    def house_type_validation(self) -> None:\n        \"\"\"House type switch validation to make sure at lest house is selected.\"\"\"\n        if not any(\n            [\n                self.house_type_house_switch.get(),\n                self.house_type_condo_switch.get(),\n                self.house_type_mul_fam_switch.get(),\n                self.house_type_townhouse_switch.get(),\n            ]\n        ):\n            self.house_type_house_switch.select()\n\n    def get_values(self) -> dict[str, Any]:\n        \"\"\"Get the values of all widgets on this page.\n\n        Returns:\n            dict[str, Any]: dict of values\n        \"\"\"\n        match self.sold_within_om.get():\n            case \"Last 1 week\":\n                sold_within_days = RedfinApi.SoldWithinDays.ONE_WEEK\n            case \"Last 1 month\":\n                sold_within_days = RedfinApi.SoldWithinDays.ONE_MONTH\n            case \"Last 3 months\":\n                sold_within_days = RedfinApi.SoldWithinDays.THREE_MONTHS\n            case \"Last 6 months\":\n                sold_within_days = RedfinApi.SoldWithinDays.SIX_MONTHS\n            case \"Last 1 year\":\n                sold_within_days = RedfinApi.SoldWithinDays.ONE_YEAR\n            case \"Last 2 years\":\n                sold_within_days = RedfinApi.SoldWithinDays.TWO_YEARS\n            case \"Last 3 years\":\n                sold_within_days = RedfinApi.SoldWithinDays.THREE_YEARS\n            
case _:\n                sold_within_days = RedfinApi.SoldWithinDays.FIVE_YEARS\n\n        return {\n            \"for sale sold\": self.for_sale_sold_om.get(),\n            \"min stories\": self.min_stories_om.get(),\n            \"max year built\": self.max_year_built_om.get(),  # do validation here\n            \"min year built\": self.min_year_built_om.get(),\n            \"sold within\": sold_within_days.value,\n            \"status active\": bool(self.status_active_chb.get()),\n            \"status coming soon\": bool(self.status_coming_soon_chb.get()),\n            \"status pending\": bool(self.status_pending_chb.get()),\n            \"house type house\": bool(self.house_type_house_switch.get()),\n            \"house type townhouse\": bool(self.house_type_townhouse_switch.get()),\n            \"house type mul fam\": bool(self.house_type_mul_fam_switch.get()),\n            \"house type condo\": bool(self.house_type_condo_switch.get()),\n            \"max sqft\": self.max_sqft_om.get(),\n            \"min sqft\": self.min_sqft_om.get(),\n            \"max price\": self.max_price_om.get(),\n            \"min price\": self.min_price_om.get(),\n        }\n
    "},{"location":"gui/filterspage/#gui.filterspage.FiltersPage.change_to_search_page","title":"change_to_search_page()","text":"

    Change to search page.

    Source code in src\\gui\\filterspage.py
    def change_to_search_page(self) -> None:\n    \"\"\"Change to search page.\"\"\"\n    self.grid_remove()\n    self.search_page.grid()\n
    "},{"location":"gui/filterspage/#gui.filterspage.FiltersPage.create_widgets","title":"create_widgets()","text":"

    Create widgets.

    Source code in src\\gui\\filterspage.py
    def create_widgets(self) -> None:\n    \"\"\"Create widgets.\"\"\"\n    # frames\n    self.content_frame = ctk.CTkFrame(self)\n    self.for_sale_sold_frame = ctk.CTkFrame(\n        self.content_frame, width=300, height=100, fg_color=\"transparent\"\n    )\n    self.stories_frame = ctk.CTkFrame(self.content_frame, corner_radius=0)\n    self.year_built_frame = ctk.CTkFrame(\n        self.content_frame, corner_radius=0, fg_color=\"transparent\"\n    )\n    self.home_type_frame = ctk.CTkFrame(self.content_frame, corner_radius=0)\n    self.square_feet_frame = ctk.CTkFrame(\n        self.content_frame, corner_radius=0, fg_color=\"transparent\"\n    )\n    self.status_frame = ctk.CTkFrame(self.content_frame, corner_radius=0)\n    self.sold_within_frame = ctk.CTkFrame(\n        self.content_frame, fg_color=\"transparent\", corner_radius=0\n    )\n    self.price_range_frame = ctk.CTkFrame(self.content_frame, corner_radius=0)\n    self.reset_apply_frame = ctk.CTkFrame(\n        self.content_frame, fg_color=\"transparent\", corner_radius=0\n    )\n\n    # make more grid\n    self.columnconfigure((0, 2), weight=1)\n    self.columnconfigure(1, weight=30)\n    self.content_frame.columnconfigure(0, weight=1, uniform=\"a\")  # uniform\n    self.for_sale_sold_frame.columnconfigure((0, 1), weight=1)\n    self.stories_frame.columnconfigure((0, 1), weight=1)\n    self.year_built_frame.columnconfigure((0, 1, 2, 3), weight=1)\n    self.home_type_frame.columnconfigure((0, 1, 2, 3), weight=1)\n    self.square_feet_frame.columnconfigure((0, 1, 2, 3), weight=1)\n    self.status_frame.columnconfigure((0, 1, 2), weight=1)\n    self.sold_within_frame.columnconfigure((0, 1), weight=1)\n    self.price_range_frame.columnconfigure((0, 1, 2, 3), weight=1)\n    self.reset_apply_frame.columnconfigure((0, 1), weight=1)\n\n    self.rowconfigure(0, weight=1)\n    self.content_frame.rowconfigure(\n        (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), weight=1, uniform=\"a\"\n    )\n    
self.for_sale_sold_frame.rowconfigure(0, weight=1)\n    self.stories_frame.rowconfigure(0, weight=1)\n    self.year_built_frame.rowconfigure((0, 1), weight=1)\n    self.home_type_frame.rowconfigure((0, 1, 2), weight=1)\n    self.square_feet_frame.rowconfigure((0, 1), weight=1)\n    self.status_frame.rowconfigure((0, 1), weight=1)\n    self.sold_within_frame.rowconfigure(0, weight=1)\n    self.price_range_frame.rowconfigure((0, 1), weight=1)\n    self.reset_apply_frame.rowconfigure(0, weight=1)\n\n    # placing the frames\n    self.content_frame.grid(row=0, column=1, sticky=\"ns\")\n    self.for_sale_sold_frame.grid(row=0, column=0, sticky=\"nsew\")\n    self.stories_frame.grid(row=1, column=0, sticky=\"nesw\")\n    self.year_built_frame.grid(row=2, column=0, sticky=\"nesw\")\n    self.home_type_frame.grid(row=3, column=0, rowspan=2, sticky=\"nesw\")\n    self.square_feet_frame.grid(row=5, column=0, sticky=\"nesw\")\n    self.status_frame.grid(row=6, column=0)\n    self.sold_within_frame.grid(row=7, column=0, sticky=\"nesw\")\n    self.price_range_frame.grid(row=8, column=0, rowspan=2, sticky=\"nesw\")\n    self.reset_apply_frame.grid(row=10, column=0)\n\n    # Create the labels\n    self.for_sale_sold_label = ctk.CTkLabel(\n        self.for_sale_sold_frame, text=\"For Sale/Sold\"\n    )\n    self.stories_label = ctk.CTkLabel(self.stories_frame, text=\"Stories\")\n    self.year_built_label = ctk.CTkLabel(self.year_built_frame, text=\"Year Built\")\n    self.home_type_label = ctk.CTkLabel(self.home_type_frame, text=\"Home Type\")\n    self.sqft_label = ctk.CTkLabel(self.square_feet_frame, text=\"Square Feet\")\n    self.sale_status_label = ctk.CTkLabel(self.status_frame, text=\"Status\")\n    self.price_range_label = ctk.CTkLabel(\n        self.price_range_frame, text=\"Price Range\"\n    )\n    self.price_range_from_label = ctk.CTkLabel(self.price_range_frame, text=\"From\")\n    self.price_range_to_label = ctk.CTkLabel(self.price_range_frame, text=\"To\")\n    
self.year_built_from_label = ctk.CTkLabel(self.year_built_frame, text=\"From\")\n    self.year_built_to_label = ctk.CTkLabel(self.year_built_frame, text=\"To\")\n    self.sold_within_label = ctk.CTkLabel(\n        self.sold_within_frame, text=\"Sold Within\"\n    )\n    self.sold_within_from_label = ctk.CTkLabel(self.square_feet_frame, text=\"From\")\n    self.sold_within_to_label = ctk.CTkLabel(self.square_feet_frame, text=\"To\")\n\n    # Create the Buttons\n    self.for_sale_sold_om = ctk.CTkOptionMenu(\n        master=self.for_sale_sold_frame,\n        values=[status.value for status in RedfinApi.SoldStatus],\n        command=lambda x: self.status_within_activate_deactivate(x),\n    )\n\n    self.min_stories_om = ctk.CTkOptionMenu(\n        self.stories_frame, values=[story.value for story in RedfinApi.Stories]\n    )\n\n    self.min_year_built_om = ctk.CTkOptionMenu(\n        self.year_built_frame,\n        values=self.year_list,\n        command=lambda x: self.year_validation(),\n    )\n\n    self.max_year_built_om = ctk.CTkOptionMenu(\n        self.year_built_frame,\n        values=self.year_list,\n        command=lambda x: self.year_validation(),\n    )\n\n    self.house_type_house_switch = ctk.CTkSwitch(\n        self.home_type_frame,\n        text=\"House\",\n        command=self.house_type_validation,\n    )\n    self.house_type_townhouse_switch = ctk.CTkSwitch(\n        self.home_type_frame,\n        text=\"Townhouse\",\n        command=self.house_type_validation,\n    )\n    self.house_type_condo_switch = ctk.CTkSwitch(\n        self.home_type_frame,\n        text=\"Condo\",\n        command=self.house_type_validation,\n    )\n    self.house_type_mul_fam_switch = ctk.CTkSwitch(\n        self.home_type_frame,\n        text=\"Multi-Family\",\n        command=self.house_type_validation,\n    )\n\n    self.min_sqft_om = ctk.CTkOptionMenu(\n        self.square_feet_frame,\n        values=self.sqft_list,\n        command=lambda x: self.sqft_validation(),\n   
 )\n    self.max_sqft_om = ctk.CTkOptionMenu(\n        self.square_feet_frame,\n        values=self.sqft_list,\n        command=lambda x: self.sqft_validation(),\n    )\n    self.status_coming_soon_chb = ctk.CTkCheckBox(\n        self.status_frame, text=\"Coming soon\"\n    )\n    self.status_active_chb = ctk.CTkCheckBox(self.status_frame, text=\"Active\")\n    self.status_pending_chb = ctk.CTkCheckBox(\n        self.status_frame, text=\"Under contract/Pending\"\n    )  # missing one i think\n    self.sold_within_om = ctk.CTkOptionMenu(\n        self.sold_within_frame, values=self.sold_within_list\n    )\n\n    self.min_price_om = ctk.CTkOptionMenu(\n        self.price_range_frame,\n        values=self.price_list,\n        command=lambda x: self.price_validation(),\n    )\n    self.max_price_om = ctk.CTkOptionMenu(\n        self.price_range_frame,\n        values=self.price_list,\n        command=lambda x: self.price_validation(),\n    )\n\n    self.reset_filters_button = ctk.CTkButton(\n        self.reset_apply_frame,\n        text=\"Reset Filters\",\n        command=self.set_default_values,\n    )\n    self.apply_filters_button = ctk.CTkButton(\n        self.reset_apply_frame,\n        text=\"Apply Filters\",\n        command=self.change_to_search_page,\n    )\n\n    # Placing the widgets\n    self.for_sale_sold_label.grid(row=0, column=0)\n    self.stories_label.grid(row=0, column=0)\n    self.year_built_label.grid(row=0, column=0)\n    self.home_type_label.grid(row=0, column=0)\n    self.sqft_label.grid(row=0, column=0)\n    self.sale_status_label.grid(row=0, column=0)\n    self.price_range_label.grid(row=0, column=0)\n    self.year_built_from_label.grid(row=1, column=0)\n    self.year_built_to_label.grid(row=1, column=2)\n    self.price_range_from_label.grid(row=1, column=0)\n    self.price_range_to_label.grid(row=1, column=2)\n    self.sold_within_label.grid(row=0, column=0)\n    self.sold_within_from_label.grid(row=1, column=0)\n    
self.sold_within_to_label.grid(row=1, column=2)\n\n    self.for_sale_sold_om.grid(row=0, column=1)\n    self.min_stories_om.grid(row=0, column=1)\n    self.min_year_built_om.grid(row=1, column=1)\n    self.max_year_built_om.grid(row=1, column=3)\n    self.min_sqft_om.grid(row=1, column=1)\n    self.max_sqft_om.grid(row=1, column=3)\n    self.sold_within_om.grid(row=0, column=1)\n    self.min_price_om.grid(row=1, column=1)\n    self.max_price_om.grid(row=1, column=3)\n    self.house_type_house_switch.grid(row=1, column=0)\n    self.house_type_townhouse_switch.grid(row=1, column=1)\n    self.house_type_condo_switch.grid(row=2, column=0)\n    self.house_type_mul_fam_switch.grid(row=2, column=1)\n    self.status_coming_soon_chb.grid(row=1, column=0)\n    self.status_active_chb.grid(row=1, column=1)\n    self.status_pending_chb.grid(row=1, column=2)\n    self.reset_filters_button.grid(row=0, column=0, sticky=\"w\")\n    self.apply_filters_button.grid(row=0, column=1, sticky=\"e\")\n
    "},{"location":"gui/filterspage/#gui.filterspage.FiltersPage.get_values","title":"get_values()","text":"

    Get the values of all widgets on this page.

    Returns:

    Type Description dict[str, Any]

    dict[str, Any]: dict of values

    Source code in src\\gui\\filterspage.py
    def get_values(self) -> dict[str, Any]:\n    \"\"\"Get the values of all widgets on this page.\n\n    Returns:\n        dict[str, Any]: dict of values\n    \"\"\"\n    match self.sold_within_om.get():\n        case \"Last 1 week\":\n            sold_within_days = RedfinApi.SoldWithinDays.ONE_WEEK\n        case \"Last 1 month\":\n            sold_within_days = RedfinApi.SoldWithinDays.ONE_MONTH\n        case \"Last 3 months\":\n            sold_within_days = RedfinApi.SoldWithinDays.THREE_MONTHS\n        case \"Last 6 months\":\n            sold_within_days = RedfinApi.SoldWithinDays.SIX_MONTHS\n        case \"Last 1 year\":\n            sold_within_days = RedfinApi.SoldWithinDays.ONE_YEAR\n        case \"Last 2 years\":\n            sold_within_days = RedfinApi.SoldWithinDays.TWO_YEARS\n        case \"Last 3 years\":\n            sold_within_days = RedfinApi.SoldWithinDays.THREE_YEARS\n        case _:\n            sold_within_days = RedfinApi.SoldWithinDays.FIVE_YEARS\n\n    return {\n        \"for sale sold\": self.for_sale_sold_om.get(),\n        \"min stories\": self.min_stories_om.get(),\n        \"max year built\": self.max_year_built_om.get(),  # do validation here\n        \"min year built\": self.min_year_built_om.get(),\n        \"sold within\": sold_within_days.value,\n        \"status active\": bool(self.status_active_chb.get()),\n        \"status coming soon\": bool(self.status_coming_soon_chb.get()),\n        \"status pending\": bool(self.status_pending_chb.get()),\n        \"house type house\": bool(self.house_type_house_switch.get()),\n        \"house type townhouse\": bool(self.house_type_townhouse_switch.get()),\n        \"house type mul fam\": bool(self.house_type_mul_fam_switch.get()),\n        \"house type condo\": bool(self.house_type_condo_switch.get()),\n        \"max sqft\": self.max_sqft_om.get(),\n        \"min sqft\": self.min_sqft_om.get(),\n        \"max price\": self.max_price_om.get(),\n        \"min price\": 
self.min_price_om.get(),\n    }\n
    "},{"location":"gui/filterspage/#gui.filterspage.FiltersPage.house_type_validation","title":"house_type_validation()","text":"

    House type switch validation to make sure at least one house type is selected.

    Source code in src\\gui\\filterspage.py
    def house_type_validation(self) -> None:\n    \"\"\"House type switch validation to make sure at lest house is selected.\"\"\"\n    if not any(\n        [\n            self.house_type_house_switch.get(),\n            self.house_type_condo_switch.get(),\n            self.house_type_mul_fam_switch.get(),\n            self.house_type_townhouse_switch.get(),\n        ]\n    ):\n        self.house_type_house_switch.select()\n
    "},{"location":"gui/filterspage/#gui.filterspage.FiltersPage.price_validation","title":"price_validation()","text":"

    Called when either price range option menu changes; raises the max price to the min price if the max is below the min.

    Source code in src\\gui\\filterspage.py
    def price_validation(self):\n    \"\"\"Called when price range min om gets changed\"\"\"\n    if (\n        self.max_price_om.get() == RedfinApi.Price.NONE.value\n        or self.min_price_om.get() == RedfinApi.Price.NONE.value\n    ):\n        return\n    if int(self.max_price_om.get()) < int(self.min_price_om.get()):\n        self.max_price_om.set(self.min_price_om.get())\n
    "},{"location":"gui/filterspage/#gui.filterspage.FiltersPage.set_default_values","title":"set_default_values()","text":"

    Set the default values for all widgets. Note: Should be called after init and when clicking reset button.

    Source code in src\\gui\\filterspage.py
    def set_default_values(self) -> None:\n    \"\"\"Set the default values for all widgets.\n    Note:\n        Should be called after init and when clicking reset button.\n    \"\"\"\n    self.for_sale_sold_om.set(RedfinApi.SoldStatus.SOLD.value)\n    self.min_stories_om.set(RedfinApi.Stories.ONE.value)\n    self.min_year_built_om.set(str(self.cur_year - 1))\n    self.max_year_built_om.set(str(self.cur_year - 1))\n    self.sold_within_om.set(self.sold_within_list[-1])\n    self.max_price_om.set(RedfinApi.Price.NONE.value)\n    self.min_price_om.set(RedfinApi.Price.NONE.value)\n    self.max_sqft_om.set(RedfinApi.Sqft.NONE.value)\n    self.min_sqft_om.set(RedfinApi.Sqft.NONE.value)\n    self.status_active_chb.deselect()\n    self.status_pending_chb.deselect()\n    self.status_coming_soon_chb.deselect()\n    self.house_type_house_switch.select()\n    self.house_type_condo_switch.deselect()\n    self.house_type_townhouse_switch.deselect()\n    self.house_type_mul_fam_switch.deselect()\n    self.status_within_activate_deactivate(self.for_sale_sold_om.get())\n
    "},{"location":"gui/filterspage/#gui.filterspage.FiltersPage.sqft_validation","title":"sqft_validation()","text":"

    Sqft dropdown callback

    Source code in src\\gui\\filterspage.py
    def sqft_validation(self) -> None:\n    \"\"\"Sqft dropdown callback\"\"\"\n    if (\n        self.max_sqft_om.get() == RedfinApi.Sqft.NONE.value\n        or self.min_sqft_om.get() == RedfinApi.Sqft.NONE.value\n    ):\n        return\n    if int(self.max_sqft_om.get()) < int(self.min_sqft_om.get()):\n        self.max_sqft_om.set(self.min_sqft_om.get())\n
    "},{"location":"gui/filterspage/#gui.filterspage.FiltersPage.status_within_activate_deactivate","title":"status_within_activate_deactivate(status)","text":"

    Deactivate or activate the status and sold within sections, since they depend on the sale type being searched (for sale or sold).

    Parameters:

    Name Type Description Default status Event

    ignored

    required Source code in src\\gui\\filterspage.py
    def status_within_activate_deactivate(self, status) -> None:\n    \"\"\"Deactivate or activate the status and sold within sections, since they depend on what type of sale a house is being searched with.\n\n    Args:\n        status (Event): ignored\n    \"\"\"\n    match self.for_sale_sold_om.get():\n        case RedfinApi.SoldStatus.FOR_SALE.value:\n            self.sale_status_label.configure(state=\"normal\")\n            self.status_active_chb.configure(state=\"normal\")\n            self.status_coming_soon_chb.configure(state=\"normal\")\n            self.status_pending_chb.configure(state=\"normal\")\n            self.sold_within_label.configure(state=\"disabled\")\n            self.sold_within_om.configure(state=\"disabled\")\n        case RedfinApi.SoldStatus.SOLD.value:\n            self.sale_status_label.configure(state=\"disabled\")\n            self.status_active_chb.configure(state=\"disabled\")\n            self.status_coming_soon_chb.configure(state=\"disabled\")\n            self.status_pending_chb.configure(state=\"disabled\")\n            self.sold_within_label.configure(state=\"normal\")\n            self.sold_within_om.configure(state=\"normal\")\n            self.status_active_chb.deselect()\n            self.status_pending_chb.deselect()\n            self.status_coming_soon_chb.deselect()\n
    "},{"location":"gui/filterspage/#gui.filterspage.FiltersPage.year_validation","title":"year_validation()","text":"

    Year drop down callback

    Source code in src\\gui\\filterspage.py
    def year_validation(self) -> None:\n    \"\"\"Year drop down callback\"\"\"\n    if int(self.max_year_built_om.get()) < int(self.min_year_built_om.get()):\n        self.max_year_built_om.set(self.min_year_built_om.get())\n
    "},{"location":"gui/searchpage/","title":"Searchpage","text":""},{"location":"gui/searchpage/#gui.searchpage.SearchPage","title":"SearchPage","text":"

    Bases: CTkFrame

    Source code in src\\gui\\searchpage.py
    class SearchPage(ctk.CTkFrame):\n    def __init__(self, master: ctk.CTk, **kwargs):\n        super().__init__(master, **kwargs)\n        self.master = master\n        self.datapage = None\n        self.label_font = ctk.CTkFont(\"Roboto\", 34)\n        self.MATCHES_TO_DISPLAY = 20  # performance and practicality\n        self.auto_complete_series = get_unique_msa_from_master()\n        self.current_auto_complete_series = None\n        self.prev_search_bar_len = 0\n        self.filters_page = FiltersPage(self.master, self)\n        self.create_widgets()\n\n    def create_widgets(self) -> None:\n        \"\"\"Create widgets.\"\"\"\n        self.top_text = ctk.CTkLabel(\n            self,\n            text=\"Residential Heating Search For Metropolitan Statistical Areas\",\n            font=self.label_font,\n            wraplength=600,\n        )\n        CTkToolTip(\n            self.top_text,\n            delay=0.25,\n            message=\"An MSA is a census defined region that consists of a city and \\nsurrounding communities that are linked by social and economic factors. 
\\nThe core city has a population of at least 50,000\",\n        )\n        self.redfin_filters_button = ctk.CTkButton(\n            self,\n            corner_radius=10,\n            height=35,\n            text=\"Add Filters\",\n            command=self.change_to_filters_page,\n        )\n        CTkToolTip(\n            self.redfin_filters_button,\n            delay=0.25,\n            message=\"Select filters for your search.\",\n        )\n        self.search_bar = ctk.CTkEntry(\n            self, height=40, corner_radius=40, placeholder_text=\"Search for an MSA\"\n        )\n        self.suggestion_list_box = CTkListbox(\n            self,\n            text_color=(\"gray10\", \"#DCE4EE\"),  # type: ignore\n            border_width=2,\n            command=lambda x: self.update_entry_on_autocomplete_select(x),\n        )\n        self.search_frame = ctk.CTkFrame(self, fg_color=\"transparent\")\n        self.search_button = ctk.CTkButton(\n            self.search_frame,\n            text=\"Search\",\n            fg_color=\"transparent\",\n            height=35,\n            corner_radius=10,\n            border_width=2,\n            text_color=(\"gray10\", \"#DCE4EE\"),\n            command=self.validate_entry_box_and_search,\n        )\n        self.cache_chb = ctk.CTkCheckBox(self.search_frame, text=\"Use cache\")\n\n        self.columnconfigure((0, 2), weight=1)\n        self.columnconfigure(1, weight=4)\n        self.rowconfigure(0, weight=10)\n        self.rowconfigure(1, weight=4)\n        self.rowconfigure(2, weight=10)\n\n        self.top_text.grid(column=0, row=0, columnspan=3)\n\n        self.redfin_filters_button.grid(column=0, row=1, padx=(0, 40), sticky=\"e\")\n\n        self.search_bar.grid(column=1, row=1, sticky=\"ew\")\n\n        self.suggestion_list_box.grid(column=1, row=2, sticky=\"new\", pady=(10, 0))\n\n        self.search_frame.columnconfigure(0, weight=1)\n        self.search_frame.rowconfigure((0, 1), weight=1)\n        # pady is hacky but 
whatever\n        self.search_frame.grid(column=2, row=1, padx=(40, 0), pady=(46, 0))\n        self.search_button.grid(column=0, row=0, sticky=\"w\")\n        self.cache_chb.grid(column=0, row=1, pady=(20, 0), sticky=\"w\")\n\n        self.suggestion_list_box.grid_remove()\n        self.search_bar.bind(\n            \"<KeyRelease>\", command=lambda x: self.update_suggestions_listbox(x)\n        )\n\n    def update_suggestions_listbox(self, x: Event | None) -> None:\n        \"\"\"Update the suggestions box based on the contents of 'self.search_bar'.\n\n        Args:\n            x (Event | None): ignored\n        \"\"\"\n        cur_text = re.escape(self.search_bar.get())\n        if cur_text == \"\":\n            # only gets called when all text has been deleted\n            self.current_auto_complete_series = self.auto_complete_series\n            self.suggestion_list_box.grid_remove()\n        else:\n            self.suggestion_list_box.delete(\"all\")\n            if (\n                self.current_auto_complete_series is None\n                or len(cur_text) < self.prev_search_bar_len\n            ):\n                self.current_auto_complete_series = self.auto_complete_series.filter(\n                    self.auto_complete_series.str.contains(rf\"(?i)^{cur_text}\")\n                )\n            else:\n                self.current_auto_complete_series = (\n                    self.current_auto_complete_series.filter(\n                        self.current_auto_complete_series.str.contains(\n                            rf\"(?i)^{cur_text}\"\n                        )\n                    )\n                )\n            self.suggestion_list_box.grid()\n            try:\n                self.current_auto_complete_series.head(\n                    self.MATCHES_TO_DISPLAY\n                ).map_elements(\n                    lambda msa: self.suggestion_list_box.insert(\n                        \"end\", msa, border_width=2, border_color=\"gray\"\n               
     ),\n                    return_dtype=pl.Utf8,\n                )\n            except KeyError:\n                # always throws a key error, doesnt matter to us, just pollutes logs\n                pass\n        self.prev_search_bar_len = len(cur_text)\n\n    def update_entry_on_autocomplete_select(self, x: Event) -> None:\n        \"\"\"Suggestions list box callback for when a button in the list box is selected.\"\"\"\n        self.search_bar.delete(0, ctk.END)\n        self.search_bar.insert(0, x)\n        self.update_suggestions_listbox(None)\n\n    def validate_entry_box_and_search(self) -> None:\n        \"\"\"Validate `self.search_bar` contents and search if the contents are an MSA name.\"\"\"\n        cur_text = self.search_bar.get()\n        if len(cur_text) == 0:\n            cur_text = r\"!^\"\n        if any(self.auto_complete_series.str.contains(rf\"{cur_text}$\")):\n            self.data_page = DataPage(self.master)\n            self.data_page.grid(row=0, column=0, sticky=\"news\")\n            self.go_to_data_page(cur_text)\n            self.search_metros_threaded(cur_text)\n        else:\n            CTkMessagebox(\n                self,\n                title=\"Error\",\n                message=\"Inputted name is not in MSA name list!\",\n                icon=\"warning\",\n            )\n\n    def go_to_data_page(self, msa_name: str) -> None:\n        \"\"\"Switch to data page.\n\n        Args:\n            msa_name (str): Metropolitan Statistical Area name\n        \"\"\"\n        if self.data_page is not None:\n            self.grid_remove()\n            self.data_page.grid()\n            self.data_page.set_msa_name(msa_name)\n\n    def search_metros_threaded(self, msa_name: str) -> None:\n        \"\"\"Search the given Metropolitan Statistical Area name for housing attributes.\n\n        Args:\n            msa_name (str): Metropolitan Statistical Area name\n        \"\"\"\n        redfin_searcher = RedfinApi()\n        lock = 
threading.Lock()\n        with lock:\n            threading.Thread(\n                target=redfin_searcher.get_house_attributes_from_metro,\n                args=(\n                    msa_name,\n                    self.filters_page.get_values(),\n                    bool(self.cache_chb.get()),\n                ),\n                daemon=True,\n            ).start()\n\n    def change_to_filters_page(self) -> None:\n        \"\"\"Change to filters page.\"\"\"\n        if self.filters_page is not None:\n            self.filters_page.grid(row=0, column=0, sticky=\"news\")\n            self.grid_remove()\n            self.filters_page.grid()\n
    "},{"location":"gui/searchpage/#gui.searchpage.SearchPage.change_to_filters_page","title":"change_to_filters_page()","text":"

    Change to filters page.

    Source code in src\\gui\\searchpage.py
    def change_to_filters_page(self) -> None:\n    \"\"\"Change to filters page.\"\"\"\n    if self.filters_page is not None:\n        self.filters_page.grid(row=0, column=0, sticky=\"news\")\n        self.grid_remove()\n        self.filters_page.grid()\n
    "},{"location":"gui/searchpage/#gui.searchpage.SearchPage.create_widgets","title":"create_widgets()","text":"

    Create widgets.

    Source code in src\\gui\\searchpage.py
    def create_widgets(self) -> None:\n    \"\"\"Create widgets.\"\"\"\n    self.top_text = ctk.CTkLabel(\n        self,\n        text=\"Residential Heating Search For Metropolitan Statistical Areas\",\n        font=self.label_font,\n        wraplength=600,\n    )\n    CTkToolTip(\n        self.top_text,\n        delay=0.25,\n        message=\"An MSA is a census defined region that consists of a city and \\nsurrounding communities that are linked by social and economic factors. \\nThe core city has a population of at least 50,000\",\n    )\n    self.redfin_filters_button = ctk.CTkButton(\n        self,\n        corner_radius=10,\n        height=35,\n        text=\"Add Filters\",\n        command=self.change_to_filters_page,\n    )\n    CTkToolTip(\n        self.redfin_filters_button,\n        delay=0.25,\n        message=\"Select filters for your search.\",\n    )\n    self.search_bar = ctk.CTkEntry(\n        self, height=40, corner_radius=40, placeholder_text=\"Search for an MSA\"\n    )\n    self.suggestion_list_box = CTkListbox(\n        self,\n        text_color=(\"gray10\", \"#DCE4EE\"),  # type: ignore\n        border_width=2,\n        command=lambda x: self.update_entry_on_autocomplete_select(x),\n    )\n    self.search_frame = ctk.CTkFrame(self, fg_color=\"transparent\")\n    self.search_button = ctk.CTkButton(\n        self.search_frame,\n        text=\"Search\",\n        fg_color=\"transparent\",\n        height=35,\n        corner_radius=10,\n        border_width=2,\n        text_color=(\"gray10\", \"#DCE4EE\"),\n        command=self.validate_entry_box_and_search,\n    )\n    self.cache_chb = ctk.CTkCheckBox(self.search_frame, text=\"Use cache\")\n\n    self.columnconfigure((0, 2), weight=1)\n    self.columnconfigure(1, weight=4)\n    self.rowconfigure(0, weight=10)\n    self.rowconfigure(1, weight=4)\n    self.rowconfigure(2, weight=10)\n\n    self.top_text.grid(column=0, row=0, columnspan=3)\n\n    self.redfin_filters_button.grid(column=0, row=1, 
padx=(0, 40), sticky=\"e\")\n\n    self.search_bar.grid(column=1, row=1, sticky=\"ew\")\n\n    self.suggestion_list_box.grid(column=1, row=2, sticky=\"new\", pady=(10, 0))\n\n    self.search_frame.columnconfigure(0, weight=1)\n    self.search_frame.rowconfigure((0, 1), weight=1)\n    # pady is hacky but whatever\n    self.search_frame.grid(column=2, row=1, padx=(40, 0), pady=(46, 0))\n    self.search_button.grid(column=0, row=0, sticky=\"w\")\n    self.cache_chb.grid(column=0, row=1, pady=(20, 0), sticky=\"w\")\n\n    self.suggestion_list_box.grid_remove()\n    self.search_bar.bind(\n        \"<KeyRelease>\", command=lambda x: self.update_suggestions_listbox(x)\n    )\n
    "},{"location":"gui/searchpage/#gui.searchpage.SearchPage.go_to_data_page","title":"go_to_data_page(msa_name)","text":"

    Switch to data page.

    Parameters:

    Name Type Description Default msa_name str

    Metropolitan Statistical Area name

    required Source code in src\\gui\\searchpage.py
    def go_to_data_page(self, msa_name: str) -> None:\n    \"\"\"Switch to data page.\n\n    Args:\n        msa_name (str): Metropolitan Statistical Area name\n    \"\"\"\n    if self.data_page is not None:\n        self.grid_remove()\n        self.data_page.grid()\n        self.data_page.set_msa_name(msa_name)\n
    "},{"location":"gui/searchpage/#gui.searchpage.SearchPage.search_metros_threaded","title":"search_metros_threaded(msa_name)","text":"

    Search the given Metropolitan Statistical Area name for housing attributes.

    Parameters:

    Name Type Description Default msa_name str

    Metropolitan Statistical Area name

    required Source code in src\\gui\\searchpage.py
    def search_metros_threaded(self, msa_name: str) -> None:\n    \"\"\"Search the given Metropolitan Statistical Area name for housing attributes.\n\n    Args:\n        msa_name (str): Metropolitan Statistical Area name\n    \"\"\"\n    redfin_searcher = RedfinApi()\n    lock = threading.Lock()\n    with lock:\n        threading.Thread(\n            target=redfin_searcher.get_house_attributes_from_metro,\n            args=(\n                msa_name,\n                self.filters_page.get_values(),\n                bool(self.cache_chb.get()),\n            ),\n            daemon=True,\n        ).start()\n
    "},{"location":"gui/searchpage/#gui.searchpage.SearchPage.update_entry_on_autocomplete_select","title":"update_entry_on_autocomplete_select(x)","text":"

    Suggestions list box callback for when a button in the list box is selected.

    Source code in src\\gui\\searchpage.py
    def update_entry_on_autocomplete_select(self, x: Event) -> None:\n    \"\"\"Suggestions list box callback for when a button in the list box is selected.\"\"\"\n    self.search_bar.delete(0, ctk.END)\n    self.search_bar.insert(0, x)\n    self.update_suggestions_listbox(None)\n
    "},{"location":"gui/searchpage/#gui.searchpage.SearchPage.update_suggestions_listbox","title":"update_suggestions_listbox(x)","text":"

    Update the suggestions box based on the contents of 'self.search_bar'.

    Parameters:

    Name Type Description Default x Event | None

    ignored

    required Source code in src\\gui\\searchpage.py
    def update_suggestions_listbox(self, x: Event | None) -> None:\n    \"\"\"Update the suggestions box based on the contents of 'self.search_bar'.\n\n    Args:\n        x (Event | None): ignored\n    \"\"\"\n    cur_text = re.escape(self.search_bar.get())\n    if cur_text == \"\":\n        # only gets called when all text has been deleted\n        self.current_auto_complete_series = self.auto_complete_series\n        self.suggestion_list_box.grid_remove()\n    else:\n        self.suggestion_list_box.delete(\"all\")\n        if (\n            self.current_auto_complete_series is None\n            or len(cur_text) < self.prev_search_bar_len\n        ):\n            self.current_auto_complete_series = self.auto_complete_series.filter(\n                self.auto_complete_series.str.contains(rf\"(?i)^{cur_text}\")\n            )\n        else:\n            self.current_auto_complete_series = (\n                self.current_auto_complete_series.filter(\n                    self.current_auto_complete_series.str.contains(\n                        rf\"(?i)^{cur_text}\"\n                    )\n                )\n            )\n        self.suggestion_list_box.grid()\n        try:\n            self.current_auto_complete_series.head(\n                self.MATCHES_TO_DISPLAY\n            ).map_elements(\n                lambda msa: self.suggestion_list_box.insert(\n                    \"end\", msa, border_width=2, border_color=\"gray\"\n                ),\n                return_dtype=pl.Utf8,\n            )\n        except KeyError:\n            # always throws a key error, doesnt matter to us, just pollutes logs\n            pass\n    self.prev_search_bar_len = len(cur_text)\n
    "},{"location":"gui/searchpage/#gui.searchpage.SearchPage.validate_entry_box_and_search","title":"validate_entry_box_and_search()","text":"

    Validate self.search_bar contents and search if the contents are an MSA name.

    Source code in src\\gui\\searchpage.py
    def validate_entry_box_and_search(self) -> None:\n    \"\"\"Validate `self.search_bar` contents and search if the contents are an MSA name.\"\"\"\n    cur_text = self.search_bar.get()\n    if len(cur_text) == 0:\n        cur_text = r\"!^\"\n    if any(self.auto_complete_series.str.contains(rf\"{cur_text}$\")):\n        self.data_page = DataPage(self.master)\n        self.data_page.grid(row=0, column=0, sticky=\"news\")\n        self.go_to_data_page(cur_text)\n        self.search_metros_threaded(cur_text)\n    else:\n        CTkMessagebox(\n            self,\n            title=\"Error\",\n            message=\"Inputted name is not in MSA name list!\",\n            icon=\"warning\",\n        )\n
    "}]} \ No newline at end of file diff --git a/sitemap.xml b/sitemap.xml index 8bef843..3402d1d 100755 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,52 +2,52 @@ https://atlas-b2023.github.io/ResidentialElectrificationTracker/ - 2023-11-24 + 2023-12-06 daily https://atlas-b2023.github.io/ResidentialElectrificationTracker/backend/helper/ - 2023-11-24 + 2023-12-06 daily https://atlas-b2023.github.io/ResidentialElectrificationTracker/backend/redfinscraper/ - 2023-11-24 + 2023-12-06 daily https://atlas-b2023.github.io/ResidentialElectrificationTracker/backend/secondarydata/ - 2023-11-24 + 2023-12-06 daily https://atlas-b2023.github.io/ResidentialElectrificationTracker/backend/us/states/ - 2023-11-24 + 2023-12-06 daily https://atlas-b2023.github.io/ResidentialElectrificationTracker/backend/us/unitedstatesofamerica/ - 2023-11-24 + 2023-12-06 daily https://atlas-b2023.github.io/ResidentialElectrificationTracker/gui/app/ - 2023-11-24 + 2023-12-06 daily https://atlas-b2023.github.io/ResidentialElectrificationTracker/gui/datapage/ - 2023-11-24 + 2023-12-06 daily https://atlas-b2023.github.io/ResidentialElectrificationTracker/gui/filterspage/ - 2023-11-24 + 2023-12-06 daily https://atlas-b2023.github.io/ResidentialElectrificationTracker/gui/searchpage/ - 2023-11-24 + 2023-12-06 daily \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz index c162cf6..a7b9f42 100755 Binary files a/sitemap.xml.gz and b/sitemap.xml.gz differ