From a4bbe12e5d647657940aad4ea3042e429eed0d29 Mon Sep 17 00:00:00 2001 From: Niklewa Date: Fri, 26 Jul 2024 14:05:19 +0200 Subject: [PATCH] adding tests, lint --- cities/queries/fips_query.py | 21 ++++---- docs/guides/similarity_demo_ct.ipynb | 6 +-- tests/test_fips_query.py | 77 +++++++++++++++++++++++++++- 3 files changed, 91 insertions(+), 13 deletions(-) diff --git a/cities/queries/fips_query.py b/cities/queries/fips_query.py index f48d9edb..8427a03b 100644 --- a/cities/queries/fips_query.py +++ b/cities/queries/fips_query.py @@ -3,9 +3,9 @@ import plotly.graph_objects as go from cities.utils.data_grabber import ( + CTDataGrabberCSV, DataGrabber, MSADataGrabber, - CTDataGrabberCSV, check_if_tensed, list_available_features, ) @@ -798,8 +798,7 @@ def __init__( self.outcome_percentile_range = outcome_percentile_range - -class CTFipsQuery(FipsQuery): # census tract FipsQuery +class CTFipsQuery(FipsQuery): # census tract FipsQuery def __init__( self, @@ -811,7 +810,7 @@ def __init__( time_decay=1.08, outcome_comparison_period=None, outcome_percentile_range=None, - ct_time_period: str = "pre_2020", # "pre_2020" or "post_2020" + ct_time_period: str = "pre_2020", # "pre_2020" or "post_2020" ): # self.data = MSADataGrabber() # self.all_available_features = list_available_features(level="msa") @@ -844,7 +843,9 @@ def __init__( outcome_var is None and outcome_percentile_range is not None ), "outcome_percentile_range requires an outcome variable" - self.all_available_features = list_available_features(level = "census_tract", ct_time_period = ct_time_period) + self.all_available_features = list_available_features( + level="census_tract", ct_time_period=ct_time_period + ) feature_groups = list(feature_groups_with_weights.keys()) @@ -857,13 +858,15 @@ def __init__( self.feature_groups_with_weights = feature_groups_with_weights self.feature_groups = feature_groups - self.data = CTDataGrabberCSV(ct_time_period = ct_time_period) + self.data = 
CTDataGrabberCSV(ct_time_period=ct_time_period) self.repo_root = self.data.repo_root self.fips = fips self.lag = lag self.top = top - self.population_var = "population" # default valuable - # population instead of gdp + self.population_var = "population" # default variable + # population instead of gdp + + self.gdp_var = self.population_var # for sake of using 'find_euclidean_kins' # it's fine if they're None (by default) self.outcome_var = outcome_var @@ -909,4 +912,4 @@ def __init__( self.outcome_with_percentiles["percentile"] = round( self.outcome_with_percentiles["percentile"] * 100, 2 ) - self.outcome_percentile_range = outcome_percentile_range \ No newline at end of file + self.outcome_percentile_range = outcome_percentile_range diff --git a/docs/guides/similarity_demo_ct.ipynb b/docs/guides/similarity_demo_ct.ipynb index cd87997a..d705481a 100644 --- a/docs/guides/similarity_demo_ct.ipynb +++ b/docs/guides/similarity_demo_ct.ipynb @@ -11,8 +11,8 @@ "import os\n", "\n", "from cities.queries.fips_query import CTFipsQuery\n", + "\n", "# proper imports\n", - "from cities.utils.data_grabber import CTDataGrabberCSV, list_available_features\n", "\n", "smoke_test = \"CI\" in os.environ\n", "smoke_test = True\n", @@ -26940,7 +26940,7 @@ } ], "source": [ - "f = CTFipsQuery(1003010903, \"population\") \n", + "f = CTFipsQuery(1003010903, \"population\")\n", "f.compare_my_outcome_to_others(sample_size=sample_size, range_multiplier=10)" ] }, @@ -27985,7 +27985,7 @@ " 34037374200,\n", " \"population\",\n", " feature_groups_with_weights={\"population\": 1, \"urbanicity\": 2},\n", - " ct_time_period = \"post_2020\",\n", + " ct_time_period=\"post_2020\",\n", ")\n", "\n", "f.find_euclidean_kins()\n", diff --git a/tests/test_fips_query.py b/tests/test_fips_query.py index a96364f9..7b68b9c3 100644 --- a/tests/test_fips_query.py +++ b/tests/test_fips_query.py @@ -1,6 +1,6 @@ import pytest -from cities.queries.fips_query import FipsQuery, MSAFipsQuery +from
cities.queries.fips_query import CTFipsQuery, FipsQuery, MSAFipsQuery from cities.utils.data_grabber import DataGrabber data = DataGrabber() @@ -66,6 +66,9 @@ def test_euclidean_kins_dont_die(query): f.find_euclidean_kins() +# MSA level + + def test_fips_query_MSA_init(): f1007 = MSAFipsQuery( fips=10780, @@ -124,3 +127,75 @@ def test_fips_query_MSA_init(): def test_euclidean_kins_dont_die_msa(query): f = query f.find_euclidean_kins() + + +# census tract level + + +def test_fips_query_CT_init(): + f34031124401 = CTFipsQuery( + fips=34031124401, + outcome_var="population", + feature_groups_with_weights={"population": 4, "urbanicity": 4}, + lag=0, + top=8, + ) + + assert f34031124401.outcome_var == "population" + assert f34031124401.feature_groups == ["population", "urbanicity"] + assert list(f34031124401.data.std_wide.keys()) == ["population", "urbanicity"] + + assert f34031124401.data.std_wide["population"].shape[0] > 100 + assert f34031124401.data.std_wide["urbanicity"].shape[0] > 100 + + +queries_ct = [ + CTFipsQuery(45051050303, "population", lag=0, top=5, time_decay=1.06), + CTFipsQuery( + 56033000600, + outcome_var="population", + feature_groups_with_weights={"population": 4, "urbanicity": 4}, + lag=0, + top=5, + time_decay=1.03, + ), + CTFipsQuery( + 6019003808, + feature_groups_with_weights={"population": 4, "urbanicity": 4}, + lag=0, + top=5, + time_decay=1.03, + ct_time_period="post_2020", + ), + CTFipsQuery( + 21089040100, + outcome_var="population", + feature_groups_with_weights={"population": 0, "urbanicity": 4}, + lag=0, + top=5, + time_decay=1.03, + ), + CTFipsQuery( + 53061051000, + "population", + lag=2, + top=5, + time_decay=1.03, + ct_time_period="post_2020", + ), + CTFipsQuery( + 31047968300, + outcome_var="population", + feature_groups_with_weights={"population": 4, "urbanicity": 4}, + lag=2, + top=5, + time_decay=1.03, + ct_time_period="post_2020", + ), +] + + +@pytest.mark.parametrize("query", queries_ct) +def 
test_euclidean_kins_dont_die_ct(query): + f = query + f.find_euclidean_kins()