Skip to content

Commit

Permalink
Merge pull request #119 from BasisResearch/ru-retrain-models
Browse files Browse the repository at this point in the history
retrain models, reinstate all tests and notebook tests
  • Loading branch information
riadas authored Feb 17, 2024
2 parents aee3dc2 + 0e02645 commit 0cfefe2
Show file tree
Hide file tree
Showing 635 changed files with 1,086 additions and 311 deletions.
30 changes: 18 additions & 12 deletions cities/modeling/modeling_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,38 +9,44 @@
from pyro.optim import Adam
from scipy.stats import spearmanr


from cities.utils.data_grabber import (
DataGrabber,
list_available_features,
list_tensed_features,
)


def drop_high_correlation(df, threshold=0.85):
    """Drop variable columns that are highly rank-correlated with an earlier one.

    The first two columns of ``df`` are carried over untouched (presumably
    identifier columns such as GeoFIPS/GeoName -- confirm against callers);
    every remaining column is treated as a variable. Spearman correlations are
    computed between all pairs of variable columns, and for each pair whose
    absolute correlation is strictly above ``threshold`` (and strictly below
    1.0) the second column of the pair is removed.

    Args:
        df: DataFrame whose columns from position 2 onwards are the variables
            to screen. It is not modified.
        threshold: Absolute Spearman correlation above which the later column
            of a pair is dropped.

    Returns:
        A new DataFrame made of the first two columns of ``df`` followed by
        the surviving variable columns, in their original order.
    """
    # Local import: only this function needs numpy, and the file's import
    # block is not visible from here.
    import numpy as np

    df_var = df.iloc[:, 2:].copy()
    n_vars = df_var.shape[1]
    columns = df_var.columns

    high_correlation_pairs = []
    # With fewer than two variables there is nothing to compare.
    if n_vars >= 2:
        correlation_matrix, _ = spearmanr(df_var)
        # spearmanr returns a bare scalar (not a 2x2 matrix) when given
        # exactly two variables; expand it so the indexing below is uniform.
        if np.ndim(correlation_matrix) == 0:
            rho = float(correlation_matrix)
            correlation_matrix = np.array([[1.0, rho], [rho, 1.0]])

        # NOTE(review): pairs with |rho| == 1.0 exactly are NOT flagged, as in
        # the original code -- confirm that perfectly correlated columns are
        # meant to be kept.
        high_correlation_pairs = [
            (columns[i], columns[j])
            for i in range(n_vars)
            for j in range(i + 1, n_vars)
            if threshold < abs(correlation_matrix[i, j]) < 1.0
            # Only relevant if the frame has duplicate column names.
            and columns[i] != columns[j]
        ]

    print(
        f"Highly correlated pairs: {high_correlation_pairs}, second elements will be dropped"
    )

    # Collect every "second element" once and drop them in a single pass
    # instead of mutating the frame inside a loop.
    removed = {var2 for _, var2 in high_correlation_pairs}
    if removed:
        df_var = df_var.drop(columns=list(removed))

    result = pd.concat([df.iloc[:, :2], df_var], axis=1)
    print(f"Removed {removed} due to correlation > {threshold}")
    return result



def prep_wide_data_for_inference(
outcome_dataset: str, intervention_dataset: str, forward_shift: int
):
Expand Down Expand Up @@ -115,7 +121,7 @@ def prep_wide_data_for_inference(
)

f_covariates_joint = drop_high_correlation(f_covariates_joint)

assert f_covariates_joint["GeoFIPS"].equals(intervention["GeoFIPS"])

# extract data for which intervention and outcome overlap
Expand Down Expand Up @@ -198,7 +204,7 @@ def prep_wide_data_for_inference(
"y": y,
"years_available": years_available,
"outcome_years": outcome_years_to_keep,
"covariates_df": f_covariates_joint
"covariates_df": f_covariates_joint,
}


Expand Down
2 changes: 1 addition & 1 deletion cities/modeling/training_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@
remaining -= 1
logging.info(
f"Training of {guide_name} completed in {duration:.2f} seconds. "
f"{remaining} out of {N_combinations} guides remain to be trained."
f"{int(remaining)} out of {N_combinations} guides remain to be trained."
)

logging.info("All guides are now available.")
467 changes: 452 additions & 15 deletions data/model_guides/.training.log

Large diffs are not rendered by default.

Binary file modified data/model_guides/spending_HHS_gdp_1_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_gdp_1_params.pth
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_gdp_2_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_gdp_2_params.pth
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_gdp_3_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_gdp_3_params.pth
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_industry_mining_total_1_guide.pkl
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_medianHouseholdIncome_1_guide.pkl
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_population_1_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_population_1_params.pth
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_population_2_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_population_2_params.pth
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_population_3_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_population_3_params.pth
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyAll_1_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyAll_1_params.pth
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyAll_2_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyAll_2_params.pth
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyAll_3_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyAll_3_params.pth
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyAllprct_1_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyAllprct_1_params.pth
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyAllprct_2_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyAllprct_2_params.pth
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyAllprct_3_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyAllprct_3_params.pth
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyUnder18_1_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyUnder18_1_params.pth
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyUnder18_2_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyUnder18_2_params.pth
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyUnder18_3_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyUnder18_3_params.pth
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyUnder18prct_1_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyUnder18prct_1_params.pth
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyUnder18prct_2_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyUnder18prct_2_params.pth
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyUnder18prct_3_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_povertyUnder18prct_3_params.pth
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_unemployment_rate_1_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_unemployment_rate_1_params.pth
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_unemployment_rate_2_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_unemployment_rate_2_params.pth
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_unemployment_rate_3_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_HHS_unemployment_rate_3_params.pth
Binary file not shown.
Binary file modified data/model_guides/spending_commerce_gdp_1_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_commerce_gdp_1_params.pth
Binary file not shown.
Binary file modified data/model_guides/spending_commerce_gdp_2_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_commerce_gdp_2_params.pth
Binary file not shown.
Binary file modified data/model_guides/spending_commerce_gdp_3_guide.pkl
Binary file not shown.
Binary file modified data/model_guides/spending_commerce_gdp_3_params.pth
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading

0 comments on commit 0cfefe2

Please sign in to comment.