Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create .gitignore2 #1

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified .DS_Store
Binary file not shown.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#census file
/Census_token.txt
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
67 changes: 67 additions & 0 deletions ChangetoMini.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "0df3f5c2-163d-4335-a438-617a04960054",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import csv\n",
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"id": "832b7eaf-774b-4ed0-9f05-57e26d8f2175",
"metadata": {},
"source": [
"Set up\n",
"\n",
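"This notebook draws a random sample from the full HMDA LAR file and saves it as a smaller \"mini\" CSV, so the analysis can be developed without loading the whole dataset.\n",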
"\n",
"LAR / HMDA data (state_AZ-CA.csv)\n",
"\n",
"To download the entire file, go to this link. In Step 1 select Arizona and California. Then click download dataset.\n",
"Save this file to the /input_data/ folder.\n",
"Note: This file is 1.6GB, so it is not in the repo.\n",
"Note: There are columns with \"mixed types\".\n",
"The code below will create a mini version to work with while we develop the analysis."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "1e7f4977-3974-4204-8262-c830419ce162",
"metadata": {},
"outputs": [],
"source": [
"# Build the mini file only once; delete it to force a rebuild.\n",
"if not os.path.exists('./input_data/state_AZ-CA_mini.csv'):\n",
"    # low_memory=False parses each column in a single pass, so the\n",
"    # mixed-type columns mentioned in the Set up notes get one dtype\n",
"    # instead of a per-chunk guess.\n",
"    hmda = pd.read_csv('./input_data/state_AZ-CA.csv', low_memory=False)\n",
"    # Cap the sample size at the number of rows available (sample() raises\n",
"    # when n exceeds it) and fix the seed so every rebuild yields the same\n",
"    # mini file.\n",
"    hmda.sample(n=min(50000, len(hmda)), random_state=42).to_csv(\n",
"        './input_data/state_AZ-CA_mini.csv', index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
178 changes: 178 additions & 0 deletions CleaningMini.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "0074bc06-0bff-4e08-ad28-88e62ec5d1ba",
"metadata": {},
"outputs": [],
"source": [
"# From ChatGPT"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "a5bf85b8-3567-46d6-a7b9-5aaae5313f4d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 50000 entries, 0 to 49999\n",
"Data columns (total 99 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 activity_year 50000 non-null int64 \n",
" 1 lei 50000 non-null object \n",
" 2 derived_msa-md 50000 non-null int64 \n",
" 3 state_code 50000 non-null object \n",
" 4 county_code 49821 non-null float64\n",
" 5 census_tract 49783 non-null float64\n",
" 6 conforming_loan_limit 49809 non-null object \n",
" 7 derived_loan_product_type 50000 non-null object \n",
" 8 derived_dwelling_category 50000 non-null object \n",
" 9 derived_ethnicity 50000 non-null object \n",
" 10 derived_race 50000 non-null object \n",
" 11 derived_sex 50000 non-null object \n",
" 12 action_taken 50000 non-null int64 \n",
" 13 purchaser_type 50000 non-null int64 \n",
" 14 preapproval 50000 non-null int64 \n",
" 15 loan_type 50000 non-null int64 \n",
" 16 loan_purpose 50000 non-null int64 \n",
" 17 lien_status 50000 non-null int64 \n",
" 18 reverse_mortgage 50000 non-null int64 \n",
" 19 open-end_line_of_credit 50000 non-null int64 \n",
" 20 business_or_commercial_purpose 50000 non-null int64 \n",
" 21 loan_amount 50000 non-null float64\n",
" 22 loan_to_value_ratio 33134 non-null object \n",
" 23 interest_rate 35693 non-null object \n",
" 24 rate_spread 28021 non-null object \n",
" 25 hoepa_status 50000 non-null int64 \n",
" 26 total_loan_costs 30300 non-null object \n",
" 27 total_points_and_fees 377 non-null object \n",
" 28 origination_charges 30655 non-null object \n",
" 29 discount_points 11852 non-null object \n",
" 30 lender_credits 13485 non-null object \n",
" 31 loan_term 49396 non-null object \n",
" 32 prepayment_penalty_term 1313 non-null object \n",
" 33 intro_rate_period 4486 non-null object \n",
" 34 negative_amortization 50000 non-null int64 \n",
" 35 interest_only_payment 50000 non-null int64 \n",
" 36 balloon_payment 50000 non-null int64 \n",
" 37 other_nonamortizing_features 50000 non-null int64 \n",
" 38 property_value 39142 non-null object \n",
" 39 construction_method 50000 non-null int64 \n",
" 40 occupancy_type 50000 non-null int64 \n",
" 41 manufactured_home_secured_property_type 50000 non-null int64 \n",
" 42 manufactured_home_land_property_interest 50000 non-null int64 \n",
" 43 total_units 50000 non-null object \n",
" 44 multifamily_affordable_units 303 non-null object \n",
" 45 income 43219 non-null float64\n",
" 46 debt_to_income_ratio 32464 non-null object \n",
" 47 applicant_credit_score_type 50000 non-null int64 \n",
" 48 co-applicant_credit_score_type 50000 non-null int64 \n",
" 49 applicant_ethnicity-1 49980 non-null float64\n",
" 50 applicant_ethnicity-2 4356 non-null float64\n",
" 51 applicant_ethnicity-3 58 non-null float64\n",
" 52 applicant_ethnicity-4 1 non-null float64\n",
" 53 applicant_ethnicity-5 0 non-null float64\n",
" 54 co-applicant_ethnicity-1 49993 non-null float64\n",
" 55 co-applicant_ethnicity-2 1832 non-null float64\n",
" 56 co-applicant_ethnicity-3 16 non-null float64\n",
" 57 co-applicant_ethnicity-4 0 non-null float64\n",
" 58 co-applicant_ethnicity-5 0 non-null float64\n",
" 59 applicant_ethnicity_observed 50000 non-null int64 \n",
" 60 co-applicant_ethnicity_observed 50000 non-null int64 \n",
" 61 applicant_race-1 49985 non-null float64\n",
" 62 applicant_race-2 4731 non-null float64\n",
" 63 applicant_race-3 414 non-null float64\n",
" 64 applicant_race-4 36 non-null float64\n",
" 65 applicant_race-5 13 non-null float64\n",
" 66 co-applicant_race-1 49997 non-null float64\n",
" 67 co-applicant_race-2 2133 non-null float64\n",
" 68 co-applicant_race-3 179 non-null float64\n",
" 69 co-applicant_race-4 13 non-null float64\n",
" 70 co-applicant_race-5 2 non-null float64\n",
" 71 applicant_race_observed 50000 non-null int64 \n",
" 72 co-applicant_race_observed 50000 non-null int64 \n",
" 73 applicant_sex 50000 non-null int64 \n",
" 74 co-applicant_sex 50000 non-null int64 \n",
" 75 applicant_sex_observed 50000 non-null int64 \n",
" 76 co-applicant_sex_observed 50000 non-null int64 \n",
" 77 applicant_age 50000 non-null object \n",
" 78 co-applicant_age 50000 non-null object \n",
" 79 applicant_age_above_62 45255 non-null object \n",
" 80 co-applicant_age_above_62 21240 non-null object \n",
" 81 submission_of_application 50000 non-null int64 \n",
" 82 initially_payable_to_institution 50000 non-null int64 \n",
" 83 aus-1 50000 non-null int64 \n",
" 84 aus-2 3357 non-null float64\n",
" 85 aus-3 746 non-null float64\n",
" 86 aus-4 210 non-null float64\n",
" 87 aus-5 183 non-null float64\n",
" 88 denial_reason-1 50000 non-null int64 \n",
" 89 denial_reason-2 731 non-null float64\n",
" 90 denial_reason-3 100 non-null float64\n",
" 91 denial_reason-4 14 non-null float64\n",
" 92 tract_population 50000 non-null int64 \n",
" 93 tract_minority_population_percent 50000 non-null float64\n",
" 94 ffiec_msa_md_median_family_income 50000 non-null int64 \n",
" 95 tract_to_msa_income_percentage 50000 non-null int64 \n",
" 96 tract_owner_occupied_units 50000 non-null int64 \n",
" 97 tract_one_to_four_family_homes 50000 non-null int64 \n",
" 98 tract_median_age_of_housing_units 50000 non-null int64 \n",
"dtypes: float64(32), int64(40), object(27)\n",
"memory usage: 37.8+ MB\n"
]
}
],
"source": [
"import pandas as pd\n",
"\n",
"def clean_hmda_csv(file_path):\n",
"    \"\"\"Read an HMDA CSV and trim whitespace around its text values.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    file_path : str or path-like\n",
"        CSV file to load with ``pd.read_csv``.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        The loaded data with leading/trailing whitespace stripped from\n",
"        every string cell; non-string cells (numbers, NaN) are unchanged.\n",
"    \"\"\"\n",
"    df = pd.read_csv(file_path)\n",
"\n",
"    # Numeric columns cannot hold strings, so only the remaining columns\n",
"    # are visited. Series.map is used in place of DataFrame.applymap, which\n",
"    # pandas deprecated in 2.1 and which also walked every numeric cell.\n",
"    for col in df.select_dtypes(exclude='number').columns:\n",
"        # The isinstance guard leaves NaN and other non-text values intact.\n",
"        df[col] = df[col].map(lambda x: x.strip() if isinstance(x, str) else x)\n",
"\n",
"    return df\n",
"\n",
"# Load the sampled HMDA file through the cleaner, then print the\n",
"# per-column dtype / non-null summary.\n",
"hmdadf = clean_hmda_csv(file_path='./input_data/state_AZ-CA_mini.csv')\n",
"hmdadf.info()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f76b3ac9-b52f-48c9-98d4-22e8a33ca1ff",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
723 changes: 723 additions & 0 deletions MinoritiesMapFromCensusAZ-Copy1.ipynb

Large diffs are not rendered by default.

Binary file added Old Code/.DS_Store
Binary file not shown.
659 changes: 659 additions & 0 deletions Old Code/MapFromCensusAZ.ipynb

Large diffs are not rendered by default.

658 changes: 658 additions & 0 deletions Old Code/MapFromCensusCA.ipynb

Large diffs are not rendered by default.

656 changes: 656 additions & 0 deletions Old Code/MapFromCensusVA.ipynb

Large diffs are not rendered by default.

Empty file added Old Code/untitled.txt
Empty file.
1 change: 1 addition & 0 deletions cleaned_hmda.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
activity_year,lei,derived_msa-md,state_code,county_code,census_tract,conforming_loan_limit,derived_loan_product_type,derived_dwelling_category,derived_ethnicity,derived_race,derived_sex,action_taken,purchaser_type,preapproval,loan_type,loan_purpose,lien_status,reverse_mortgage,open-end_line_of_credit,business_or_commercial_purpose,loan_amount,loan_to_value_ratio,interest_rate,rate_spread,hoepa_status,total_loan_costs,total_points_and_fees,origination_charges,discount_points,lender_credits,loan_term,prepayment_penalty_term,intro_rate_period,negative_amortization,interest_only_payment,balloon_payment,other_nonamortizing_features,property_value,construction_method,occupancy_type,manufactured_home_secured_property_type,manufactured_home_land_property_interest,total_units,multifamily_affordable_units,income,debt_to_income_ratio,applicant_credit_score_type,co-applicant_credit_score_type,applicant_ethnicity-1,applicant_ethnicity-2,applicant_ethnicity-3,applicant_ethnicity-4,applicant_ethnicity-5,co-applicant_ethnicity-1,co-applicant_ethnicity-2,co-applicant_ethnicity-3,co-applicant_ethnicity-4,co-applicant_ethnicity-5,applicant_ethnicity_observed,co-applicant_ethnicity_observed,applicant_race-1,applicant_race-2,applicant_race-3,applicant_race-4,applicant_race-5,co-applicant_race-1,co-applicant_race-2,co-applicant_race-3,co-applicant_race-4,co-applicant_race-5,applicant_race_observed,co-applicant_race_observed,applicant_sex,co-applicant_sex,applicant_sex_observed,co-applicant_sex_observed,applicant_age,co-applicant_age,applicant_age_above_62,co-applicant_age_above_62,submission_of_application,initially_payable_to_institution,aus-1,aus-2,aus-3,aus-4,aus-5,denial_reason-1,denial_reason-2,denial_reason-3,denial_reason-4,tract_population,tract_minority_population_percent,ffiec_msa_md_median_family_income,tract_to_msa_income_percentage,tract_owner_occupied_units,tract_one_to_four_family_homes,tract_median_age_of_housing_units
Binary file added input_data/.DS_Store
Binary file not shown.
50,001 changes: 50,001 additions & 0 deletions input_data/state_AZ-CA_mini.csv

Large diffs are not rendered by default.