def outlier_removal(loan_data: pd.DataFrame,
                    columns=('ApplicantIncome', 'CoapplicantIncome', 'LoanAmount'),
                    quantile: float = 0.95) -> pd.DataFrame:
    """Drop rows whose value exceeds a per-column upper quantile.

    A row is removed when, for at least one of ``columns``, its value is
    strictly greater than that column's ``quantile`` (computed over the
    whole of ``loan_data``).

    Args:
        loan_data: Frame containing every column named in ``columns``.
        columns: Names of the numeric columns to screen.  Defaults to the
            three columns the original implementation hard-coded.
        quantile: Upper quantile in ``[0, 1]`` used as the cut-off.

    Returns:
        A new DataFrame with the offending rows removed; ``loan_data`` itself
        is not modified.  Columns and original index labels are preserved.
    """
    keep = pd.Series(True, index=loan_data.index)
    for column in columns:
        values = loan_data[column]
        threshold = values.quantile(q=quantile)
        # A NaN never compares greater than the threshold, so rows with a
        # missing value in this column are kept (same as the original mask).
        keep &= ~(values > threshold)
    return loan_data[keep]
def _fill_with_most_frequent(column):
    """Return ``column`` with NaNs replaced by its most frequent value."""
    counts = column.value_counts()
    if counts.empty:
        # Column is entirely missing: there is no mode to impute with.
        return column
    return column.fillna(counts.index[0])


def data_cleaning(loan_data):
    """Impute missing values and split the data into train and test sets.

    Numeric feature columns are filled with their column mean; all other
    feature columns are filled with their most frequent value.  The imputed
    frame lists the numeric columns first, followed by the non-numeric ones.

    Args:
        loan_data: DataFrame that contains a ``'Loan_Status'`` target column.

    Returns:
        Tuple ``(X, y, X_train, X_test, y_train, y_test)`` where ``X`` is the
        imputed feature frame, ``y`` the ``'Loan_Status'`` series, and the
        remaining items are a 75/25 split made with ``random_state=9``.
    """
    # ``axis`` must be passed by keyword: the positional form was removed
    # from DataFrame.drop in pandas 2.0.
    X = loan_data.drop('Loan_Status', axis=1)
    y = pd.Series(loan_data['Loan_Status'])

    # NOTE: the imputation statistics are computed on the full data set,
    # i.e. before the train/test split below.
    X_num = X.select_dtypes(include=[np.number])
    X_num = X_num.fillna(X_num.mean())

    X_cat = X.select_dtypes(exclude=[np.number])
    X_cat = X_cat.apply(_fill_with_most_frequent)

    X = pd.concat([X_num, X_cat], axis=1)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=9)
    return X, y, pd.DataFrame(X_train), pd.DataFrame(X_test), y_train, y_test
def _encode_and_sqrt(features):
    """One-hot encode ``features`` and take the element-wise square root."""
    return np.sqrt(pd.get_dummies(features))


def data_cleaning_2(X_train, X_test, y_train, y_test):
    """One-hot encode and square-root transform the train and test features.

    Both frames are passed through ``pd.get_dummies`` and ``np.sqrt``; the
    test frame is then aligned to the train frame's columns, and one
    reference dummy column per categorical feature is dropped from each.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training target, returned unchanged.
        y_test: Test target, returned unchanged.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` with the transformed
        feature frames.  The input frames are not modified.

    Raises:
        KeyError: If the encoded training frame lacks one of the reference
            dummy columns.
    """
    reference_dummies = [
        'Gender_Female',
        'Married_No',
        'Dependents_0',
        'Education_Graduate',
        'Self_Employed_No',
        'Property_Area_Rural',
    ]

    X_train = _encode_and_sqrt(X_train)
    X_test = _encode_and_sqrt(X_test)

    # The two frames are encoded independently, so a category level that is
    # absent from the test split would leave X_test without that dummy
    # column.  Align it to the training columns (missing dummies become 0,
    # levels unseen in training are discarded) so both frames share the same
    # columns in the same order.
    X_test = X_test.reindex(columns=X_train.columns, fill_value=0)

    X_train = X_train.drop(reference_dummies, axis=1)
    X_test = X_test.drop(reference_dummies, axis=1)
    return X_train, X_test, y_train, y_test
def logistic_regression(X_train, X_test, y_train, y_test):
    """Fit a logistic regression and return its test-set confusion matrix.

    ``ApplicantIncome``, ``CoapplicantIncome`` and ``LoanAmount`` are
    standardized before fitting; all other columns are used as given.

    Args:
        X_train: Training features; must contain the three columns above.
        X_test: Test features with the same columns as ``X_train``.
        y_train: Training labels.
        y_test: Test labels.

    Returns:
        The result of ``confusion_matrix(y_test, y_pred)`` for the
        predictions made on the scaled test features.
    """
    scaled_columns = ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount']

    # Work on copies so the caller's frames are not overwritten.
    X_train = X_train.copy()
    X_test = X_test.copy()

    # Learn the scaling statistics from the training data only and reuse
    # them for the test data; re-fitting on the test set would put the two
    # sets on different scales.
    scaler = StandardScaler()
    X_train[scaled_columns] = scaler.fit_transform(X_train[scaled_columns])
    X_test[scaled_columns] = scaler.transform(X_test[scaled_columns])

    log_reg = LogisticRegression()
    log_reg.fit(X_train, y_train)
    y_pred = log_reg.predict(X_test)
    return confusion_matrix(y_test, y_pred)