Skip to content

Commit

Permalink
Merge pull request #121 from VikramsDataScience/churn-propensity-model
Browse files Browse the repository at this point in the history
Restructured repo to better suit relative imports
  • Loading branch information
VikramsDataScience authored Aug 30, 2024
2 parents aef8424 + 266dcfb commit 9c9aa98
Show file tree
Hide file tree
Showing 15 changed files with 10 additions and 31 deletions.
11 changes: 0 additions & 11 deletions ECommerce_Churn_Propensity_Model/__init__.py

This file was deleted.

Binary file not shown.
6 changes: 0 additions & 6 deletions ECommerce_Churn_Propensity_Model/main.py

This file was deleted.

1 change: 0 additions & 1 deletion ECommerce_Churn_Propensity_Model/src/eda/EDA.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from pathlib import Path
from ydata_profiling import ProfileReport
from phik import phik_matrix, significance_matrix

# Load variables from __init__.py
from . import Config, read_impute_data, doanes_formula

Expand Down
2 changes: 1 addition & 1 deletion ECommerce_Churn_Propensity_Model/src/eda/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def read_impute_data(df_path, float_cols, categorical_cols, output_path, sheet_n
elif '.csv' in df_path.suffix:
df = pd.read_csv(df_path)

# Cast float_columns as integers, impute NaN values using MissForest
# Cast float_columns as integers, dynamically impute values using MissForest
with suppress_stdout():
df = missforest_imputer.fit_transform(x=df,
categorical=categorical_cols)
Expand Down
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
config = Config()
data_path = config.data_path
seed = config.seed
churn_app_path = Path(config.churn_app_models)

insample_scores = pd.DataFrame(columns=['Model', 'Precision', 'Recall', 'F1-Score'])
outofsample_scores = pd.DataFrame(columns=['Model', 'Precision', 'Recall', 'F1-Score'])
Expand Down
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,14 +1,10 @@
import pandas as pd
from pathlib import Path
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, ConfusionMatrixDisplay
from matplotlib import pyplot as plt
from pathlib import Path
import pickle

import pandas as pd
# Load variables from __init__.py
from . import models_config, insample_scores, outofsample_scores, X, X_train, X_test, y_train, y_test, Config, data_path

config = Config()
churn_app_path = Path(config.churn_app_models)
from . import models_config, insample_scores, outofsample_scores, data_path, X, X_train, X_test, y_train, y_test, churn_app_path

for model in models_config:
print(f'COMMENCING TRAINING FOR \'{model}\':')
Expand All @@ -29,7 +25,7 @@
train_conf_matrix = confusion_matrix(y_train, y_pred)
cm_display = ConfusionMatrixDisplay(confusion_matrix=train_conf_matrix, display_labels=[False, True])
cm_display.plot()
plt.savefig(Path(data_path / 'train_conf_matrix.png'))
plt.savefig(Path(data_path) / 'train_conf_matrix.png')

# Test set predictions (Out of Sample)
y_test_pred = models_config[model].predict(X_test)
Expand All @@ -46,7 +42,7 @@
test_conf_matrix = confusion_matrix(y_test, y_test_pred)
cm_display = ConfusionMatrixDisplay(confusion_matrix=test_conf_matrix, display_labels=[False, True])
cm_display.plot()
plt.savefig(Path(data_path / 'OOS_conf_matrix.png'))
plt.savefig(Path(data_path) / 'OOS_conf_matrix.png')

if model == 'logistic_regression':
print(f'{model} Feature Importances:\n', models_config[model].coef_)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,9 @@ def pre_processing(df_path, bins, onehot_cols, output_path, bin_cols=str, sheet_
df[bin_cols] = pd.cut(df[bin_cols], bins, right=False)
print(df.value_counts(bin_cols))

df.set_index('CustomerID', inplace=True)
df = pd.get_dummies(df, columns=onehot_cols, dtype=int)
# Replace the closed-interval '[' in the binned column names with '(' to suit XGBClassifier(); otherwise XGBClassifier() will raise column name errors
df.columns = [col.replace('[', '(') for col in df.columns]
# df = df.drop(['Unnamed: 0'], axis=1)

# Save the pre-processed DataFrame for downstream consumption
df.to_csv(output_path, index=False)
Expand Down
Binary file not shown.
Binary file not shown.
4 changes: 3 additions & 1 deletion ECommerce_Churn_Propensity_Model/tests.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import unittest
from pathlib import Path
from .src import read_impute_data, pre_processing, Config
from .src.config import Config
from .src.eda import read_impute_data
from .src.preprocessing import pre_processing

config = Config()
data_path = config.data_path
Expand Down

0 comments on commit 9c9aa98

Please sign in to comment.