diff --git a/ECommerce_Churn_Propensity_Model/__init__.py b/ECommerce_Churn_Propensity_Model/__init__.py deleted file mode 100644 index b1afa1d..0000000 --- a/ECommerce_Churn_Propensity_Model/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -from .src.config import Config -from pathlib import Path - -# Model version -__version__ = '0.01' - -# Initialise global variables from config module -config = Config() -data_path = Path(config.data_path) -churn_app_path = Path(config.churn_app_models) -seed = config.seed \ No newline at end of file diff --git a/ECommerce_Churn_Propensity_Model/__pycache__/__init__.cpython-311.pyc b/ECommerce_Churn_Propensity_Model/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..6337db9 Binary files /dev/null and b/ECommerce_Churn_Propensity_Model/__pycache__/__init__.cpython-311.pyc differ diff --git a/ECommerce_Churn_Propensity_Model/main.py b/ECommerce_Churn_Propensity_Model/main.py deleted file mode 100644 index bc15226..0000000 --- a/ECommerce_Churn_Propensity_Model/main.py +++ /dev/null @@ -1,6 +0,0 @@ -import argparse - -parser = argparse.ArgumentParser(description='Churn propensity pipeline') -parser.add_argument('--module', type=str, choices=['eda', 'preprocessing', 'model_training']) -args = parser.parse_args() - diff --git a/ECommerce_Churn_Propensity_Model/src/eda/EDA.py b/ECommerce_Churn_Propensity_Model/src/eda/EDA.py index 37e4257..70c3994 100644 --- a/ECommerce_Churn_Propensity_Model/src/eda/EDA.py +++ b/ECommerce_Churn_Propensity_Model/src/eda/EDA.py @@ -2,7 +2,6 @@ from pathlib import Path from ydata_profiling import ProfileReport from phik import phik_matrix, significance_matrix - # Load variables from __init__.py from . import Config, read_impute_data, doanes_formula diff --git a/ECommerce_Churn_Propensity_Model/src/eda/__init__.py b/ECommerce_Churn_Propensity_Model/src/eda/__init__.py index e203fa8..850872f 100644 --- a/ECommerce_Churn_Propensity_Model/src/eda/__init__.py +++ b/ECommerce_Churn_Propensity_Model/src/eda/__init__.py @@ -47,7 +47,7 @@ def read_impute_data(df_path, float_cols, categorical_cols, output_path, sheet_n elif '.csv' in df_path.suffix: df = pd.read_csv(df_path) - # Cast float_columns as integers, impute NaN values using MissForest + # Cast float_columns as integers, dynamically impute values using MissForest with suppress_stdout(): df = missforest_imputer.fit_transform(x=df, categorical=categorical_cols) diff --git a/ECommerce_Churn_Propensity_Model/src/eda/__pycache__/EDA.cpython-311.pyc b/ECommerce_Churn_Propensity_Model/src/eda/__pycache__/EDA.cpython-311.pyc new file mode 100644 index 0000000..01a67a6 Binary files /dev/null and b/ECommerce_Churn_Propensity_Model/src/eda/__pycache__/EDA.cpython-311.pyc differ diff --git a/ECommerce_Churn_Propensity_Model/src/eda/__pycache__/__init__.cpython-311.pyc b/ECommerce_Churn_Propensity_Model/src/eda/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..c5acc31 Binary files /dev/null and b/ECommerce_Churn_Propensity_Model/src/eda/__pycache__/__init__.cpython-311.pyc differ diff --git a/ECommerce_Churn_Propensity_Model/src/model_training/__init__.py b/ECommerce_Churn_Propensity_Model/src/model_training/__init__.py index 6972d90..3d7b6bd 100644 --- a/ECommerce_Churn_Propensity_Model/src/model_training/__init__.py +++ b/ECommerce_Churn_Propensity_Model/src/model_training/__init__.py @@ -10,6 +10,7 @@ config = Config() data_path = config.data_path seed = config.seed +churn_app_path = Path(config.churn_app_models) insample_scores = pd.DataFrame(columns=['Model', 'Precision', 'Recall', 'F1-Score']) outofsample_scores = pd.DataFrame(columns=['Model', 'Precision', 'Recall', 'F1-Score']) diff --git a/ECommerce_Churn_Propensity_Model/src/model_training/__pycache__/__init__.cpython-311.pyc b/ECommerce_Churn_Propensity_Model/src/model_training/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..8f180dd Binary files /dev/null and b/ECommerce_Churn_Propensity_Model/src/model_training/__pycache__/__init__.cpython-311.pyc differ diff --git a/ECommerce_Churn_Propensity_Model/src/model_training/__pycache__/model_training.cpython-311.pyc b/ECommerce_Churn_Propensity_Model/src/model_training/__pycache__/model_training.cpython-311.pyc new file mode 100644 index 0000000..b259c40 Binary files /dev/null and b/ECommerce_Churn_Propensity_Model/src/model_training/__pycache__/model_training.cpython-311.pyc differ diff --git a/ECommerce_Churn_Propensity_Model/src/model_training/model_training.py b/ECommerce_Churn_Propensity_Model/src/model_training/model_training.py index dd436a3..d9e44c2 100644 --- a/ECommerce_Churn_Propensity_Model/src/model_training/model_training.py +++ b/ECommerce_Churn_Propensity_Model/src/model_training/model_training.py @@ -1,14 +1,10 @@ -import pandas as pd +from pathlib import Path from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, ConfusionMatrixDisplay from matplotlib import pyplot as plt -from pathlib import Path import pickle - +import pandas as pd # Load variables from __init__.py -from . import models_config, insample_scores, outofsample_scores, X, X_train, X_test, y_train, y_test, Config, data_path - -config = Config() -churn_app_path = Path(config.churn_app_models) +from . import models_config, insample_scores, outofsample_scores, data_path, X, X_train, X_test, y_train, y_test, churn_app_path for model in models_config: print(f'COMMENCING TRAINING FOR \'{model}\':') @@ -29,7 +25,7 @@ train_conf_matrix = confusion_matrix(y_train, y_pred) cm_display = ConfusionMatrixDisplay(confusion_matrix=train_conf_matrix, display_labels=[False, True]) cm_display.plot() - plt.savefig(Path(data_path / 'train_conf_matrix.png')) + plt.savefig(Path(data_path) / 'train_conf_matrix.png') # Test set predictions (Out of Sample) y_test_pred = models_config[model].predict(X_test) @@ -46,7 +42,7 @@ test_conf_matrix = confusion_matrix(y_test, y_test_pred) cm_display = ConfusionMatrixDisplay(confusion_matrix=test_conf_matrix, display_labels=[False, True]) cm_display.plot() - plt.savefig(Path(data_path / 'OOS_conf_matrix.png')) + plt.savefig(Path(data_path) / 'OOS_conf_matrix.png') if model == 'logistic_regression': print(f'{model} Feature Importances:\n', models_config[model].coef_) diff --git a/ECommerce_Churn_Propensity_Model/src/preprocessing/__init__.py b/ECommerce_Churn_Propensity_Model/src/preprocessing/__init__.py index 9ff49f1..a288722 100644 --- a/ECommerce_Churn_Propensity_Model/src/preprocessing/__init__.py +++ b/ECommerce_Churn_Propensity_Model/src/preprocessing/__init__.py @@ -22,11 +22,9 @@ def pre_processing(df_path, bins, onehot_cols, output_path, bin_cols=str, sheet_ df[bin_cols] = pd.cut(df[bin_cols], bins, right=False) print(df.value_counts(bin_cols)) - df.set_index('CustomerID', inplace=True) df = pd.get_dummies(df, columns=onehot_cols, dtype=int) # Rename the closed interval '[' columns to suit XGBClassifier() class. Otherwise XGBClassifier() will raise column name errors df.columns = [col.replace('[', '(') for col in df.columns] - # df = df.drop(['Unnamed: 0'], axis=1) # Save PreProcessed Data Frame for downstream consumption df.to_csv(output_path, index=False) diff --git a/ECommerce_Churn_Propensity_Model/src/preprocessing/__pycache__/__init__.cpython-311.pyc b/ECommerce_Churn_Propensity_Model/src/preprocessing/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..92e317c Binary files /dev/null and b/ECommerce_Churn_Propensity_Model/src/preprocessing/__pycache__/__init__.cpython-311.pyc differ diff --git a/ECommerce_Churn_Propensity_Model/src/preprocessing/__pycache__/preprocessing.cpython-311.pyc b/ECommerce_Churn_Propensity_Model/src/preprocessing/__pycache__/preprocessing.cpython-311.pyc new file mode 100644 index 0000000..915db2c Binary files /dev/null and b/ECommerce_Churn_Propensity_Model/src/preprocessing/__pycache__/preprocessing.cpython-311.pyc differ diff --git a/ECommerce_Churn_Propensity_Model/tests.py b/ECommerce_Churn_Propensity_Model/tests.py index 5cde357..0273166 100644 --- a/ECommerce_Churn_Propensity_Model/tests.py +++ b/ECommerce_Churn_Propensity_Model/tests.py @@ -1,6 +1,8 @@ import unittest from pathlib import Path -from .src import read_impute_data, pre_processing, Config +from .src.config import Config +from .src.eda import read_impute_data +from .src.preprocessing import pre_processing config = Config() data_path = config.data_path