Skip to content

Commit

Permalink
Merge pull request #122 from VikramsDataScience/churn-propensity-model
Browse files Browse the repository at this point in the history
Unit test cases completed. I can now commence with the construction of a CI/CD workflow
  • Loading branch information
VikramsDataScience authored Sep 1, 2024
2 parents 9c9aa98 + bb56b0d commit 3130b1b
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 32 deletions.
Binary file modified ECommerce_Churn_Propensity_Model/__pycache__/tests.cpython-311.pyc
Binary file not shown.
51 changes: 19 additions & 32 deletions ECommerce_Churn_Propensity_Model/tests.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import unittest
from pathlib import Path
import pandas as pd
from .src.config import Config
from .src.eda import read_impute_data
from .src.preprocessing import pre_processing

config = Config()
data_path = config.data_path
Expand All @@ -17,32 +16,27 @@ class Test(unittest.TestCase):
def test_read_impute_data(self):
"""
Execute Test Case to determine if there are NaNs in the returned
DataFrame produced by the read_impute_data() function.
IMPORTANT NOTE: Please update the 'imputed_path' object as needed when using
this Test Case!
"""
read_impute_test = read_impute_data(df_path=Path(content_file),
sheet_name=1,
float_cols=float_columns,
categorical_cols=categorical_columns,
output_path=imputed_path)
DataFrame produced by the read_impute_data() function from the 'eda' module.
"""
# Load imputed data to run test case
read_impute_test = pd.read_csv(imputed_path)

self.assertFalse(expr=read_impute_test.isna().values.any(),
# Perform tests and generate pass/fail print statements
imputation_test = self.assertFalse(expr=read_impute_test.isna().values.any(),
msg=f'IMPUTATION TEST CASE FAILED. THERE ARE NANS IN THE \'{imputed_path}\' DATAFRAME!')

print(f'IMPUTATION TEST CASE PASSED! THERE ARE NO NANS IN THE \'{imputed_path}\' DATAFRAME')
if imputation_test is None:
print(f'IMPUTATION TEST CASE PASSED! THERE ARE NO NANS IN THE \'{imputed_path}\' DATAFRAME')

def test_preprocessed_data(self):
"""
Execute Test Case to determine if the preprocessed data contains:
Execute Test Case to determine if the preprocessed data from the 'preprocessing' module contains:
- NaNs: Whilst running pre_processing(), the pd.cut() function's 'right' arg may be set to True
or left unspecified. In either case, it can create NaNs in the resulting DataFrame.
The 'right' arg needs to be set to False.
- renamed columns: The same pd.cut() function will also create a ']' closed column (which is mathematically
very correct!). Whilst this is mathematically sound, the XGBClassifier() hates it and will raise column name
errors in the downstream 'model_training' module. So the 2nd test case will test for existence of these closed columns.
IMPORTANT NOTE: Please update the 'preprocessed_path' object as needed when using
this Test Case!
- renamed columns: The same pd.cut() function will also create column names containing a closing ']' bracket (which is
syntactically correct interval notation in mathematics). Whilst this is mathematically sound, the XGBClassifier() hates it and will
raise column name errors in the downstream 'model_training' module. So this 2nd test case will test for the existence of these closed columns.
"""
def bracket_test(df):
"""
Expand All @@ -52,22 +46,15 @@ def bracket_test(df):
if '[' in col or ']' in col or '{' in col or '}' in col:
return True

pre_process_test = pre_processing(df_path=imputed_path,
bins=[0, 12, 24, 48, 60, 72],
onehot_cols=onehot_categoricals,
output_path=preprocessed_path,
bin_cols='Tenure')
# Load preprocessed data to run test case
preprocessed_df_test = pd.read_csv(preprocessed_path)

# Perform tests and generate pass/fail print statements
nan_test = self.assertFalse(expr=pre_process_test.isna().values.any(),
msg='THERE ARE NANS IN THE INTERVAL COLUMNS! PLEASE CHECK THE PD.CUT() \'RIGHT\' ARG TO DEBUG')
renamed_col_test = self.assertFalse(expr=bracket_test(pre_process_test),
msg='THE INTERVAL BRACKETS ARE INCOMPATIBLE WITH XGBCLASSIFER(). PLEASE CHECK IF THE RETURNED INTERVAL COLUMNS CONTAIN BRACKETS OTHER THAN \'(\' OR \')\'')

if nan_test is None:
print('NAN TEST PASSED! THERE ARE NO NANS IN THE PREPROCESSED DATAFRAME')
renamed_col_test = self.assertFalse(expr=bracket_test(preprocessed_df_test),
msg=f'THE INTERVAL BRACKETS IN \'{preprocessed_path}\' ARE INCOMPATIBLE WITH XGBCLASSIFER(). PLEASE CHECK IF THE RETURNED INTERVAL COLUMNS CONTAIN BRACKETS OTHER THAN \'(\' OR \')\'')

if renamed_col_test is None:
print('BRACKET SHAPE TEST PASSED! THE RETURNED INTERVAL BRACKETS ARE OF A COMPATIBLE SHAPE WITH XGBCLASSIFER()')
print(f'BRACKET SHAPE TEST PASSED! THE RETURNED INTERVAL BRACKETS IN \'{preprocessed_path}\' ARE OF A COMPATIBLE SHAPE WITH XGBCLASSIFER()')

if __name__ == '__main__':
unittest.main()

0 comments on commit 3130b1b

Please sign in to comment.