Skip to content

Commit 3130b1b

Browse files
Merge pull request #122 from VikramsDataScience/churn-propensity-model
Unit test cases completed. I can now commence with the construction of a CI/CD workflow.
2 parents 9c9aa98 + bb56b0d commit 3130b1b

File tree

2 files changed

+19
-32
lines changed

2 files changed

+19
-32
lines changed
Binary file not shown.
Lines changed: 19 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
import unittest
22
from pathlib import Path
3+
import pandas as pd
34
from .src.config import Config
4-
from .src.eda import read_impute_data
5-
from .src.preprocessing import pre_processing
65

76
config = Config()
87
data_path = config.data_path
@@ -17,32 +16,27 @@ class Test(unittest.TestCase):
1716
def test_read_impute_data(self):
1817
"""
1918
Execute Test Case to determine if there are NaNs in the returned
20-
DataFrame produced by the read_impute_data() function.
21-
IMPORTANT NOTE: Please update the 'imputed_path' object as needed when using
22-
this Test Case!
23-
"""
24-
read_impute_test = read_impute_data(df_path=Path(content_file),
25-
sheet_name=1,
26-
float_cols=float_columns,
27-
categorical_cols=categorical_columns,
28-
output_path=imputed_path)
19+
DataFrame produced by the read_impute_data() function from the 'eda' module.
20+
"""
21+
# Load imputed data to run test case
22+
read_impute_test = pd.read_csv(imputed_path)
2923

30-
self.assertFalse(expr=read_impute_test.isna().values.any(),
24+
# Perform tests and generate pass/fail print statements
25+
imputation_test = self.assertFalse(expr=read_impute_test.isna().values.any(),
3126
msg=f'IMPUTATION TEST CASE FAILED. THERE ARE NANS IN THE \'{imputed_path}\' DATAFRAME!')
3227

33-
print(f'IMPUTATION TEST CASE PASSED! THERE ARE NO NANS IN THE \'{imputed_path}\' DATAFRAME')
28+
if imputation_test is None:
29+
print(f'IMPUTATION TEST CASE PASSED! THERE ARE NO NANS IN THE \'{imputed_path}\' DATAFRAME')
3430

3531
def test_preprocessed_data(self):
3632
"""
37-
Execute Test Case to determine if the preprocessed data contains:
33+
Execute Test Case to determine if the preprocessed data from the 'preprocessing' module contains:
3834
- NaNs: Whilst running pre_processing(), the pd.cut() function's 'right' positional arg may be set to True
3935
or not specified. If it's set to either True or not specified, it can create NaNs in the resulting DataFrame.
4036
The 'right' arg needs to be set to False.
41-
- renamed columns: The same pd.cut() function will also create a ']' closed column (which is mathematically
42-
very correct!). Whilst this is mathematically sound, the XGBClassifier() hates it and will raise column name
43-
errors in the downstream 'model_training' module. So the 2nd test case will test for existence of these closed columns.
44-
IMPORTANT NOTE: Please update the 'preprocessed_path' object as needed when using
45-
this Test Case!
37+
- renamed columns: The same pd.cut() function will also create a ']' closed column (which is syntactically correct in
38+
mathematics). Whilst this is mathematically sound, the XGBClassifier() hates it and will raise column name errors in the
39+
downstream 'model_training' module. So this 2nd test case will test for existence of these closed columns.
4640
"""
4741
def bracket_test(df):
4842
"""
@@ -52,22 +46,15 @@ def bracket_test(df):
5246
if '[' in col or ']' in col or '{' in col or '}' in col:
5347
return True
5448

55-
pre_process_test = pre_processing(df_path=imputed_path,
56-
bins=[0, 12, 24, 48, 60, 72],
57-
onehot_cols=onehot_categoricals,
58-
output_path=preprocessed_path,
59-
bin_cols='Tenure')
49+
# Load preprocessed data to run test case
50+
preprocessed_df_test = pd.read_csv(preprocessed_path)
6051

6152
# Perform tests and generate pass/fail print statements
62-
nan_test = self.assertFalse(expr=pre_process_test.isna().values.any(),
63-
msg='THERE ARE NANS IN THE INTERVAL COLUMNS! PLEASE CHECK THE PD.CUT() \'RIGHT\' ARG TO DEBUG')
64-
renamed_col_test = self.assertFalse(expr=bracket_test(pre_process_test),
65-
msg='THE INTERVAL BRACKETS ARE INCOMPATIBLE WITH XGBCLASSIFER(). PLEASE CHECK IF THE RETURNED INTERVAL COLUMNS CONTAIN BRACKETS OTHER THAN \'(\' OR \')\'')
66-
67-
if nan_test is None:
68-
print('NAN TEST PASSED! THERE ARE NO NANS IN THE PREPROCESSED DATAFRAME')
53+
renamed_col_test = self.assertFalse(expr=bracket_test(preprocessed_df_test),
54+
msg=f'THE INTERVAL BRACKETS IN \'{preprocessed_path}\' ARE INCOMPATIBLE WITH XGBCLASSIFER(). PLEASE CHECK IF THE RETURNED INTERVAL COLUMNS CONTAIN BRACKETS OTHER THAN \'(\' OR \')\'')
55+
6956
if renamed_col_test is None:
70-
print('BRACKET SHAPE TEST PASSED! THE RETURNED INTERVAL BRACKETS ARE OF A COMPATIBLE SHAPE WITH XGBCLASSIFER()')
57+
print(f'BRACKET SHAPE TEST PASSED! THE RETURNED INTERVAL BRACKETS IN \'{preprocessed_path}\' ARE OF A COMPATIBLE SHAPE WITH XGBCLASSIFER()')
7158

7259
if __name__ == '__main__':
7360
unittest.main()

0 commit comments

Comments
 (0)