1
1
import unittest
2
2
from pathlib import Path
3
+ import pandas as pd
3
4
from .src .config import Config
4
- from .src .eda import read_impute_data
5
- from .src .preprocessing import pre_processing
6
5
7
6
config = Config ()
8
7
data_path = config .data_path
@@ -17,32 +16,27 @@ class Test(unittest.TestCase):
17
16
def test_read_impute_data (self ):
18
17
"""
19
18
Execute Test Case to determine if there are NaNs in the returned
20
- DataFrame produced by the read_impute_data() function.
21
- IMPORTANT NOTE: Please update the 'imputed_path' object as needed when using
22
- this Test Case!
23
- """
24
- read_impute_test = read_impute_data (df_path = Path (content_file ),
25
- sheet_name = 1 ,
26
- float_cols = float_columns ,
27
- categorical_cols = categorical_columns ,
28
- output_path = imputed_path )
19
+ DataFrame produced by the read_impute_data() function from the 'eda' module.
20
+ """
21
+ # Load imputed data to run test case
22
+ read_impute_test = pd .read_csv (imputed_path )
29
23
30
- self .assertFalse (expr = read_impute_test .isna ().values .any (),
24
+ # Perform tests and generate pass/fail print statements
25
+ imputation_test = self .assertFalse (expr = read_impute_test .isna ().values .any (),
31
26
msg = f'IMPUTATION TEST CASE FAILED. THERE ARE NANS IN THE \' { imputed_path } \' DATAFRAME!' )
32
27
33
- print (f'IMPUTATION TEST CASE PASSED! THERE ARE NO NANS IN THE \' { imputed_path } \' DATAFRAME' )
28
+ if imputation_test is None :
29
+ print (f'IMPUTATION TEST CASE PASSED! THERE ARE NO NANS IN THE \' { imputed_path } \' DATAFRAME' )
34
30
35
31
def test_preprocessed_data (self ):
36
32
"""
37
- Execute Test Case to determine if the preprocessed data contains:
33
+ Execute Test Case to determine if the preprocessed data from the 'preprocessing' module contains:
38
34
- NaNs: Whilst running the pre_processing() the pd.cut() function's 'right' positional arg may be set to True
39
35
or not specified. If it is either set to True or left unspecified, it can create NaNs in the resulting DataFrame.
40
36
The 'right' arg needs to be set to False.
41
- - renamed columns: The same pd.cut() function will also create a ']' closed column (which is mathematically
42
- very correct!). Whilst this is mathematically sound, the XGBClassifier() hates it and will raise column name
43
- errors in the downstream 'model_training' module. So the 2nd test case will test for existence of these closed columns.
44
- IMPORTANT NOTE: Please update the 'preprocessed_path' object as needed when using
45
- this Test Case!
37
+ - renamed columns: The same pd.cut() function will also create a ']' closed column (which is syntactically correct in
38
+ mathematics). Whilst this is mathematically sound, the XGBClassifier() hates it and will raise column name errors in the
39
+ downstream 'model_training' module. So this 2nd test case will test for existence of these closed columns.
46
40
"""
47
41
def bracket_test (df ):
48
42
"""
@@ -52,22 +46,15 @@ def bracket_test(df):
52
46
if '[' in col or ']' in col or '{' in col or '}' in col :
53
47
return True
54
48
55
- pre_process_test = pre_processing (df_path = imputed_path ,
56
- bins = [0 , 12 , 24 , 48 , 60 , 72 ],
57
- onehot_cols = onehot_categoricals ,
58
- output_path = preprocessed_path ,
59
- bin_cols = 'Tenure' )
49
+ # Load preprocessed data to run test case
50
+ preprocessed_df_test = pd .read_csv (preprocessed_path )
60
51
61
52
# Perform tests and generate pass/fail print statements
62
- nan_test = self .assertFalse (expr = pre_process_test .isna ().values .any (),
63
- msg = 'THERE ARE NANS IN THE INTERVAL COLUMNS! PLEASE CHECK THE PD.CUT() \' RIGHT\' ARG TO DEBUG' )
64
- renamed_col_test = self .assertFalse (expr = bracket_test (pre_process_test ),
65
- msg = 'THE INTERVAL BRACKETS ARE INCOMPATIBLE WITH XGBCLASSIFER(). PLEASE CHECK IF THE RETURNED INTERVAL COLUMNS CONTAIN BRACKETS OTHER THAN \' (\' OR \' )\' ' )
66
-
67
- if nan_test is None :
68
- print ('NAN TEST PASSED! THERE ARE NO NANS IN THE PREPROCESSED DATAFRAME' )
53
+ renamed_col_test = self .assertFalse (expr = bracket_test (preprocessed_df_test ),
54
+ msg = f'THE INTERVAL BRACKETS IN \' { preprocessed_path } \' ARE INCOMPATIBLE WITH XGBCLASSIFER(). PLEASE CHECK IF THE RETURNED INTERVAL COLUMNS CONTAIN BRACKETS OTHER THAN \' (\' OR \' )\' ' )
55
+
69
56
if renamed_col_test is None :
70
- print ('BRACKET SHAPE TEST PASSED! THE RETURNED INTERVAL BRACKETS ARE OF A COMPATIBLE SHAPE WITH XGBCLASSIFER()' )
57
+ print (f 'BRACKET SHAPE TEST PASSED! THE RETURNED INTERVAL BRACKETS IN \' { preprocessed_path } \' ARE OF A COMPATIBLE SHAPE WITH XGBCLASSIFER()' )
71
58
72
59
if __name__ == '__main__' :
73
60
unittest .main ()
0 commit comments