|
| 1 | +import pytest |
| 2 | +import pandas as pd |
| 3 | +import os |
| 4 | +import src.features.data_preprocess as dp |
| 5 | + |
| 6 | + |
def test_load_data_valid_file(tmp_path):
    """load_data on a small well-formed CSV yields a 2x2 DataFrame.

    A header row plus two data rows are written into pytest's
    temporary directory, and the resulting file path is passed to
    ``dp.load_data`` as a plain string.
    """
    csv_text = "col1,col2\n1,2\n3,4"
    csv_path = tmp_path / "test.csv"
    csv_path.write_text(csv_text)

    loaded = dp.load_data(str(csv_path))

    assert isinstance(loaded, pd.DataFrame)
    # Two data rows and two columns; the header line is not a row.
    assert loaded.shape == (2, 2)
| 15 | + |
| 16 | + |
def test_feature_engineering_valid():
    """feature_engineering returns a DataFrame with all ratio columns.

    A two-row frame holding the raw financial inputs is passed to
    ``dp.feature_engineering``; the result must be a DataFrame that
    contains every expected engineered column.
    """
    data = {
        "AnnualIncome": [50000, 60000],
        "TotalAssets": [200000, 300000],
        "Experience": [5, 10],
        "LoanAmount": [20000, 25000],
        "NumberOfDependents": [2, 3],
        "TotalLiabilities": [50000, 60000],
        "MonthlyLoanPayment": [1000, 1200],
        "MonthlyIncome": [4000, 5000],
        "MonthlyDebtPayments": [500, 700],
    }
    df = pd.DataFrame(data)

    transformed_df = dp.feature_engineering(df)

    # Check the type before touching `.columns`: a wrong return type
    # should fail as a clear assertion, not as an AttributeError.
    assert isinstance(transformed_df, pd.DataFrame)

    expected_columns = [
        "AnIncomeToAssetsRatio", "AnExperienceToAnIncomeRatio",
        "LoantoAnIncomeRatio", "DependetToAnIncomeRatio",
        "LoansToAssetsRatio", "LoanPaymentToIncomeRatio",
        "AnIncomeToDepts", "AssetsToLoan",
    ]

    # Collect every absent column so one failing run reports them all
    # instead of stopping at the first.
    missing = [
        col for col in expected_columns
        if col not in transformed_df.columns
    ]
    assert not missing, " ".join(f"{col} is missing!" for col in missing)
| 43 | + |
| 44 | + |
@pytest.fixture
def mock_model_path(tmp_path):
    """Provide a per-test directory for model artifacts.

    This simply hands back pytest's ``tmp_path`` under a name that
    describes how the encoder tests use it.
    """
    model_dir = tmp_path
    return model_dir
| 48 | + |
| 49 | + |
def test_ordinalencoding_train(mock_model_path):
    """ordinalencoding(train=True) encodes statuses and writes a pickle.

    The first three rows of ``EmploymentStatus`` must come back as the
    codes 0, 1 and 2, and ``ordinal_encoder.pkl`` must exist in the
    model directory afterwards.
    """
    statuses = ["Employed", "Self-Employed",
                "Unemployed", "Employed"]
    frame = pd.DataFrame({"EmploymentStatus": statuses})

    encoded = dp.ordinalencoding(frame, str(mock_model_path), train=True)

    # Rows 0..2 hold the three distinct categories, in this order.
    for position, expected_code in enumerate((0, 1, 2)):
        assert encoded["EmploymentStatus"].iloc[position] == expected_code

    encoder_file = os.path.join(mock_model_path, 'ordinal_encoder.pkl')
    assert os.path.exists(encoder_file)
| 65 | + |
| 66 | + |
def test_ordinalencoding_test(mock_model_path):
    """train=False must give the same codes as train=True.

    Two frames are built from identical data. The first is encoded
    with ``train=True`` and the second with ``train=False`` against
    the same model directory; the encoded ``EmploymentStatus``
    columns must be equal.
    """
    column = "EmploymentStatus"
    statuses = ["Employed", "Self-Employed",
                "Unemployed", "Employed"]
    fit_frame = pd.DataFrame({column: statuses})
    apply_frame = pd.DataFrame({column: statuses})
    model_dir = str(mock_model_path)

    # Presumably the train=False call reuses what the train=True call
    # stored in model_dir -- confirm against dp.ordinalencoding.
    fitted = dp.ordinalencoding(fit_frame, model_dir, train=True)
    applied = dp.ordinalencoding(apply_frame, model_dir, train=False)

    assert fitted[column].equals(applied[column])
0 commit comments