Skip to content

Commit 6a79d64

Browse files
committed
create tests/test_features/test_data_preprocess.py
1 parent 41020e2 commit 6a79d64

File tree

1 file changed

+85
-0
lines changed

1 file changed

+85
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import pytest
2+
import pandas as pd
3+
import os
4+
import src.features.data_preprocess as dp
5+
6+
7+
def test_load_data_valid_file(tmp_path):
    """Check that dp.load_data reads a small CSV file into a DataFrame.

    A CSV with a header row and two data rows is written under the
    pytest-provided ``tmp_path`` and its path is handed to ``load_data``
    as a string.
    """
    csv_path = tmp_path / "test.csv"
    csv_path.write_text("col1,col2\n1,2\n3,4")

    loaded = dp.load_data(str(csv_path))

    # Two data rows and two columns are expected from the file above.
    assert isinstance(loaded, pd.DataFrame)
    assert loaded.shape == (2, 2)
15+
16+
17+
def test_feature_engineering_valid():
    """Check that dp.feature_engineering adds the expected ratio columns.

    A two-row frame containing the raw financial columns is passed to
    ``feature_engineering``; the result must be a DataFrame that contains
    every name listed in ``expected_columns``.
    """
    data = {
        "AnnualIncome": [50000, 60000],
        "TotalAssets": [200000, 300000],
        "Experience": [5, 10],
        "LoanAmount": [20000, 25000],
        "NumberOfDependents": [2, 3],
        "TotalLiabilities": [50000, 60000],
        "MonthlyLoanPayment": [1000, 1200],
        "MonthlyIncome": [4000, 5000],
        "MonthlyDebtPayments": [500, 700],
    }
    df = pd.DataFrame(data)
    transformed_df = dp.feature_engineering(df)

    # Verify the return type before touching ``.columns``; otherwise a
    # non-DataFrame result would fail with AttributeError instead of a
    # clear assertion failure.
    assert isinstance(transformed_df, pd.DataFrame)

    # Column names are spelled exactly as the module under test is
    # expected to produce them.
    expected_columns = [
        "AnIncomeToAssetsRatio", "AnExperienceToAnIncomeRatio",
        "LoantoAnIncomeRatio", "DependetToAnIncomeRatio",
        "LoansToAssetsRatio", "LoanPaymentToIncomeRatio",
        "AnIncomeToDepts", "AssetsToLoan",
    ]
    for col in expected_columns:
        assert col in transformed_df.columns, f"{col} is missing!"
43+
44+
45+
@pytest.fixture
def mock_model_path(tmp_path):
    """Expose pytest's per-test ``tmp_path`` as the model directory."""
    model_dir = tmp_path
    return model_dir
48+
49+
50+
def test_ordinalencoding_train(mock_model_path):
    """Check dp.ordinalencoding in training mode.

    The ``EmploymentStatus`` column must be encoded as
    Employed -> 0, Self-Employed -> 1, Unemployed -> 2, and an
    ``ordinal_encoder.pkl`` file must exist in the model directory
    after the call.
    """
    data = {
        "EmploymentStatus": ["Employed", "Self-Employed",
                             "Unemployed", "Employed"]
    }
    df1 = pd.DataFrame(data)

    transformed_df = dp.ordinalencoding(df1, str(mock_model_path), train=True)

    # Compare the whole column so the repeated "Employed" value in the
    # last row is verified too, not just the first three rows.
    encoded = transformed_df["EmploymentStatus"].tolist()
    assert encoded == [0, 1, 2, 0]

    assert os.path.exists(os.path.join(mock_model_path, 'ordinal_encoder.pkl'))
65+
66+
67+
def test_ordinalencoding_test(mock_model_path):
    """Check that train and inference encodings agree for identical input.

    ``dp.ordinalencoding`` is first called with ``train=True`` and then
    with ``train=False`` against the same model directory; both calls
    must yield an equal ``EmploymentStatus`` column.
    """
    statuses = {
        "EmploymentStatus": ["Employed", "Self-Employed",
                             "Unemployed", "Employed"]
    }
    model_dir = str(mock_model_path)
    train_frame = pd.DataFrame(statuses)
    test_frame = pd.DataFrame(statuses)

    # The training call must come first; the second call runs with
    # train=False against the same directory.
    encoded_train = dp.ordinalencoding(train_frame, model_dir, train=True)
    encoded_test = dp.ordinalencoding(test_frame, model_dir, train=False)

    train_codes = encoded_train["EmploymentStatus"]
    test_codes = encoded_test["EmploymentStatus"]
    assert train_codes.equals(test_codes)

0 commit comments

Comments
 (0)