-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_data_prep.py
74 lines (55 loc) · 2.34 KB
/
test_data_prep.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import pytest
import pandas as pd
from io import StringIO
from data_prep import prep_bird_flu_data, prep_egg_price_data, prep_stock_price_data
from test_helper_data_prep import (
create_stock_ex,
create_egg_price_ex,
create_bird_flu_ex,
)
@pytest.mark.parametrize(
    ("func", "df", "expected_exception"),
    [
        # Stock frame without the 'Close/Last' column.
        pytest.param(
            prep_stock_price_data,
            pd.DataFrame({"Open": [100, 101, 102]}),
            KeyError,
        ),
        # Egg-price frame without the 'Year' column.
        pytest.param(
            prep_egg_price_data,
            pd.DataFrame({"Price": [2.5, 3.0, 3.2]}),
            ValueError,
        ),
        # Bird-flu frame without the 'State' column.
        pytest.param(
            prep_bird_flu_data,
            pd.DataFrame({"Flock Size": [10, 20]}),
            KeyError,
        ),
    ],
)
def test_prep_functions_raise_errors(func, df, expected_exception):
    """Check that each prep function rejects a frame lacking a required column.

    Args:
        func: The prep function under test.
        df: A deliberately incomplete input DataFrame.
        expected_exception: The exception type ``func(df)`` must raise.
    """
    # Diagnostic output; pytest captures stdout and shows it for failing
    # tests (or always, when run with ``-s``).
    print("\n================ DEBUG INFO =================")
    print(f"Testing function: {func.__name__}")
    print("Expected exception:", expected_exception)
    print("Input DataFrame:")
    print(df)
    print("=============================================")

    with pytest.raises(expected_exception):
        func(df)
def test_stock_price_columns_numeric():
    """Verify the 'Close/Last' column is numeric after prep_stock_price_data.

    The input frame is built by parsing the text from ``create_stock_ex()``
    as CSV.
    """
    raw_csv = StringIO(create_stock_ex())
    prepared = prep_stock_price_data(pd.read_csv(raw_csv))

    close_is_numeric = pd.api.types.is_numeric_dtype(prepared["Close/Last"])
    assert close_is_numeric, "Close/Last must be numeric."
def test_egg_price_date_col_is_datetime():
    """Verify prep_egg_price_data returns a frame indexed by a DatetimeIndex.

    The input frame is built by parsing the text from
    ``create_egg_price_ex()`` as CSV.
    """
    raw_csv = StringIO(create_egg_price_ex())
    prepared = prep_egg_price_data(pd.read_csv(raw_csv))

    result_index = prepared.index
    assert isinstance(result_index, pd.DatetimeIndex), "Index should be a DatetimeIndex."
def test_bird_flu_has_lat_lng():
    """Verify the frame from prep_bird_flu_data has 'lat' and 'lng' columns.

    The input frame is built by parsing the text from
    ``create_bird_flu_ex()`` as CSV.
    """
    raw_csv = StringIO(create_bird_flu_ex())
    prepared = prep_bird_flu_data(pd.read_csv(raw_csv))

    result_columns = prepared.columns
    assert "lat" in result_columns, "DataFrame must have 'lat' column."
    assert "lng" in result_columns, "DataFrame must have 'lng' column."
def test_bird_flu_flock_size_is_numeric():
    """Verify the 'Flock Size' column is numeric after prep_bird_flu_data.

    The input frame is built by parsing the text from
    ``create_bird_flu_ex()`` as CSV.
    """
    raw_csv = StringIO(create_bird_flu_ex())
    prepared = prep_bird_flu_data(pd.read_csv(raw_csv))

    flock_is_numeric = pd.api.types.is_numeric_dtype(prepared["Flock Size"])
    assert flock_is_numeric, "Flock Size should be numeric."