# test_data.py -- forked from AntoineAugusti/vacances-scolaires
# -*- coding: utf-8 -*-
import unittest
import datetime
import pandas as pd
class DataTest(unittest.TestCase):
START_YEAR, END_YEAR = 1990, 2020
HOLIDAY_NAMES = [
'Vacances de la Toussaint',
'Vacances de Noël',
"Vacances d'hiver",
'Vacances de printemps',
"Vacances d'été"
]
ZONES = ['A', 'B', 'C']
def data(self):
return pd.read_csv(
'data.csv',
parse_dates=['date']
)
def col_zone(self, zone):
if zone not in self.ZONES:
raise ValueError
return 'vacances_zone_' + zone.lower()
def data_with_holiday(self):
def on_holiday(row):
res = False
for zone in self.ZONES:
res = res or row[self.col_zone(zone)]
return res
df = self.data()
df['on_holiday'] = df.apply(lambda row: on_holiday(row), axis=1)
return df
def test_columns(self):
expected = [
'date', 'vacances_zone_a', 'vacances_zone_b',
'vacances_zone_c', 'nom_vacances'
]
self.assertEquals(list(self.data().columns), expected)
def test_no_missing_dates(self):
start = datetime.datetime(self.START_YEAR, 1, 1)
end = datetime.datetime(self.END_YEAR, 12, 31)
pd.testing.assert_series_equal(
self.data().date,
pd.Series(pd.date_range(start=start, end=end)),
check_names=False,
check_exact=True
)
def test_nom_vacances(self):
self.assertEquals(
sorted(list(self.data().nom_vacances.dropna().unique())),
sorted(self.HOLIDAY_NAMES)
)
def test_boolean_values(self):
cols = map(self.col_zone, self.ZONES)
for col in cols:
self.assertEquals(self.data()[col].dtype, bool)
self.assertEquals(
set(self.data()[col].unique()),
set([False, True])
)
def test_holiday_name_set_but_not_on_holiday(self):
df = self.data_with_holiday()
impossible = df[~df.on_holiday & ~df.nom_vacances.isna()]
self.assertEquals(
impossible.shape,
(0, 6),
impossible
)
def test_on_holiday_without_holidayname(self):
df = self.data_with_holiday()
impossible = df[df.on_holiday & df.nom_vacances.isna()]
self.assertEquals(
impossible.shape,
(0, 6),
impossible
)
def test_no_gap_in_holidays(self):
df = self.data()
df_shifted = df.shift(periods=1)
# Count number of times we start and end holidays.
# It counts for each holiday the change from False to True and
# True to False
nb_years = (self.END_YEAR - self.START_YEAR + 1)
nb_missing_holidays = 5
nb_holidays = (nb_years * len(self.HOLIDAY_NAMES) - nb_missing_holidays)
expected = nb_holidays * 2
for zone in self.ZONES:
diff = df_shifted[self.col_zone(zone)] - df[self.col_zone(zone)]
self.assertEquals(
diff.abs().sum(),
expected,
'Zone {zone} seems to have a gap'.format(zone=zone)
)
# Detect a faulty sequence for nom_vacances like:
# ['Vacances d'hiver', 'Vacances d'hiver', 'Vacances de la Toussaint']
diff = df_shifted['nom_vacances'].fillna('') != df['nom_vacances'].fillna('')
self.assertEquals(
diff.sum(),
expected
)
if __name__ == '__main__':
    # Run every test in this module when the file is executed as a script.
    unittest.main()