-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patharima.py
140 lines (105 loc) · 4.55 KB
/
arima.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import os
import pandas as pd
import matplotlib.pyplot as plt
from helper import *
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf , plot_pacf
from statsmodels.tsa.arima_model import ARIMA
import statsmodels.api as sm
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings
warnings.filterwarnings('ignore')
cwd = os.getcwd()
inf_ = load_transform_data(data_file = 'DZ_Consumption_price_index.csv') # dataframe (300,1) 1-1-88 to 1-12-2022
inf = inf_.loc['1998-01-01':'2020-12-01','CPI(%)']
inf_test = inf_.loc['2021-01-01':'2022-12-01','CPI(%)']
# Appluing the adfuller test
results = adfuller(inf)
inf_stationary = inf.diff().dropna()
results = adfuller(inf_stationary)
# Searching over AIC and BIC
order_aic_bic = []
for p in range (1,13):
for q in range(1,13):
try:
model = sm.tsa.arima.ARIMA(inf,order = (p,1,q))
results = model.fit()
order_aic_bic.append((p,q,results.aic,results.bic))
except:
order_aic_bic.append((p,q,None,None))
order_df = pd.DataFrame(order_aic_bic,columns=['p','q','aic','bic'])
order_df.to_csv(cwd + '\\results\\arima order sellction.csv')
# sort by aic and bic
print("AIC:", order_df.sort_values('aic').iloc[0,:2])
print("BIC:", order_df.sort_values('bic').iloc[0,:2])
arima_models = ['ARIMA-AIC','ARIMA-BIC']
metrics = ['mse','rmse','mae']
horizons= [6,12,18,24]
results = pd.DataFrame(columns=['model', 'horizon', 'metric', 'value'])
for model in arima_models:
user_input_hyp_param = input(f"Please enter respectively the following optimal parameters for {model} model : p , q separated by commas: ")
# split the input into separate values
values = user_input_hyp_param.split(",")
# convert the values to the appropriate data type (e.g. int, float, str)
param = []
for value in values:
value = value.strip()
if value.isdigit():
param.append(int(value))
elif "." in value and value.replace(".", "").isdigit():
param.append(float(value))
else:
param.append(value)
model_ = sm.tsa.arima.ARIMA(inf,order = (param[0],1,param[1]),freq= 'MS')
reg_results = model_.fit()
resids = reg_results.resid
fig,ax = plt.subplots(figsize =(8,8))
ax = reg_results.plot_diagnostics()
plt.savefig(cwd +f'\\results\\graphs\\{model}_models_diagnostics.png',dpi=300, bbox_inches='tight')
# plt.show()
# Forecasting
forecast = reg_results.get_forecast(steps =24)
mean_forecast = forecast.predicted_mean # from
confidence_intervals= forecast.conf_int()
# print(confidence_intervals)
# Generate forecasts for test data
forecast = reg_results.forecast(steps=24)
# # Calculate MAE, MSE, RMSE, and MAPE of the forecasts
# mae = mean_absolute_error(inf_test, forecast)
# mse = mean_squared_error(inf_test, forecast)
# rmse = np.sqrt(mse)
# print('MAE_model: ', mae)
# print('MSE_model: ', mse)
# print('RMSE_mode: ', rmse)
for metric in metrics:
for h in horizons:
if metric == 'mse':
loss = mean_squared_error(inf_test[:h], forecast[:h])
elif metric == 'rmse':
loss = np.sqrt(mean_squared_error(inf_test[:h], forecast[:h]))
else :
loss = mean_absolute_error(inf_test[:h], forecast[:h])
results = results.append({'model': model, 'horizon': h, 'metric': metric, 'value': loss}, ignore_index=True)
# metrics=['mse','rmse','ame']
# for i in metrics:
# print(f'{i}_{model} :',loss_comp(mean_forecast, inf_test.values.reshape(-1),loss=i))
fig2 = plt.figure(figsize =(8,8))
# plot the original time series
plt.plot(inf_test, label='Original Time Series')
# plot the mean forecast
plt.plot(mean_forecast, label='Mean Forecast')
# plot the upper and lower confidence intervals
plt.fill_between(x=mean_forecast.index, y1=confidence_intervals['lower CPI(%)'],
y2=confidence_intervals['upper CPI(%)'], alpha=0.2,
color='gray', label='Confidence Interval')
# set plot title and axis labels
plt.title(f'{model} Forecast of CPI(%)')
plt.xlabel('Date')
plt.ylabel('CPI(%)')
# add legend to the plot
plt.legend()
# savefig
fig2.savefig(cwd +f'\\results\\graphs\\Actual vs predicted values of {model} model (testing dataset).png',dpi=300, bbox_inches='tight')
# display the plot
# plt.show()
results.to_csv(cwd +f'\\results\\ARIMA forecasting performence.csv')