-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Switched away from pyramid ARIMA due to stability issues
* Now supports uncompressed pickle files (rather than just bzip2 compressed)
* Using Python 3.7.3
* Handles imported data when dates are stored as strings rather than Timestamp objects
* Corrected unigram handling
- Loading branch information
1 parent
776dda3
commit 84f6905
Showing
25 changed files
with
516 additions
and
126 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -82,6 +82,7 @@ everybody | |
everyone | ||
everything | ||
everywhere | ||
excess | ||
f | ||
few | ||
find | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
situation | ||
consist | ||
first | ||
plurality | ||
plurality | ||
second |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,3 @@ | ||
etc | ||
etc | ||
cover | ||
adjacent |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,27 +1,76 @@ | ||
import warnings | ||
|
||
import numpy as np | ||
from numpy import clip, inf | ||
from pyramid.arima import auto_arima | ||
from sklearn.metrics import mean_squared_error | ||
from statsmodels.tsa.arima_model import ARIMA | ||
|
||
|
||
class ARIMAForecast(object):
    """Forecast a univariate series with a grid-searched ARIMA model.

    On construction every ``(p, d, q)`` combination from a fixed candidate
    grid is scored by walk-forward validation on the last 20% of the series;
    the order with the lowest mean squared error is kept and used to produce
    ``num_prediction_periods`` one-step-ahead forecasts, each fed back into
    the series before the next step.

    Relies on the module-level names ``ARIMA``, ``mean_squared_error``,
    ``np``, ``clip``, ``inf`` and ``warnings``.
    """

    # Candidate orders explored by the grid search.
    _P_VALUES = (0, 1, 2, 4, 6)
    _D_VALUES = (0, 1, 2)
    _Q_VALUES = (0, 1, 2)

    # Fraction of the series used to seed the walk-forward validation.
    _TRAIN_RATIO = 0.8

    def __init__(self, data_in, num_prediction_periods):
        """Select an ARIMA order for ``data_in`` and pre-compute the forecast.

        Args:
            data_in: sequence of ``float`` observations, oldest first.
            num_prediction_periods: number of future periods to forecast.

        Raises:
            ValueError: if any element of ``data_in`` is not a ``float``.
        """
        if not all(isinstance(x, float) for x in data_in):
            raise ValueError('Time series must be all float values')

        # NOTE(review): this silences *all* warnings process-wide and is never
        # undone. Kept as-is to preserve existing behaviour; consider scoping
        # it with ``warnings.catch_warnings()``.
        warnings.filterwarnings("ignore")

        self.__order, _best_score = self.__evaluate_models(
            data_in, self._P_VALUES, self._D_VALUES, self._Q_VALUES)
        # If no order could be scored, ``self.__order`` is None; the predict
        # helper then fails on its first fit and yields an all-NaN forecast.
        self.__predictions = self.__arima_model_predict(
            data_in, self.__order, num_prediction_periods)

    def __evaluate_models(self, dataset, p_values, d_values, q_values):
        """Grid-search ARIMA orders and return ``(best_order, best_mse)``.

        Returns ``(None, inf)`` when no candidate order could be scored.
        """
        dataset = np.array(dataset, dtype='float32')
        best_score, best_cfg = float("inf"), None
        for p in p_values:
            for d in d_values:
                for q in q_values:
                    order = (p, d, q)
                    try:
                        mse = self.__evaluate_arima_model(
                            dataset, order, ground_truth_in_history=True)
                    except Exception:
                        # Any failure while fitting/scoring this order simply
                        # disqualifies it. ``Exception`` (not a bare except)
                        # so Ctrl-C and SystemExit still propagate.
                        continue
                    if mse < best_score:
                        best_score, best_cfg = mse, order
        return best_cfg, best_score

    def __evaluate_arima_model(self, X, arima_order, ground_truth_in_history=False):
        """Return the walk-forward MSE of ``arima_order`` on the tail of ``X``.

        The first 80% of ``X`` seeds the history; the remainder is predicted
        one step at a time, refitting the model before each step.

        Args:
            X: the full series.
            arima_order: ``(p, d, q)`` tuple passed to ``ARIMA``.
            ground_truth_in_history: when true, the actual observation is
                appended to the history after each step; otherwise the
                prediction itself is appended.
        """
        train_size = int(len(X) * self._TRAIN_RATIO)
        train, test = X[0:train_size], X[train_size:]
        history = list(train)
        predictions = []

        for actual in test:
            model_fit = ARIMA(history, order=arima_order).fit(disp=0, maxiter=200)
            yhat = model_fit.forecast()[0][0]
            predictions.append(yhat)
            history.append(actual if ground_truth_in_history else yhat)

        return mean_squared_error(test, predictions)

    def __arima_model_predict(self, X, arima_order, steps_ahead):
        """Forecast ``steps_ahead`` periods beyond ``X``, one step at a time.

        Each prediction is appended to the series before the model is refitted
        for the next step. If fitting or forecasting fails at any step, the
        remaining slots are filled with NaN so the result always has
        ``steps_ahead`` entries.
        """
        predictions = []
        try:
            for _ in range(steps_ahead):
                model_fit = ARIMA(X, order=arima_order).fit(disp=0)
                yhat = model_fit.forecast()[0][0]
                predictions.append(yhat)
                X = np.append(X, yhat)
        except Exception:
            # Best-effort: keep what was predicted so far, pad the rest.
            predictions.extend([np.nan] * (steps_ahead - len(predictions)))

        return predictions

    @property
    def configuration(self):
        """The selected ``(p, d, q)`` order, or None if none could be fitted."""
        return self.__order

    def predict_counts(self):
        """Return the pre-computed forecast with negative values clipped to 0."""
        return clip(self.__predictions, 0, inf)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.