# Scraped from the GitHub page for "bitcoin_price _prediction.py"
# (repository forked from paulgureghian/PyTorch_Projects; 251 lines, 181 loc, 5.55 KB).
# The bare integers that follow are the page's line-number gutter, not part of the script.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
""" Created by Paul A. Gureghian on 9/26/2018 """
""" This Python script uses keras to predict bitcoin price """
""" I picked a Recurrent Neural Network and a Bitcoin dataset """
### import packages
# numeric / tabular helpers
import numpy as np
import pandas as pd
from statistics import mean
from matplotlib import pyplot as plt
# Keras building blocks for the recurrent model
from keras.layers import LSTM
from keras.layers import Dense
from keras.models import Sequential
from keras.callbacks import History
# History callback instance; it is handed to regr.fit() further down, after
# which the name 'history' is rebound to the object returned by fit().
history = History()
# experiment tracking
import mlflow
import mlflow.keras
# NOTE: hard-coded, machine-specific tracking location; adjust it when the
# script is run anywhere other than the original author's machine.
mlflow.set_tracking_uri('/Users/paulgureghian/mlruns')
from sklearn.preprocessing import MinMaxScaler
### load the raw CSV into a dataframe
# Let pandas show up to 8 columns on a 1000-character-wide console line so
# the previews printed below are not truncated.
for option_name, option_value in (('display.max_columns', 8),
                                  ('display.width', 1000)):
    pd.set_option(option_name, option_value)

df = pd.read_csv('/bitstamp.csv')

# Preview the first rows, then the (rows, columns) shape, each followed by
# an empty line.
for summary in (df.head(), df.shape):
    print(summary)
    print('')
### collapse the rows into one mean price per calendar date
# 'Timestamp' is parsed as seconds since the epoch and reduced to its date
# part, which becomes the grouping key.
timestamps = pd.to_datetime(df['Timestamp'], unit='s')
df['date'] = timestamps.dt.date
group = df.groupby('date')
Real_Price = group['Weighted_Price'].mean()

# Preview the daily series and its length, each followed by an empty line.
for summary in (Real_Price.head(), Real_Price.shape):
    print(summary)
    print('')
### hold out the last 'prediction_days' entries as the test set
prediction_days = 30
split_at = len(Real_Price) - prediction_days
df_train = Real_Price[:split_at]
df_test = Real_Price[split_at:]

# Show the head and the shape of each partition (train first, then test).
for part in (df_train, df_test):
    print(part.head())
    print('')
    print(part.shape)
    print('')
### turn the training series into a single-column 2-D array
# One row per training entry, one column holding the price.
training_set = df_train.values.reshape(len(df_train), 1)

# Heading, array and shape, each followed by an empty line.
for item in ("Training set after reshaping:", training_set, training_set.shape):
    print(item)
    print('')
### scale the training data and build the one-step-ahead pairs
sc = MinMaxScaler()
training_set = sc.fit_transform(training_set)

# Each input row is paired with the row that follows it, so the target is
# the next entry's (scaled) price.
y_train = training_set[1:]
X_train = training_set[:-1]
# Give the inputs the 3-D layout (rows, 1, 1) that is fed to the LSTM.
X_train = X_train.reshape(X_train.shape[0], 1, 1)

# (item, blank_line_after) pairs, in the exact order they are reported.
# X_train is already 3-D when it is printed under "Define X_train".
report = (
    ("Scaled training set:", True),
    (training_set, True),
    ("Define X_train", True),
    (X_train, True),
    (X_train.shape, False),
    ("Define y_train:", True),
    (y_train, True),
    (y_train.shape, True),
    ("X_train reshaped:", True),
    (X_train, True),
    (X_train.shape, True),
)
for item, blank_line_after in report:
    print(item)
    if blank_line_after:
        print('')
### define and compile the recurrent regressor
optimizer = 'RMSprop'

# A 4-unit LSTM with sigmoid activation over sequences of any length with
# one feature per step, followed by a single-unit dense output.
recurrent_layer = LSTM(units=4, activation='sigmoid', input_shape=(None, 1))
output_layer = Dense(units=1)

regr = Sequential()
regr.add(recurrent_layer)
regr.add(output_layer)

# Mean squared error is the training loss; mean absolute error is tracked
# alongside it as a metric.
regr.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mae'])
### fit the model on the training set
batch_size = 15
epochs = 20
# batch_size and epochs are passed by keyword so they cannot be silently
# mis-assigned if the positional order of fit() differs between Keras
# versions.  No explicit History callback is passed: fit() always returns
# a History object of its own.
history = regr.fit(X_train, y_train, batch_size=batch_size, epochs=epochs)

# Both summary figures are averages over all epochs, not last-epoch values.
loss = mean(history.history['loss'])
print("loss_mean_squared_error: ", loss)

# The model was compiled with metrics=['mae'].  Depending on the Keras
# version the per-epoch values are stored under 'mae' or under
# 'mean_absolute_error', so accept either key instead of raising KeyError.
mae_key = 'mae' if 'mae' in history.history else 'mean_absolute_error'
metrics = mean(history.history[mae_key])
print("metrics_mae: ", metrics)
print('')
### create predictions on the test set
# Every intermediate array is kept under its own name so that each
# diagnostic label below prints the data it actually describes.
test_set = df_test.values
test_reshaped = np.reshape(test_set, (len(test_set), 1))
# Reuse the scaler fitted on the training data; it is not re-fitted here.
test_scaled = sc.transform(test_reshaped)
# 3-D layout (rows, 1, 1), matching the layout of X_train.
inputs = np.reshape(test_scaled, (len(test_scaled), 1, 1))
# The model was trained on scaled targets, so its raw output is scaled too;
# inverse_transform maps it back to price units.
scaled_predictions = regr.predict(inputs)
predicted_BTC_price = sc.inverse_transform(scaled_predictions)

print("Test set after reshaping:")
print('')
print(test_reshaped)
print('')
print(test_reshaped.shape)
print('')
print("Scaled inputs:")
print('')
print(test_scaled)
print('')
print("Reshaped inputs:")
print('')
print(inputs)
print('')
print(inputs.shape)
print('')
print("Predicted BTC price: ", predicted_BTC_price)
print('')
print("Scaled predicted BTC price: ", scaled_predictions)
print('')
### get evaluation of the model predictions
# Score the model against the real prices rather than against its own
# (inverse-transformed) predictions, which says nothing about accuracy and
# mixes scaled outputs with unscaled targets.
# Training paired each entry with the one after it, so the matching target
# for test input i is the scaled actual price at i + 1; the last input has
# no known target and is left out.
actual_scaled = sc.transform(np.reshape(test_set, (len(test_set), 1)))
model_evaluation = regr.evaluate(inputs[:-1], actual_scaled[1:])
# evaluate() returns [loss, metric, ...]; element 0 is the compiled loss
# (mean squared error on the scaled values).
model_evaluation = float(model_evaluation[0])
print("Model evaluation is: ", model_evaluation)
print('')
### visualize the results
print("Visualize the results:")
print('')

### plot the actual and predicted prices
# NOTE: predicted_BTC_price[i] is the model output for test input i, and the
# model was trained to output the following entry's price, so the two curves
# are drawn without correcting for that one-step offset.
fig = plt.figure(figsize=(25, 20), dpi=80, facecolor='w', edgecolor='k')
ax = plt.gca()
plt.plot(test_set, color='red', label="Real BTC Price")
plt.plot(predicted_BTC_price, color='blue', label="Predicted BTC Price")
plt.title("BTC Price Prediction", fontsize=40)
plt.axis('tight')

### reindex 'df_test' so the dates become a regular 'date' column
df_test = df_test.reset_index()
x = df_test.index

### label every x position with its date
labels = df_test['date']
plt.xticks(x, labels, rotation='vertical')

### set fontsize for 'x' and 'y' ticks
for axis in (ax.xaxis, ax.yaxis):
    for tick in axis.get_major_ticks():
        tick.label1.set_fontsize(18)

### set plot labels
plt.xlabel('Time', fontsize=40)
plt.ylabel('BTC Price(USD)', fontsize=40)

### set plot legend (loc=2 is the upper-left corner)
plt.legend(loc=2, prop={'size': 25})

### save the plot before showing it
# plt.show() can block until the window is closed; writing the PNG first
# guarantees the file exists and holds the finished figure either way.
fig.savefig('btc_price_prediction_plot.png')

### show the plot
plt.show()
### record the experiment with mlflow
# NOTE(review): the source lost its indentation; every logging call below is
# assumed to belong to this single run - confirm against the original file.
with mlflow.start_run() as run:
    ### hyper-parameters
    for param_name, param_value in (
        ("epochs", epochs),
        ("optimizer", optimizer),
        ("batch_size", batch_size),
    ):
        mlflow.log_param(param_name, param_value)

    ### training and evaluation figures
    for metric_name, metric_value in (
        ("loss_mse", loss),
        ("metrics_mae", metrics),
        ("model_evaluation", model_evaluation),
    ):
        mlflow.log_metric(metric_name, metric_value)

    ### the saved plot and the trained model
    mlflow.log_artifact('btc_price_prediction_plot.png')
    model_path = "models"
    mlflow.keras.log_model(regr, model_path)

    ### a small text note, written locally and then attached to the run
    with open("info.txt", "w") as f:
        f.write("btc_price_prediction_plot")
    mlflow.log_artifact("info.txt")