-
Notifications
You must be signed in to change notification settings - Fork 256
/
util.py
78 lines (60 loc) · 3.46 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import pandas as pd
from sklearn import preprocessing
import numpy as np
# Number of consecutive data rows in each history window built by
# csv_to_dataset; the row right after a window supplies its prediction target.
history_points = 50
def csv_to_dataset(csv_path, *, include_macd=False):
    """Turn one daily-price CSV into sliding-window model inputs and targets.

    The CSV must have a ``date`` column (dropped) and otherwise numeric
    columns.  The first data row (index label 0) is discarded.  Column 0 of
    the remaining data is the prediction target and column 3 feeds the
    technical indicators.
    NOTE(review): the original comments call these the open and the closing
    price, i.e. an ``open, high, low, close, volume`` column order is
    assumed -- confirm against the files actually used.

    Every column is min-max scaled using the statistics of the whole file.
    Window ``i`` holds scaled rows ``i .. i + history_points - 1`` and its
    target is column 0 of row ``i + history_points``.

    Args:
        csv_path: Path of the CSV file to read.
        include_macd: When true, append a MACD-style indicator (12-period EMA
            minus 26-period EMA of column 3) as a second technical-indicator
            column.  Defaults to false, which yields the single SMA column
            the function has always returned.

    Returns:
        A 5-tuple ``(ohlcv_histories_normalised,
        technical_indicators_normalised, next_day_open_values_normalised,
        next_day_open_values, y_normaliser)`` where, with ``n`` windows:

        * ``ohlcv_histories_normalised`` has shape
          ``(n, history_points, n_columns)``;
        * ``technical_indicators_normalised`` has shape ``(n, 1)``
          (``(n, 2)`` with ``include_macd``), min-max scaled across windows;
        * ``next_day_open_values_normalised`` has shape ``(n, 1)``;
        * ``next_day_open_values`` has shape ``(n, 1)`` and is unscaled;
        * ``y_normaliser`` is a ``MinMaxScaler`` fitted on the unscaled
          targets, usable to map predictions back to the original scale.

    Raises:
        ValueError: If the file does not contain more than ``history_points``
            usable rows, so that no window/target pair can be built.
    """
    data = pd.read_csv(csv_path)
    data = data.drop('date', axis=1)
    data = data.drop(0, axis=0)
    data = data.values

    # Fail early with a readable message instead of letting scikit-learn
    # complain about an empty array further down.
    n_windows = len(data) - history_points
    if n_windows <= 0:
        raise ValueError(
            f"{csv_path!r} has {len(data)} usable rows; more than "
            f"{history_points} are needed to build a history window"
        )

    data_normalised = preprocessing.MinMaxScaler().fit_transform(data)

    # Using the last `history_points` rows, predict column 0 of the next row.
    ohlcv_histories_normalised = np.array([
        data_normalised[start:start + history_points].copy()
        for start in range(n_windows)
    ])
    next_day_open_values_normalised = np.expand_dims(
        data_normalised[history_points:, 0].copy(), -1)
    next_day_open_values = np.expand_dims(
        data[history_points:, 0].copy(), -1)

    y_normaliser = preprocessing.MinMaxScaler()
    y_normaliser.fit(next_day_open_values)

    technical_indicators = []
    for window in ohlcv_histories_normalised:
        # Simple moving average of column 3 over the whole window.
        indicators = [np.mean(window[:, 3])]
        if include_macd:
            indicators.append(_calc_ema(window, 12) - _calc_ema(window, 26))
        technical_indicators.append(indicators)
    technical_indicators = np.array(technical_indicators)

    technical_indicators_normalised = (
        preprocessing.MinMaxScaler().fit_transform(technical_indicators))

    # Internal sanity check: one indicator row and one target per window.
    assert (ohlcv_histories_normalised.shape[0]
            == next_day_open_values_normalised.shape[0]
            == technical_indicators_normalised.shape[0])
    return (ohlcv_histories_normalised, technical_indicators_normalised,
            next_day_open_values_normalised, next_day_open_values,
            y_normaliser)


def _calc_ema(values, time_period):
    """Return an exponential moving average of column 3 of ``values``.

    The average is seeded with the mean of column 3 over the whole window and
    then updated with the last ``time_period`` rows (all rows when the window
    is shorter than ``time_period``).

    Formula reference:
    https://www.investopedia.com/ask/answers/122314/what-exponential-moving-average-ema-formula-and-how-ema-calculated.asp
    """
    closes = values[:, 3]
    k = 2 / (1 + time_period)
    ema = np.mean(closes)
    # Clamp the start index so a short window is not indexed with wrapped
    # (negative) positions.
    tail_start = max(len(closes) - time_period, 0)
    for close in closes[tail_start:]:
        ema = close * k + ema * (1 - k)
    return ema
def multiple_csv_to_dataset(test_set_name):
    """Build a train/test split from the ``*daily.csv`` files in the CWD.

    Every file in the current working directory whose name ends with
    ``daily.csv`` -- except ``test_set_name`` -- is converted with
    ``csv_to_dataset`` and the results are concatenated along axis 0 to form
    the training set.  Files are processed in sorted name order so the row
    order of the training set is reproducible.  Each file is scaled
    independently by ``csv_to_dataset``.  The name of every training file is
    printed as it is processed.

    Args:
        test_set_name: File name of the CSV to hold out as the test set.  It
            is matched against the bare names returned by ``os.listdir`` and
            is also passed to ``csv_to_dataset`` as the path to load.

    Returns:
        An 8-tuple ``(ohlcv_train, tech_ind_train, y_train, ohlcv_test,
        tech_ind_test, y_test, unscaled_y_test, y_normaliser)``.  The three
        training arrays and the first three test arrays are the scaled
        outputs of ``csv_to_dataset``; ``unscaled_y_test`` and
        ``y_normaliser`` are the unscaled targets and the target scaler of
        the test file.

    Raises:
        ValueError: If no training CSV is found (previously the integer ``0``
            was returned in place of each training array).
    """
    import os

    train_files = sorted(
        name for name in os.listdir('./')
        if name.endswith('daily.csv') and name != test_set_name
    )
    if not train_files:
        raise ValueError(
            "no '*daily.csv' training files found in the current directory "
            "(other than the test set {!r})".format(test_set_name)
        )

    ohlcv_parts = []
    tech_ind_parts = []
    y_parts = []
    for csv_file_path in train_files:
        print(csv_file_path)
        ohlcv, tech_ind, y_normalised, _, _ = csv_to_dataset(csv_file_path)
        ohlcv_parts.append(ohlcv)
        tech_ind_parts.append(tech_ind)
        y_parts.append(y_normalised)

    # Concatenate once at the end rather than re-copying the growing arrays
    # for every file.
    ohlcv_train = np.concatenate(ohlcv_parts, 0)
    tech_ind_train = np.concatenate(tech_ind_parts, 0)
    y_train = np.concatenate(y_parts, 0)

    ohlcv_test, tech_ind_test, y_test, unscaled_y_test, y_normaliser = (
        csv_to_dataset(test_set_name))

    return (ohlcv_train, tech_ind_train, y_train, ohlcv_test, tech_ind_test,
            y_test, unscaled_y_test, y_normaliser)