import gc
import json
import time

import tensorflow as tf
from tensorflow import convert_to_tensor
from tensorflow import keras
from numerapi import NumerAPI
from utils_plus import *

# Set model_path to a saved checkpoint (e.g. 'models/NN/checkpoint') to load weights before training
model_path = None
start = time.time()
napi = NumerAPI()
spinner = Halo(text='', spinner='dots')
current_round = napi.get_current_round(tournament=8) # tournament 8 is the primary Numerai Tournament
download_data()
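# download_data() is a utils_plus helper; presumably it fetches the current round's
# training/validation parquets and features.json (e.g. via napi.download_dataset()).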
# Read the feature metadata and select the "medium" feature set (we would need a
# data-center-grade server to process all of the Numerai features!)
with open("./data/features.json", "r") as f:
    feature_metadata = json.load(f)
features = feature_metadata["feature_sets"]["medium"]
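# For reference, features.json is assumed to map named feature sets to lists of feature
# column names, roughly (structure inferred from the lookups above):
#   {"feature_sets": {"small": ["feature_...", ...], "medium": [...], ...}}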
# Read the training and validation data (stored as parquet files) into pandas DataFrames, restricted to the selected features
training_data, validation_data = read_learning_data(features)
# extract feature matrix and target vector used for training
X_train = training_data.filter(like='feature_', axis='columns')
y_train = training_data[TARGET_COL]
# extract feature matrix and target vector used for validation
X_val = validation_data.filter(like='feature_', axis='columns')
y_val = validation_data[TARGET_COL]
# "garbage collection" (gc) gets rid of unused data and frees up memory
gc.collect()
########################################################################################################################
# define and train your model here using the loaded data sets!
model_name = "nn_example"
NN_model = keras.models.Sequential()
num_feature_neutralization = 200
input_dimension = len(features)
initializer = tf.keras.initializers.HeNormal()
# The input layer:
NN_model.add(
    keras.layers.Dense(input_dimension, kernel_initializer=initializer, input_dim=len(features), activation='selu'))
NN_model.add(keras.layers.Dropout(0.2))
# The hidden layers: halve the width while it is above 100 neurons, then shrink it by 4x per layer
layer_neurons = input_dimension
while layer_neurons > 4:
    if layer_neurons > 100:
        dropout = 0.3
        layer_neurons //= 2
    else:
        dropout = 0.2
        layer_neurons //= 4
    NN_model.add(keras.layers.Dense(layer_neurons, kernel_initializer=initializer, activation='selu',
                                    bias_regularizer=keras.regularizers.l2(1e-4)))
    NN_model.add(keras.layers.Dropout(dropout))
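# For illustration: if input_dimension were 420 (a hypothetical size, not the actual
# length of the "medium" set), the loop above would add hidden layers of widths
# 210, 105, 52, 13 and 3 before exiting.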
# The output layer:
NN_model.add(keras.layers.Dense(1, kernel_initializer='random_uniform', activation='linear'))
# Compile the network:
NN_model.compile(optimizer='adam', loss='mse', metrics=['mae', 'mse'])
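# Sanity check: print the layer stack produced above (widths shrink toward the single linear output neuron)
NN_model.summary()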
spinner.start('Training model')
X_train = convert_to_tensor(X_train)
# Early stopping: halt once the loss has not improved for `patience` epochs and restore the best
# weights. Note that monitor='loss' tracks the *training* loss, as fit() gets no validation data.
es = tf.keras.callbacks.EarlyStopping(monitor='loss', mode='min', verbose=1, patience=1500, restore_best_weights=True)
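# A validation-based variant would look roughly like this (a sketch, not used here):
# es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=50, restore_best_weights=True)
# NN_model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=..., callbacks=[es])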
# Train the neural network, warm-starting from a saved checkpoint if one was given
if model_path:
    NN_model.load_weights(model_path)
    print(f'Model successfully read from {model_path}\n')
NN_model.fit(X_train, y_train, epochs=15000, batch_size=X_train.shape[0] // 100, shuffle=True, callbacks=[es])
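# Note: batch_size = n_samples // 100 yields roughly 100 gradient updates per epoch,
# independent of the size of the training set.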
spinner.succeed()
# Save weights as checkpoint
spinner.start('Saving model')
NN_model.save_weights('models/NN/checkpoint')
spinner.succeed()
gc.collect()
########################################################################################################################
spinner.start('Predicting on validation data')
# Insert the predictions back into the validation DataFrame; we need the full validation
# frame later to perform feature neutralization
X_val = convert_to_tensor(X_val)
# predict() returns shape (n, 1); ravel() flattens it into a 1-D column
validation_data.loc[:, f"preds_{model_name}"] = NN_model.predict(X_val).ravel()
spinner.succeed()
gc.collect()
spinner.start('Neutralizing to risky features')
# Neutralize our predictions to the k riskiest features in the training set
neutralize_riskiest_features(training_data, validation_data, features, model_name, k=num_feature_neutralization)
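# neutralize_riskiest_features() comes from utils_plus. A common recipe (an assumption about
# this helper, not confirmed here): rank features by the riskiness of their exposure in the
# training data, then subtract from the predictions their least-squares projection onto the
# k riskiest feature columns, roughly:
#   exposures = validation_data[riskiest_k].values
#   preds = validation_data[f"preds_{model_name}"].values
#   preds -= exposures @ (np.linalg.pinv(exposures) @ preds)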
spinner.succeed()
gc.collect()
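# rank(pct=True) below converts the neutralized predictions to percentile scores in (0, 1],
# the scale Numerai expects for submitted predictions.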
print('Exporting Predictions to csv...')
validation_data["prediction"] = validation_data[f"preds_{model_name}_neutral_riskiest_{num_feature_neutralization}"] \
.rank(pct=True)
validation_data["prediction"].to_csv(f"predictions/NN/{model_name}.csv")
print('Done!')
spinner.start('Predicting on training data (making training data for the meta-model)')
# Insert the in-sample predictions back into the training DataFrame; after neutralization
# they become the meta-model's training data
X_train = convert_to_tensor(X_train)
training_data.loc[:, f"preds_{model_name}"] = NN_model.predict(X_train).ravel()
spinner.succeed()
gc.collect()
spinner.start('Neutralizing to risky features')
# Neutralize the in-sample predictions to the k riskiest features; training_data serves both
# as the reference frame and as the frame holding the predictions
neutralize_riskiest_features(training_data, training_data, features, model_name, k=num_feature_neutralization)
spinner.succeed()
gc.collect()
print('Exporting model training data to csv...')
training_data["prediction"] = training_data[f"preds_{model_name}_neutral_riskiest_{num_feature_neutralization}"] \
.rank(pct=True)
training_data["prediction"].to_csv(f"predictions/NN/meta_train_{model_name}.csv")
print('Done!')
print(f'Time elapsed: {(time.time() - start) / 60:.1f} mins')