
Commit 67fbdaf (parent: f6b019d)

Fix many bugs in TabNet and use_gpu

14 files changed, +70 -49 lines

.gitignore (+2)

@@ -36,3 +36,5 @@ tags
 .vscode/

 *.swp
+
+./pretrain
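
The new ./pretrain entry keeps TabNet's default pretraining output directory (see the ./pretrain/best.model default in pytorch_tabnet.py below) out of version control.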

examples/benchmarks/README.md (+1 -1)

@@ -17,6 +17,7 @@ The numbers shown below demonstrate the performance of the entire `workflow` of
 | ALSTM (Yao Qin, et al.) | Alpha360 | 0.0493±0.01 | 0.3778±0.06 | 0.0585±0.00 | 0.4606±0.04 | 0.0513±0.03 | 0.6727±0.38 | -0.1085±0.02 |
 | GATs (Petar Velickovic, et al.) | Alpha360 | 0.0475±0.00 | 0.3515±0.02 | 0.0592±0.00 | 0.4585±0.01 | 0.0876±0.02 | 1.1513±0.27 | -0.0795±0.02 |
 | DoubleEnsemble (Chuheng Zhang, et al.) | Alpha360 | 0.0407±0.00 | 0.3053±0.00 | 0.0490±0.00 | 0.3840±0.00 | 0.0380±0.02 | 0.5000±0.21 | -0.0984±0.02 |
+
 ## Alpha158 dataset
 | Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
 |---|---|---|---|---|---|---|---|---|
@@ -25,7 +26,6 @@ The numbers shown below demonstrate the performance of the entire `workflow` of
 | XGBoost (Tianqi Chen, et al.) | Alpha158 | 0.0481±0.00 | 0.3659±0.00 | 0.0495±0.00 | 0.4033±0.00 | 0.1111±0.00 | 1.2915±0.00 | -0.0893±0.00 |
 | LightGBM (Guolin Ke, et al.) | Alpha158 | 0.0475±0.00 | 0.3979±0.00 | 0.0485±0.00 | 0.4123±0.00 | 0.1143±0.00 | 1.2744±0.00 | -0.0800±0.00 |
 | MLP | Alpha158 | 0.0358±0.00 | 0.2738±0.03 | 0.0425±0.00 | 0.3221±0.01 | 0.0836±0.02 | 1.0323±0.25 | -0.1127±0.02 |
-| TabNet with pretrain (Sercan O. Arikm et al) | Alpha158 | 0.0344±0.00 | 0.205±0.11 | 0.0398±0.00 | 0.3479±0.01 | 0.0827±0.02 | 1.1141±0.32 | -0.0925±0.02 |
 | TFT (Bryan Lim, et al.) | Alpha158 (with selected 20 features) | 0.0343±0.00 | 0.2071±0.02 | 0.0107±0.00 | 0.0660±0.02 | 0.0623±0.02 | 0.5818±0.20 | -0.1762±0.01 |
 | GRU (Kyunghyun Cho, et al.) | Alpha158 (with selected 20 features) | 0.0311±0.00 | 0.2418±0.04 | 0.0425±0.00 | 0.3434±0.02 | 0.0330±0.02 | 0.4805±0.30 | -0.1021±0.02 |
 | LSTM (Sepp Hochreiter, et al.) | Alpha158 (with selected 20 features) | 0.0312±0.00 | 0.2394±0.04 | 0.0418±0.00 | 0.3324±0.03 | 0.0298±0.02 | 0.4198±0.33 | -0.1348±0.03 |
Binary file changed (-1.86 MB), not shown.

examples/benchmarks/TabNet/workflow_config_TabNet_Alpha158.yaml (+1 -1)

@@ -55,7 +55,7 @@ task:
         kwargs: *data_handler_config
     segments:
         pretrain: [2008-01-01, 2014-12-31]
-        pretrain_validation: [2015-01-01, 2020-08-01]
+        pretrain_validation: [2015-01-01, 2016-12-31]
         train: [2008-01-01, 2014-12-31]
         valid: [2015-01-01, 2016-12-31]
         test: [2017-01-01, 2020-08-01]
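
This is a data-leakage fix: the old pretrain_validation span ran through 2020-08-01 and therefore overlapped the test segment (2017-01-01 to 2020-08-01), so pretraining model selection could see test-period data. The new span matches valid and stops before test begins. A minimal sketch of the invariant, using the dates from the config above (a hypothetical standalone check, not part of the commit):

    # ISO dates compare correctly as strings, so segment ordering
    # can be asserted directly.
    pretrain_validation = ("2015-01-01", "2016-12-31")
    test = ("2017-01-01", "2020-08-01")
    assert pretrain_validation[1] < test[0], "pretrain_validation leaks into the test period"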

qlib/contrib/model/pytorch_alstm.py (+4 -1)

@@ -78,7 +78,6 @@ def __init__(
         self.optimizer = optimizer.lower()
         self.loss = loss
         self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
-        self.use_gpu = torch.cuda.is_available()
         self.seed = seed

         self.logger.info(
@@ -137,6 +136,10 @@ def __init__(
         self.fitted = False
         self.ALSTM_model.to(self.device)

+    @property
+    def use_gpu(self):
+        return self.device != torch.device("cpu")
+
     def mse(self, pred, label):
         loss = (pred - label) ** 2
         return torch.mean(loss)
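
This hunk is the template for the use_gpu fix repeated across the models below: the old cached flag answered "does the machine have a GPU?", while the property answers "is this model actually placed on one?". With GPU=-1 the model runs on CPU even on a CUDA machine, so the old flag claimed GPU use when there was none. A minimal sketch of the pattern, with an illustrative class name:

    import torch

    class Net:
        def __init__(self, GPU=0):
            # The device is the single source of truth for placement.
            self.device = torch.device("cuda:%d" % GPU if torch.cuda.is_available() and GPU >= 0 else "cpu")

        @property
        def use_gpu(self):
            # Derived on demand, so it can never drift out of sync with self.device.
            return self.device != torch.device("cpu")

    print(Net(GPU=-1).use_gpu)  # False, even when CUDA is available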

qlib/contrib/model/pytorch_alstm_ts.py (+5 -5)

@@ -81,7 +81,6 @@ def __init__(
         self.loss = loss
         self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
         self.n_jobs = n_jobs
-        self.use_gpu = torch.cuda.is_available()
         self.seed = seed

         self.logger.info(
@@ -142,6 +141,10 @@ def __init__(
         self.fitted = False
         self.ALSTM_model.to(self.device)

+    @property
+    def use_gpu(self):
+        return self.device != torch.device("cpu")
+
     def mse(self, pred, label):
         loss = (pred - label) ** 2
         return torch.mean(loss)
@@ -277,10 +280,7 @@ def predict(self, dataset):
         feature = data[:, :, 0:-1].to(self.device)

         with torch.no_grad():
-            if self.use_gpu:
-                pred = self.ALSTM_model(feature.float()).detach().cpu().numpy()
-            else:
-                pred = self.ALSTM_model(feature.float()).detach().numpy()
+            pred = self.ALSTM_model(feature.float()).detach().cpu().numpy()

         preds.append(pred)
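
The predict() branches collapse because Tensor.cpu() returns the tensor itself when it already lives in CPU memory, so the unconditional .detach().cpu().numpy() chain behaves identically on both devices. A quick standalone check:

    import torch

    t = torch.ones(3)         # a CPU tensor
    assert t.cpu() is t       # .cpu() is a no-op (returns self) on CPU tensors
    t.detach().cpu().numpy()  # safe whether t lives on CPU or CUDA

The same simplification is applied verbatim in the GATs, GRU, LSTM, and DNN models below.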

qlib/contrib/model/pytorch_gats.py (+5 -4)

@@ -149,6 +149,10 @@ def __init__(
         self.fitted = False
         self.GAT_model.to(self.device)

+    @property
+    def use_gpu(self):
+        return self.device != torch.device("cpu")
+
     def mse(self, pred, label):
         loss = (pred - label) ** 2
         return torch.mean(loss)
@@ -326,10 +330,7 @@ def predict(self, dataset):
         x_batch = torch.from_numpy(x_values[batch]).float().to(self.device)

         with torch.no_grad():
-            if self.use_gpu:
-                pred = self.GAT_model(x_batch).detach().cpu().numpy()
-            else:
-                pred = self.GAT_model(x_batch).detach().numpy()
+            pred = self.GAT_model(x_batch).detach().cpu().numpy()

         preds.append(pred)

qlib/contrib/model/pytorch_gats_ts.py (+5 -5)

@@ -107,7 +107,6 @@ def __init__(
         self.model_path = model_path
         self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
         self.n_jobs = n_jobs
-        self.use_gpu = torch.cuda.is_available()
         self.seed = seed

         self.logger.info(
@@ -171,6 +170,10 @@ def __init__(
         self.fitted = False
         self.GAT_model.to(self.device)

+    @property
+    def use_gpu(self):
+        return self.device != torch.device("cpu")
+
     def mse(self, pred, label):
         loss = (pred - label) ** 2
         return torch.mean(loss)
@@ -347,10 +350,7 @@ def predict(self, dataset):
         feature = data[:, :, 0:-1].to(self.device)

         with torch.no_grad():
-            if self.use_gpu:
-                pred = self.GAT_model(feature.float()).detach().cpu().numpy()
-            else:
-                pred = self.GAT_model(feature.float()).detach().numpy()
+            pred = self.GAT_model(feature.float()).detach().cpu().numpy()

         preds.append(pred)

qlib/contrib/model/pytorch_gru.py (+5 -5)

@@ -78,7 +78,6 @@ def __init__(
         self.optimizer = optimizer.lower()
         self.loss = loss
         self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
-        self.use_gpu = torch.cuda.is_available()
         self.seed = seed

         self.logger.info(
@@ -137,6 +136,10 @@ def __init__(
         self.fitted = False
         self.gru_model.to(self.device)

+    @property
+    def use_gpu(self):
+        return self.device != torch.device("cpu")
+
     def mse(self, pred, label):
         loss = (pred - label) ** 2
         return torch.mean(loss)
@@ -292,10 +295,7 @@ def predict(self, dataset):
         x_batch = torch.from_numpy(x_values[begin:end]).float().to(self.device)

         with torch.no_grad():
-            if self.use_gpu:
-                pred = self.gru_model(x_batch).detach().cpu().numpy()
-            else:
-                pred = self.gru_model(x_batch).detach().numpy()
+            pred = self.gru_model(x_batch).detach().cpu().numpy()

         preds.append(pred)

qlib/contrib/model/pytorch_gru_ts.py (+4 -1)

@@ -81,7 +81,6 @@ def __init__(
         self.loss = loss
         self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
         self.n_jobs = n_jobs
-        self.use_gpu = torch.cuda.is_available()
         self.seed = seed

         self.logger.info(
@@ -142,6 +141,10 @@ def __init__(
         self.fitted = False
         self.GRU_model.to(self.device)

+    @property
+    def use_gpu(self):
+        return self.device != torch.device("cpu")
+
     def mse(self, pred, label):
         loss = (pred - label) ** 2
         return torch.mean(loss)

qlib/contrib/model/pytorch_lstm.py (+4 -1)

@@ -77,7 +77,6 @@ def __init__(
         self.optimizer = optimizer.lower()
         self.loss = loss
         self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
-        self.use_gpu = torch.cuda.is_available()
         self.seed = seed

         self.logger.info(
@@ -133,6 +132,10 @@ def __init__(
         self.fitted = False
         self.lstm_model.to(self.device)

+    @property
+    def use_gpu(self):
+        return self.device != torch.device("cpu")
+
     def mse(self, pred, label):
         loss = (pred - label) ** 2
         return torch.mean(loss)

qlib/contrib/model/pytorch_lstm_ts.py (+5 -5)

@@ -80,7 +80,6 @@ def __init__(
         self.loss = loss
         self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
         self.n_jobs = n_jobs
-        self.use_gpu = torch.cuda.is_available()
         self.seed = seed

         self.logger.info(
@@ -138,6 +137,10 @@ def __init__(
         self.fitted = False
         self.LSTM_model.to(self.device)

+    @property
+    def use_gpu(self):
+        return self.device != torch.device("cpu")
+
     def mse(self, pred, label):
         loss = (pred - label) ** 2
         return torch.mean(loss)
@@ -273,10 +276,7 @@ def predict(self, dataset):
         feature = data[:, :, 0:-1].to(self.device)

         with torch.no_grad():
-            if self.use_gpu:
-                pred = self.LSTM_model(feature.float()).detach().cpu().numpy()
-            else:
-                pred = self.LSTM_model(feature.float()).detach().numpy()
+            pred = self.LSTM_model(feature.float()).detach().cpu().numpy()

         preds.append(pred)

qlib/contrib/model/pytorch_nn.py (+8 -8)

@@ -82,7 +82,6 @@ def __init__(
         self.optimizer = optimizer.lower()
         self.loss_type = loss
         self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
-        self.use_GPU = torch.cuda.is_available()
         self.seed = seed
         self.weight_decay = weight_decay

@@ -101,7 +100,7 @@ def __init__(
             "\neval_steps : {}"
             "\nseed : {}"
             "\nvisible_GPU : {}"
-            "\nuse_GPU : {}"
+            "\nuse_gpu : {}"
             "\nweight_decay : {}".format(
                 layers,
                 lr,
@@ -116,7 +115,7 @@ def __init__(
                 eval_steps,
                 seed,
                 GPU,
-                self.use_GPU,
+                self.use_gpu,
                 weight_decay,
             )
         )
@@ -157,6 +156,10 @@ def __init__(
         self.fitted = False
         self.dnn_model.to(self.device)

+    @property
+    def use_gpu(self):
+        return self.device != torch.device("cpu")
+
     def fit(
         self,
         dataset: DatasetH,
@@ -254,7 +257,7 @@ def fit(

         # restore the optimal parameters after training ??
         self.dnn_model.load_state_dict(torch.load(save_path))
-        if self.use_GPU:
+        if self.use_gpu:
             torch.cuda.empty_cache()

     def get_loss(self, pred, w, target, loss_type):
@@ -276,10 +279,7 @@ def predict(self, dataset):
         self.dnn_model.eval()

         with torch.no_grad():
-            if self.use_GPU:
-                preds = self.dnn_model(x_test).detach().cpu().numpy()
-            else:
-                preds = self.dnn_model(x_test).detach().numpy()
+            preds = self.dnn_model(x_test).detach().cpu().numpy()
         return pd.Series(np.squeeze(preds), index=x_test_pd.index)

     def save(self, filename, **kwargs):
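
Beyond renaming use_GPU to the shared use_gpu property, this also fixes when the CUDA cache is released after fit(): previously empty_cache() ran whenever a GPU merely existed; now it runs only if the model actually trained on one. Roughly (a sketch, not the file's code):

    import torch

    def release_cuda_memory(device: torch.device) -> None:
        # Only touch the CUDA allocator when this model actually used the GPU.
        if device != torch.device("cpu"):
            torch.cuda.empty_cache()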

qlib/contrib/model/pytorch_tabnet.py (+21 -12)

@@ -55,7 +55,7 @@ def __init__(
         ps=0.3,
         lr=0.01,
         pretrain=True,
-        pretrain_file="./pretrain/best.model",
+        pretrain_file=None,
     ):
         """
         TabNet model for Qlib
@@ -81,7 +81,7 @@ def __init__(
         self.metric = metric
         self.early_stop = early_stop
         self.pretrain = pretrain
-        self.pretrain_file = pretrain_file
+        self.pretrain_file = get_or_create_path(pretrain_file)
         self.logger.info(
             "TabNet:"
             "\nbatch_size : {}"
@@ -116,6 +116,10 @@ def __init__(
         else:
             raise NotImplementedError("optimizer {} is not supported!".format(optimizer))

+    @property
+    def use_gpu(self):
+        return self.device != torch.device("cpu")
+
     def pretrain_fn(self, dataset=DatasetH, pretrain_file="./pretrain/best.model"):
         get_or_create_path(pretrain_file)

@@ -182,7 +186,7 @@

         stop_steps = 0
         train_loss = 0
-        best_score = np.inf
+        best_score = -np.inf
         best_epoch = 0
         evals_result["train"] = []
         evals_result["valid"] = []
@@ -201,7 +205,7 @@
             evals_result["train"].append(train_score)
             evals_result["valid"].append(val_score)

-            if val_score < best_score:
+            if val_score > best_score:
                 best_score = val_score
                 stop_steps = 0
                 best_epoch = epoch_idx
@@ -215,6 +219,9 @@
         self.logger.info("best score: %.6lf @ %d" % (best_score, best_epoch))
         self.tabnet_model.load_state_dict(best_param)
         torch.save(best_param, save_path)
+
+        if self.use_gpu:
+            torch.cuda.empty_cache()

     def predict(self, dataset):
         if not self.fitted:
@@ -264,12 +271,13 @@ def test_epoch(self, data_x, data_y):
             feature = x_values[indices[i : i + self.batch_size]].float().to(self.device)
             label = y_values[indices[i : i + self.batch_size]].float().to(self.device)
             priors = torch.ones(self.batch_size, self.d_feat).to(self.device)
-            pred = self.tabnet_model(feature, priors)
-            loss = self.loss_fn(pred, label)
-            losses.append(loss.item())
+            with torch.no_grad():
+                pred = self.tabnet_model(feature, priors)
+                loss = self.loss_fn(pred, label)
+                losses.append(loss.item())

-            score = self.metric_fn(pred, label)
-            scores.append(score.item())
+                score = self.metric_fn(pred, label)
+                scores.append(score.item())

         return np.mean(losses), np.mean(scores)

@@ -352,10 +360,11 @@ def pretrain_test_epoch(self, x_train):
             label = y_train_values.float().to(self.device)
             S_mask = S_mask.to(self.device)
             priors = 1 - S_mask
-            (vec, sparse_loss) = self.tabnet_model(feature, priors)
-            f = self.tabnet_decoder(vec)
+            with torch.no_grad():
+                (vec, sparse_loss) = self.tabnet_model(feature, priors)
+                f = self.tabnet_decoder(vec)

-            loss = self.pretrain_loss_fn(label, f, S_mask)
+                loss = self.pretrain_loss_fn(label, f, S_mask)
             losses.append(loss.item())

         return np.mean(losses)
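
Two of the TabNet fixes deserve a note. First, early-stopping state must match the metric's direction: the validation score here is higher-is-better, so best_score starts at -np.inf and improves via >; the old np.inf / < pair tracked the minimum, i.e., it kept the worst-scoring epoch. Second, evaluation passes now run under torch.no_grad(), avoiding needless autograd bookkeeping. (The pretrain_file default also moves from a hard-coded ./pretrain/best.model to None resolved through get_or_create_path, which falls back to a managed path when no file is given.) A minimal sketch of the corrected loop, with illustrative callables train_epoch and validate:

    import numpy as np

    def fit_with_early_stopping(train_epoch, validate, n_epochs=100, early_stop=20):
        # Early stopping for a higher-is-better validation metric.
        best_score, best_epoch, stop_steps = -np.inf, 0, 0
        for epoch in range(n_epochs):
            train_epoch()
            val_score = validate()
            if val_score > best_score:  # an improvement resets the patience counter
                best_score, best_epoch, stop_steps = val_score, epoch, 0
            else:
                stop_steps += 1
                if stop_steps >= early_stop:  # patience exhausted
                    break
        return best_score, best_epoch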
