Skip to content

Commit e626264

Browse files
committed
Merge branch 'main' of github.com:microsoft/qlib into fshare
2 parents b99de06 + e8beaa5 commit e626264

File tree

8 files changed

+41
-13
lines changed

8 files changed

+41
-13
lines changed

docs/component/data.rst

+19
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,25 @@ Filter
218218
- `cross-sectional features filter` \: rule_expression = '$rank($close)<10'
219219
- `time-sequence features filter`: rule_expression = '$Ref($close, 3)>100'
220220

221+
Here is a simple example showing how to use filter in a basic ``Qlib`` workflow configuration file:
222+
223+
.. code-block:: yaml
224+
225+
filter: &filter
226+
filter_type: ExpressionDFilter
227+
rule_expression: "Ref($close, -2) / Ref($close, -1) > 1"
228+
filter_start_time: 2010-01-01
229+
filter_end_time: 2010-01-07
230+
keep: False
231+
232+
data_handler_config: &data_handler_config
233+
start_time: 2010-01-01
234+
end_time: 2021-01-22
235+
fit_start_time: 2010-01-01
236+
fit_end_time: 2015-12-31
237+
instruments: *market
238+
filter_pipe: [*filter]
239+
221240
To know more about ``Filter``, please refer to `Filter API <../reference/api.html#module-qlib.data.filter>`_.
222241

223242
Reference

qlib/contrib/model/pytorch_alstm_ts.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,12 @@ def fit(
213213
dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader
214214
dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader
215215

216-
train_loader = DataLoader(dl_train, batch_size=self.batch_size, shuffle=True, num_workers=self.n_jobs)
217-
valid_loader = DataLoader(dl_valid, batch_size=self.batch_size, shuffle=False, num_workers=self.n_jobs)
216+
train_loader = DataLoader(
217+
dl_train, batch_size=self.batch_size, shuffle=True, num_workers=self.n_jobs, drop_last=True
218+
)
219+
valid_loader = DataLoader(
220+
dl_valid, batch_size=self.batch_size, shuffle=False, num_workers=self.n_jobs, drop_last=True
221+
)
218222

219223
save_path = get_or_create_path(save_path)
220224

qlib/contrib/model/pytorch_gats_ts.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -261,8 +261,8 @@ def fit(
261261
sampler_train = DailyBatchSampler(dl_train)
262262
sampler_valid = DailyBatchSampler(dl_valid)
263263

264-
train_loader = DataLoader(dl_train, sampler=sampler_train, num_workers=self.n_jobs)
265-
valid_loader = DataLoader(dl_valid, sampler=sampler_valid, num_workers=self.n_jobs)
264+
train_loader = DataLoader(dl_train, sampler=sampler_train, num_workers=self.n_jobs, drop_last=True)
265+
valid_loader = DataLoader(dl_valid, sampler=sampler_valid, num_workers=self.n_jobs, drop_last=True)
266266

267267
save_path = get_or_create_path(save_path)
268268

qlib/contrib/model/pytorch_gru_ts.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,12 @@ def fit(
213213
dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader
214214
dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader
215215

216-
train_loader = DataLoader(dl_train, batch_size=self.batch_size, shuffle=True, num_workers=self.n_jobs)
217-
valid_loader = DataLoader(dl_valid, batch_size=self.batch_size, shuffle=False, num_workers=self.n_jobs)
216+
train_loader = DataLoader(
217+
dl_train, batch_size=self.batch_size, shuffle=True, num_workers=self.n_jobs, drop_last=True
218+
)
219+
valid_loader = DataLoader(
220+
dl_valid, batch_size=self.batch_size, shuffle=False, num_workers=self.n_jobs, drop_last=True
221+
)
218222

219223
save_path = get_or_create_path(save_path)
220224

qlib/contrib/model/pytorch_lstm_ts.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,12 @@ def fit(
209209
dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader
210210
dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader
211211

212-
train_loader = DataLoader(dl_train, batch_size=self.batch_size, shuffle=True, num_workers=self.n_jobs)
213-
valid_loader = DataLoader(dl_valid, batch_size=self.batch_size, shuffle=False, num_workers=self.n_jobs)
212+
train_loader = DataLoader(
213+
dl_train, batch_size=self.batch_size, shuffle=True, num_workers=self.n_jobs, drop_last=True
214+
)
215+
valid_loader = DataLoader(
216+
dl_valid, batch_size=self.batch_size, shuffle=False, num_workers=self.n_jobs, drop_last=True
217+
)
214218

215219
save_path = get_or_create_path(save_path)
216220

qlib/data/dataset/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@ def __getitem__(self, idx: Union[int, Tuple[object, str], List[int]]):
413413
# 1) for better performance, use the last nan line for padding the lost date
414414
# 2) To avoid precision problems, we use np.float64. # TODO: I'm not sure whether np.float64 will result in
415415
# precision problems. It will not cause any problems in my tests at least
416-
indices = np.nan_to_num(indices.astype(np.float64), nan=self.nan_idx).astype(np.int)
416+
indices = np.nan_to_num(indices.astype(np.float64), nan=self.nan_idx).astype(int)
417417

418418
data = self.data_arr[indices]
419419
if isinstance(idx, mtit):

qlib/data/ops.py

-3
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ class NpElemOperator(ElemOperator):
7474
"""
7575

7676
def __init__(self, feature, func):
77-
self.feature = feature
7877
self.func = func
7978
super(NpElemOperator, self).__init__(feature)
8079

@@ -289,8 +288,6 @@ class NpPairOperator(PairOperator):
289288
"""
290289

291290
def __init__(self, feature_left, feature_right, func):
292-
self.feature_left = feature_left
293-
self.feature_right = feature_right
294291
self.func = func
295292
super(NpPairOperator, self).__init__(feature_left, feature_right)
296293

qlib/utils/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def np_ffill(arr: np.array):
6464
arr : np.array
6565
Input numpy 1D array
6666
"""
67-
mask = np.isnan(arr.astype(np.float)) # np.isnan only works on np.float
67+
mask = np.isnan(arr.astype(float))  # np.isnan only works on float arrays (the deprecated np.float alias was replaced by the builtin float)
6868
# get fill index
6969
idx = np.where(~mask, np.arange(mask.shape[0]), 0)
7070
np.maximum.accumulate(idx, out=idx)

0 commit comments

Comments
 (0)