dataloader.py
import pickle

import numpy as np
import torch
from torch.utils.data import Dataset


def load_pickle(fname):
    with open(fname, 'rb') as f:
        return pickle.load(f)

def downsample(train_idx, neg_young, train_idx_pos):
    # Rebalance the training split: randomly drop up to 450,000 of the
    # young negative samples and repeat every positive sample 50 times.
    downsamples = np.random.permutation(neg_young)[:450000]
    mask = np.ones(len(train_idx), dtype=bool)
    mask[downsamples] = False
    downsample_idx = np.concatenate((train_idx[mask], np.repeat(train_idx_pos, 50)))
    return downsample_idx

class OriginalData:
    def __init__(self, path):
        self.path = path
        # Load the pre-computed feature-selection mask, the preprocessed
        # feature matrix restricted to the selected features, and the binary labels.
        self.feature_selection = load_pickle(path + 'frts_selection.pkl')
        self.x = load_pickle(path + 'preprocess_x.pkl')[:, self.feature_selection]
        self.y = load_pickle(path + 'y_bin.pkl')

    def datasampler(self, idx_path, train=True):
        idx = load_pickle(self.path + idx_path)
        if train:
            # Training split: drop young negatives and oversample positives.
            downsample_idx = downsample(idx, load_pickle(self.path + 'neg_young.pkl'),
                                        idx[self.y[idx] == 1])
            return self.x[downsample_idx, :], self.y[downsample_idx]
        return self.x, self.y

class EHRData(Dataset):
    """Wraps a feature matrix and its labels as a PyTorch Dataset."""

    def __init__(self, data, cla):
        self.data = data
        self.cla = cla

    def __len__(self):
        return len(self.cla)

    def __getitem__(self, idx):
        return self.data[idx], self.cla[idx]

def collate_fn(data):
    # Densify each sparse feature row, append its label as the last column,
    # and stack the batch into a single LongTensor.
    data_list = []
    for datum in data:
        data_list.append(np.hstack((datum[0].toarray().ravel(), datum[1])))
    return torch.from_numpy(np.array(data_list)).long()
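

# The sketch below is not part of the original module: it only illustrates one
# way the pieces above could be wired into a torch DataLoader. The './data/'
# directory and the 'train_idx.pkl' index file are assumptions; substitute the
# paths and index pickles produced by your own preprocessing step.
if __name__ == '__main__':
    from torch.utils.data import DataLoader

    data = OriginalData('./data/')                         # hypothetical data directory
    x_train, y_train = data.datasampler('train_idx.pkl')   # hypothetical index pickle
    loader = DataLoader(EHRData(x_train, y_train),
                        batch_size=256,
                        shuffle=True,
                        collate_fn=collate_fn)
    for batch in loader:
        # collate_fn appends the label as the last column of each row.
        features, labels = batch[:, :-1], batch[:, -1]
        break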