Skip to content

Commit 8398fd1

Browse files
committed
initial commit, Date2Vec done, TODO documentation
0 parents  commit 8398fd1

File tree

296 files changed

+1936367
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

296 files changed

+1936367
-0
lines changed

Data.py

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
from torch.utils.data import Dataset
2+
import numpy as np
3+
4+
class NextDateDataset(Dataset):
    """Dataset of consecutive record pairs for next-record prediction.

    Every input line is a comma-separated list of integers. Item ``idx`` is
    the pair ``(record[idx], record[idx + 1])``, both as float32 arrays.
    """

    def __init__(self, dates):
        """Parse the raw text lines into lists of integers.

        Args:
            dates: Iterable of strings, each a comma-separated list of
                integers (a trailing newline is fine). Blank or
                whitespace-only lines are skipped.

        Raises:
            ValueError: If a field of a non-blank line is not an integer.
        """
        # Skipping blank lines keeps a trailing empty line in the input file
        # from crashing the int() conversion.
        self.dates = [
            [int(field) for field in line.split(",")]
            for line in dates
            if line.strip()
        ]

    def __len__(self):
        """Return the number of (current, next) pairs."""
        # N records give N - 1 pairs. Clamp at zero: a negative value makes
        # the built-in len() raise ValueError for an empty dataset.
        return max(len(self.dates) - 1, 0)

    def __getitem__(self, idx):
        """Return ``(record[idx], record[idx + 1])`` as float32 arrays."""
        current = np.array(self.dates[idx], dtype=np.float32)
        following = np.array(self.dates[idx + 1], dtype=np.float32)
        return current, following
18+
19+
class TimeDateDataset(Dataset):
    """Auto-encoding dataset: every item is a record paired with itself.

    Every input line is a comma-separated list of integers. Item ``idx`` is
    ``(x, x)`` where ``x`` is the record at ``idx`` as a float32 array.
    """

    def __init__(self, dates):
        """Parse the raw text lines into lists of integers.

        Args:
            dates: Iterable of strings, each a comma-separated list of
                integers (a trailing newline is fine). Blank or
                whitespace-only lines are skipped.

        Raises:
            ValueError: If a field of a non-blank line is not an integer.
        """
        self.dates = [
            [int(field) for field in line.split(",")]
            for line in dates
            if line.strip()
        ]

    def __len__(self):
        """Return the number of records."""
        # Items do not look at the following record, so every record is a
        # valid item. The previous "len - 1" silently dropped the last record
        # and produced -1 (a ValueError from len()) for an empty dataset.
        return len(self.dates)

    def __getitem__(self, idx):
        """Return the record at ``idx`` as both input and target."""
        x = np.array(self.dates[idx], dtype=np.float32)
        return x, x
34+
35+
if __name__ == "__main__":
    # Manual smoke check: load dates.txt and print the first (current, next)
    # pair. The context manager closes the file handle, which the previous
    # bare open(...).readlines() left open.
    with open("dates.txt", 'r') as f:
        dt = f.readlines()
    dataset = NextDateDataset(dt)
    print(dataset[0])

Date2Vec.py

Whitespace-only changes.

Experiment.py

+282
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
from Model import Date2Vec
2+
from Data import NextDateDataset, TimeDateDataset
3+
import torch
4+
from torch.utils.data import DataLoader
5+
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
6+
from tensorboard_logger import configure, log_value
7+
import os
8+
9+
class NextDateExperiment:
    """Train and evaluate a model that predicts the next record in dates.txt.

    The lines of ``dates.txt`` are cut into three contiguous thirds: the
    middle third is the training set, the first third ("prev") and the last
    third ("after") are the two held-out test sets.
    """

    def __init__(self, model, act, optim='adam', lr=0.001, batch_size=256, num_epoch=50, cuda=False):
        """Store the hyper-parameters and build the three datasets.

        Args:
            model: The ``torch.nn.Module`` to train.
            act: Activation name; used here only to build the checkpoint
                directory ``./models/<act>/``.
            optim: ``'adam'``, ``'sgd_momentum'``, or any other value for SGD
                with Nesterov momentum.
            lr: Learning rate handed to the optimizer.
            batch_size: Mini-batch size of every data loader.
            num_epoch: Number of passes over the training loader.
            cuda: When true, the model and the batches are moved to the GPU.
        """
        self.model = model
        self.optim = optim
        self.lr = lr
        # Honour the argument; it used to be overwritten with a literal 128.
        self.batch_size = batch_size
        self.num_epoch = num_epoch
        self.cuda = cuda
        self.act = act

        with open('dates.txt', 'r') as f:
            full = f.readlines()
        train, test_prev, test_after = self._split_thirds(full)

        self.train_dataset = NextDateDataset(train)
        self.test_prev_dataset = NextDateDataset(test_prev)
        self.test_after_dataset = NextDateDataset(test_after)

    @staticmethod
    def _split_thirds(full):
        """Return ``(train, test_prev, test_after)``: middle, first, last third."""
        first_cut = len(full) // 3
        second_cut = 2 * len(full) // 3
        return full[first_cut:second_cut], full[:first_cut], full[second_cut:]

    def _make_loader(self, dataset):
        """Build a shuffling DataLoader with this experiment's settings."""
        return DataLoader(dataset, batch_size=self.batch_size, shuffle=True, num_workers=4, pin_memory=self.cuda)

    def _build_optimizer(self):
        """Create the optimizer selected by ``self.optim``."""
        if self.optim == 'adam':
            return torch.optim.Adam(self.model.parameters(), lr=self.lr)
        if self.optim == 'sgd_momentum':
            return torch.optim.SGD(self.model.parameters(), lr=self.lr, momentum=0.9)
        return torch.optim.SGD(self.model.parameters(), lr=self.lr, momentum=0.9, nesterov=True)

    def _regression_metrics(self, x, y_true):
        """Return ``(r2, mae, mse)`` of the model's prediction for one batch."""
        with torch.no_grad():
            y_pred = self.model(x).cpu().numpy()
        y_true = y_true.cpu().numpy()
        return (
            r2_score(y_true, y_pred),
            mean_absolute_error(y_true, y_pred),
            mean_squared_error(y_true, y_pred),
        )

    @staticmethod
    def _count_matches(y_pred, y_true):
        """Count components that are equal after truncating both to int."""
        return sum(
            int(p) == int(t)
            for pred_row, true_row in zip(y_pred, y_true)
            for p, t in zip(pred_row, true_row)
        )

    def _accuracy(self, batches):
        """Return the fraction of matching components over ``batches``.

        ``batches`` is any iterable of ``(x, y)`` tensor pairs. The result is
        matched components divided by compared components, so it always lies
        in [0, 1]; an empty iterable gives 0.0.
        """
        matched = 0
        compared = 0
        with torch.no_grad():
            for x, y in batches:
                if self.cuda:
                    x = x.cuda()
                y_pred = self.model(x).cpu().numpy().tolist()
                y_true = y.cpu().numpy().tolist()
                matched += self._count_matches(y_pred, y_true)
                compared += sum(len(row) for row in y_true)
        return matched / compared if compared else 0.0

    def train(self):
        """Run the training loop, logging metrics and saving checkpoints.

        After every optimisation step one batch of each test set is scored.
        Whenever the mean of the training loss and the two test MSEs reaches
        a new minimum, the whole model is saved under ``./models/<act>/``.
        """
        l1_loss = torch.nn.L1Loss()
        mse_loss = torch.nn.MSELoss()

        def loss_fn(y_pred, y_true):
            return l1_loss(y_pred, y_true) + mse_loss(y_pred, y_true)

        if self.cuda:
            # Only the model needs moving. The combined loss is a plain
            # function; calling .cuda() on it raised AttributeError.
            self.model = self.model.cuda()

        optimizer = self._build_optimizer()

        train_dataloader = self._make_loader(self.train_dataset)
        test1_dataloader = self._make_loader(self.test_prev_dataset)
        test2_dataloader = self._make_loader(self.test_after_dataset)

        # torch.save does not create missing directories.
        os.makedirs("./models/{}".format(self.act), exist_ok=True)

        avg_best = 1000000
        step = 0

        for ep in range(self.num_epoch):
            # zip stops at the shortest loader, so an epoch covers as many
            # batches as the smallest of the three splits provides.
            for (x, y), (x_prev, y_prev), (x_after, y_after) in zip(train_dataloader, test1_dataloader, test2_dataloader):
                if self.cuda:
                    x = x.cuda()
                    y = y.cuda()
                    x_prev = x_prev.cuda()
                    y_prev = y_prev.cuda()
                    x_after = x_after.cuda()
                    y_after = y_after.cuda()

                self.model.train()
                optimizer.zero_grad()

                y_pred = self.model(x)
                loss = loss_fn(y_pred, y)

                loss.backward()
                optimizer.step()

                # Score the held-out batches in eval mode so dropout layers
                # do not add noise to the reported metrics.
                self.model.eval()
                r2_prev, mae_prev, mse_prev = self._regression_metrics(x_prev, y_prev)
                r2_after, mae_after, mse_after = self._regression_metrics(x_after, y_after)

                print("ep:{}, batch:{}, train_loss:{:.4f}, test1_mse:{:.4f}, test2_mse:{:.4f}".format(
                    ep,
                    step,
                    loss.item(),
                    mse_prev,
                    mse_after
                ))

                log_value('train_loss', loss.item(), step)
                log_value('test1_r2', r2_prev, step)
                log_value('test1_mse', mse_prev, step)
                log_value('test1_mae', mae_prev, step)
                log_value('test2_r2', r2_after, step)
                log_value('test2_mse', mse_after, step)
                log_value('test2_mae', mae_after, step)

                avg_loss = (loss.item() + mse_prev + mse_after) / 3
                if avg_loss < avg_best:
                    avg_best = avg_loss
                    torch.save(self.model, "./models/{}/nextdate_{}_{}.pth".format(self.act, step, avg_best))

                step += 1

    def test(self):
        """Return ``(prev_acc, after_acc)`` for the two held-out sets.

        Each value is the fraction of predicted components that equal the
        target after both are truncated to int. The two sets are scored
        independently, so neither is cut short by the other's length.
        """
        if self.cuda:
            self.model = self.model.cuda()
        self.model.eval()

        prev_acc = self._accuracy(self._make_loader(self.test_prev_dataset))
        after_acc = self._accuracy(self._make_loader(self.test_after_dataset))

        return prev_acc, after_acc
139+
140+
class Date2VecExperiment:
    """Train and evaluate a model that reconstructs records of date_time.txt.

    The lines of ``date_time.txt`` are cut into three contiguous thirds: the
    middle third is the training set, the first third ("prev") and the last
    third ("after") are the two held-out test sets.
    """

    def __init__(self, model, act, optim='adam', lr=0.001, batch_size=256, num_epoch=50, cuda=False):
        """Store the hyper-parameters and build the three datasets.

        Args:
            model: The ``torch.nn.Module`` to train.
            act: Activation name; used here only to build the checkpoint
                directory ``./models/d2v_<act>/``.
            optim: ``'adam'``, ``'sgd_momentum'``, or any other value for SGD
                with Nesterov momentum.
            lr: Learning rate handed to the optimizer.
            batch_size: Mini-batch size of every data loader.
            num_epoch: Number of passes over the training loader.
            cuda: When true, the model and the batches are moved to the GPU.
        """
        self.model = model
        if cuda:
            self.model = model.cuda()
        self.optim = optim
        self.lr = lr
        self.batch_size = batch_size
        self.num_epoch = num_epoch
        self.cuda = cuda
        self.act = act

        with open('date_time.txt', 'r') as f:
            full = f.readlines()
        train, test_prev, test_after = self._split_thirds(full)

        self.train_dataset = TimeDateDataset(train)
        self.test_prev_dataset = TimeDateDataset(test_prev)
        self.test_after_dataset = TimeDateDataset(test_after)

    @staticmethod
    def _split_thirds(full):
        """Return ``(train, test_prev, test_after)``: middle, first, last third."""
        first_cut = len(full) // 3
        second_cut = 2 * len(full) // 3
        return full[first_cut:second_cut], full[:first_cut], full[second_cut:]

    def _make_loader(self, dataset):
        """Build a shuffling DataLoader with this experiment's settings."""
        return DataLoader(dataset, batch_size=self.batch_size, shuffle=True, num_workers=4, pin_memory=self.cuda)

    def _build_optimizer(self):
        """Create the optimizer selected by ``self.optim``."""
        if self.optim == 'adam':
            return torch.optim.Adam(self.model.parameters(), lr=self.lr)
        if self.optim == 'sgd_momentum':
            return torch.optim.SGD(self.model.parameters(), lr=self.lr, momentum=0.9)
        return torch.optim.SGD(self.model.parameters(), lr=self.lr, momentum=0.9, nesterov=True)

    def _regression_metrics(self, x, y_true):
        """Return ``(r2, mae, mse)`` of the model's prediction for one batch."""
        with torch.no_grad():
            y_pred = self.model(x).cpu().numpy()
        y_true = y_true.cpu().numpy()
        return (
            r2_score(y_true, y_pred),
            mean_absolute_error(y_true, y_pred),
            mean_squared_error(y_true, y_pred),
        )

    @staticmethod
    def _count_matches(y_pred, y_true):
        """Count components that are equal after truncating both to int."""
        return sum(
            int(p) == int(t)
            for pred_row, true_row in zip(y_pred, y_true)
            for p, t in zip(pred_row, true_row)
        )

    def _accuracy(self, batches):
        """Return the fraction of matching components over ``batches``.

        ``batches`` is any iterable of ``(x, y)`` tensor pairs. The result is
        matched components divided by compared components, so it always lies
        in [0, 1]; an empty iterable gives 0.0.
        """
        matched = 0
        compared = 0
        with torch.no_grad():
            for x, y in batches:
                if self.cuda:
                    x = x.cuda()
                y_pred = self.model(x).cpu().numpy().tolist()
                y_true = y.cpu().numpy().tolist()
                matched += self._count_matches(y_pred, y_true)
                compared += sum(len(row) for row in y_true)
        return matched / compared if compared else 0.0

    def train(self):
        """Run the training loop, logging metrics and saving checkpoints.

        After every optimisation step one batch of each test set is scored.
        Whenever the running average of the training loss reaches a new
        minimum, the whole model is saved under ``./models/d2v_<act>/``.
        """
        loss_fn = torch.nn.MSELoss()

        if self.cuda:
            loss_fn = loss_fn.cuda()
            self.model = self.model.cuda()

        optimizer = self._build_optimizer()

        train_dataloader = self._make_loader(self.train_dataset)
        test1_dataloader = self._make_loader(self.test_prev_dataset)
        test2_dataloader = self._make_loader(self.test_after_dataset)

        # torch.save does not create missing directories.
        os.makedirs("./models/d2v_{}".format(self.act), exist_ok=True)

        avg_best = 1000000000000000
        avg_loss = 0
        step = 0

        for ep in range(self.num_epoch):
            # zip stops at the shortest loader, so an epoch covers as many
            # batches as the smallest of the three splits provides.
            for (x, y), (x_prev, y_prev), (x_after, y_after) in zip(train_dataloader, test1_dataloader, test2_dataloader):
                if self.cuda:
                    x = x.cuda()
                    y = y.cuda()
                    x_prev = x_prev.cuda()
                    y_prev = y_prev.cuda()
                    x_after = x_after.cuda()
                    y_after = y_after.cuda()

                self.model.train()
                optimizer.zero_grad()

                y_pred = self.model(x)
                loss = loss_fn(y_pred, y)

                loss.backward()
                optimizer.step()

                # Score the held-out batches in eval mode so dropout layers
                # do not add noise to the reported metrics.
                self.model.eval()
                r2_prev, mae_prev, mse_prev = self._regression_metrics(x_prev, y_prev)
                r2_after, mae_after, mse_after = self._regression_metrics(x_after, y_after)

                print("ep:{}, batch:{}, train_loss:{:.4f}, test1_mse:{:.4f}, test2_mse:{:.4f}".format(
                    ep,
                    step,
                    loss.item(),
                    mse_prev,
                    mse_after
                ))

                log_value('train_loss', loss.item(), step)
                log_value('test1_r2', r2_prev, step)
                log_value('test1_mse', mse_prev, step)
                log_value('test1_mae', mae_prev, step)
                log_value('test2_r2', r2_after, step)
                log_value('test2_mse', mse_after, step)
                log_value('test2_mae', mae_after, step)

                # Running average that halves the weight of older losses at
                # every step; it starts from 0.
                avg_loss = (loss.item() + avg_loss) / 2
                if avg_loss < avg_best:
                    avg_best = avg_loss
                    torch.save(self.model, "./models/d2v_{}/d2v_{}_{}.pth".format(self.act, step, avg_best))

                step += 1

    def test(self):
        """Return ``(prev_acc, after_acc)`` for the two held-out sets.

        Each value is the fraction of predicted components that equal the
        target after both are truncated to int. The two sets are scored
        independently, so neither is cut short by the other's length.
        """
        if self.cuda:
            self.model = self.model.cuda()
        self.model.eval()

        prev_acc = self._accuracy(self._make_loader(self.test_prev_dataset))
        after_acc = self._accuracy(self._make_loader(self.test_after_dataset))

        return prev_acc, after_acc
271+
if __name__ == "__main__":
    act = 'cos'
    optim = 'adam'
    # Checkpoints are written to ./models/d2v_<act>/. os.makedirs creates any
    # missing parent directory and tolerates an existing one, which the
    # previous shell "mkdir" did not; it also avoids spawning a shell.
    os.makedirs("./models/d2v_{}".format(act), exist_ok=True)
    configure("logs/d2v_{}".format(act))

    m = Date2Vec(k=64, act=act)
    # To resume from a saved model instead of starting fresh:
    #m = torch.load("models/sin/nextdate_11147_23.02417500813802.pth")
    exp = Date2VecExperiment(m, act, lr=0.001, cuda=True, optim=optim)
    exp.train()
    # To score the trained model on the two held-out splits:
    #test1_acc, test2_acc = exp.test()
    #print("test1 accuracy:{}, test2 accuracy:{}".format(test1_acc, test2_acc))

Model.py

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import torch
2+
from torch import nn
3+
4+
class Date2VecConvert:
    """Load a saved model and turn single records into embedding vectors."""

    def __init__(self, model_path="./d2v_model/d2v_98291_17.169918439404636.pth"):
        """Load the pickled model from ``model_path`` onto the CPU in eval mode.

        Args:
            model_path: Path of a model object saved with ``torch.save``.
        """
        # NOTE(review): torch.load unpickles arbitrary objects, so only load
        # checkpoints from a trusted source. Recent PyTorch releases may
        # default to weights_only=True, which rejects whole-model pickles;
        # confirm against the installed version.
        self.model = torch.load(model_path, map_location='cpu').eval()

    def convert(self, x):
        """Return the embedding of one record as a CPU tensor.

        Args:
            x: A single record as a flat sequence of numbers.

        Returns:
            The output of ``self.model.encode`` for a batch of one, with the
            batch axis removed. The tensor does not track gradients.
        """
        # Pure inference: without no_grad an autograd graph is built and the
        # result requires grad, which wastes memory and blocks .numpy().
        with torch.no_grad():
            batch = torch.Tensor(x).unsqueeze(0)
            return self.model.encode(batch).squeeze(0).cpu()
10+
11+
class Date2Vec(nn.Module):
    """Embedding network for 6-component input vectors.

    The input is projected by two parallel linear layers: ``fc1`` is used as
    is, ``fc2`` is passed through a periodic activation (sin or cos). Their
    concatenation is the ``k``-dimensional embedding returned by ``encode``.
    ``forward`` additionally maps that embedding back to 6 values through
    ``fc3``, ``fc4`` and ``fc5``, with dropout in between.
    """

    def __init__(self, k=32, act="sin"):
        """Build the layers.

        Args:
            k: Size of the embedding (width of the concatenated branches).
            act: ``"sin"`` selects ``torch.sin``; any other value selects
                ``torch.cos``.
        """
        super(Date2Vec, self).__init__()

        # Split k between the two branches; for odd k the periodic branch
        # receives the extra unit.
        k1 = k // 2
        k2 = k - k1

        self.fc1 = nn.Linear(6, k1)

        self.fc2 = nn.Linear(6, k2)
        self.d2 = nn.Dropout(0.3)

        self.activation = torch.sin if act == 'sin' else torch.cos

        self.fc3 = nn.Linear(k, k // 2)
        self.d3 = nn.Dropout(0.3)

        self.fc4 = nn.Linear(k // 2, 6)

        self.fc5 = torch.nn.Linear(6, 6)

    def forward(self, x):
        """Embed ``x`` and map the embedding back to 6 output values.

        Args:
            x: Tensor whose last axis has size 6; any number of leading axes.

        Returns:
            Tensor with the same leading axes as ``x`` and a last axis of 6.
        """
        linear_part = self.fc1(x)
        periodic_part = self.d2(self.activation(self.fc2(x)))
        # Concatenate along the feature (last) axis so that unbatched and
        # multi-axis batched inputs work as well as the usual 2-D batch.
        out = torch.cat([linear_part, periodic_part], -1)
        out = self.d3(self.fc3(out))
        out = self.fc4(out)
        return self.fc5(out)

    def encode(self, x):
        """Return the ``k``-dimensional embedding of ``x`` (no dropout).

        Args:
            x: Tensor whose last axis has size 6; any number of leading axes.

        Returns:
            Tensor with the same leading axes as ``x`` and a last axis of k.
        """
        linear_part = self.fc1(x)
        periodic_part = self.activation(self.fc2(x))
        return torch.cat([linear_part, periodic_part], -1)
53+
54+
if __name__ == "__main__":
    # Manual smoke check: push one random 6-component row through a default
    # Date2Vec and print the result together with its shape.
    net = Date2Vec()
    sample = torch.randn(1, 6)

    result = net(sample)
    print(result)
    print(result.shape)
18.4 KB
Binary file not shown.

0 commit comments

Comments
 (0)