def accuracy(out, yb):
    return (out.argmax(1)==yb).float().mean()

loss_func = F.cross_entropy

bs = 50
xb = x_train[:bs]
yb = y_train[:bs]
preds = model(xb)
preds[0]
tensor([-0.0843, -0.0098, 0.1629, 0.1187, 0.1040, 0.0934, -0.1870, -0.0530,
-0.1361, -0.0224], grad_fn=<SelectBackward0>)
loss_func(preds, yb)
tensor(2.2846, grad_fn=<NllLossBackward0>)
accuracy(preds, yb)
tensor(0.1400)
def report(loss, preds, yb):
    print(f"loss: {loss:.2f}, accuracy: {accuracy(preds, yb):.2f}")

report(loss, preds, yb)
loss: 2.30, accuracy: 0.14
n, m = x_train.shape
lr = 0.5
epochs = 3
xb,yb = x_train[:bs], y_train[:bs]
preds = model(xb)
loss = loss_func(preds, yb)
report(loss, preds, yb)
loss: 2.28, accuracy: 0.14
for epoch in range(epochs):
    for i in range(0, n, bs):
        s = slice(i, min(i+bs, n))
        xb,yb = x_train[s],y_train[s]
        preds = model(xb)
        loss = loss_func(preds, yb)
        loss.backward()
        with torch.no_grad():
            for l in model.layers:
                if hasattr(l, 'weight'):
                    l.weight -= l.weight.grad * lr
                    l.bias -= l.bias.grad * lr
                    l.weight.grad.zero_()
                    l.bias.grad.zero_()
    report(loss, preds, yb)
loss: 0.17, accuracy: 0.94
loss: 0.13, accuracy: 0.94
loss: 0.13, accuracy: 0.96
# parameters
m1 = nn.Module()
m1.foo = nn.Linear(3, 4)
m1.boo = 'hey'
m1
Module(
  (foo): Linear(in_features=3, out_features=4, bias=True)
)
list(m1.named_children())
[('foo', Linear(in_features=3, out_features=4, bias=True))]
list(m1.parameters())
[Parameter containing:
 tensor([[-0.4626, -0.5572, -0.2930],
         [-0.2142,  0.2954, -0.5759],
         [-0.0873,  0.5067,  0.0329],
         [ 0.1627,  0.2251, -0.2415]], requires_grad=True),
 Parameter containing:
 tensor([-0.4074,  0.0654,  0.3297, -0.2555], requires_grad=True)]
class MLP(nn.Module):
    def __init__(self, n_in, n_h, n_out):
        super().__init__()
        self.l1 = nn.Linear(n_in, n_h)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(n_h, n_out)

    def forward(self, x):
        return self.l2(self.relu(self.l1(x)))
model = MLP(n_in, n_h, 10)
model
MLP(
  (l1): Linear(in_features=784, out_features=50, bias=True)
  (relu): ReLU()
  (l2): Linear(in_features=50, out_features=10, bias=True)
)
for name, l in model.named_children():
    print(f"{name}: {l}")
l1: Linear(in_features=784, out_features=50, bias=True)
relu: ReLU()
l2: Linear(in_features=50, out_features=10, bias=True)
for p in model.parameters():
    print(p.shape)
torch.Size([50, 784])
torch.Size([50])
torch.Size([10, 50])
torch.Size([10])
def fit():
    for epoch in range(epochs):
        for i in range(0, n, bs):
            s = slice(i, min(i+bs, n))
            xb,yb = x_train[s], y_train[s]
            preds = model(xb)
            loss = loss_func(preds, yb)
            loss.backward()
            with torch.no_grad():
                for p in model.parameters():
                    p -= p.grad * lr
                model.zero_grad()
        report(loss, preds, yb)
fit()
loss: 0.02, accuracy: 1.00
loss: 0.05, accuracy: 0.98
loss: 0.03, accuracy: 1.00
# nn.Module behind the scenes
class MyModule:
    def __init__(self, n_in, n_h, n_out):
        self._modules = {}
        self.l1 = nn.Linear(n_in, n_h)
        self.l2 = nn.Linear(n_h, n_out)
        self.relu = nn.ReLU()

    def __setattr__(self, k, v):
        if not k.startswith('_'):
            self._modules[k] = v
        super().__setattr__(k, v)

    def __repr__(self):
        return f"{self._modules}"

    def parameters(self):
        for l in self._modules.values():
            yield from l.parameters()
mdl = MyModule(n_in, n_h, n_o)
mdl
{'l1': Linear(in_features=784, out_features=50, bias=True), 'l2': Linear(in_features=50, out_features=10, bias=True), 'relu': ReLU()}
for p in mdl.parameters():
    print(p.shape)
torch.Size([50, 784])
torch.Size([50])
torch.Size([10, 50])
torch.Size([10])
# registering modules
from functools import reduce
layers = [nn.Linear(n_in, n_h), nn.ReLU(), nn.Linear(n_h, n_o)]
class Model(nn.Module):
    def __init__(self, layers):
        super().__init__()
        self.layers = layers
        for i,l in enumerate(self.layers):
            self.add_module(f"layer_{i}", l)

    def forward(self, x):
        return reduce(lambda val, layer: layer(val), self.layers, x)
model = Model(layers)
model
Model(
  (layer_0): Linear(in_features=784, out_features=50, bias=True)
  (layer_1): ReLU()
  (layer_2): Linear(in_features=50, out_features=10, bias=True)
)
model(xb).shape
torch.Size([50, 10])
# nn.ModuleList
class SequentialModel(nn.Module):
    def __init__(self, layers):
        super().__init__()
        self.layers = nn.ModuleList(layers)

    def forward(self, x):
        for l in self.layers:
            x = l(x)
        return x
model = SequentialModel(layers)
model(xb).shape
torch.Size([50, 10])
# nn.Sequential
model = nn.Sequential(*layers)
fit()
loss: 0.14, accuracy: 0.96
loss: 0.11, accuracy: 0.96
loss: 0.05, accuracy: 1.00
model
Sequential(
(0): Linear(in_features=784, out_features=50, bias=True)
  (1): ReLU()
  (2): Linear(in_features=50, out_features=10, bias=True)
)
# optim
class Optimizer:
    def __init__(self, params, lr=0.5):
        self.params, self.lr = list(params), lr

    def step(self):
        with torch.no_grad():
            for p in self.params:
                p -= p.grad * self.lr

    def zero_grad(self):
        for p in self.params:
            p.grad.data.zero_()
model = nn.Sequential(nn.Linear(n_in, n_h), nn.ReLU(), nn.Linear(n_h, n_o))
opt = Optimizer(model.parameters(), lr=lr)
for epoch in range(epochs):
    for i in range(0, n, bs):
        s = slice(i, min(i+bs, n))
        xb,yb = x_train[s],y_train[s]
        preds = model(xb)
        loss = loss_func(preds, yb)
        loss.backward()
        opt.step()
        opt.zero_grad()
    report(loss, preds, yb)
loss: 0.13, accuracy: 0.96
loss: 0.12, accuracy: 0.92
loss: 0.08, accuracy: 0.96
from torch import optim
def get_model():
    model = nn.Sequential(nn.Linear(n_in, n_h), nn.ReLU(), nn.Linear(n_h, n_o))
    opt = optim.SGD(model.parameters(), lr=lr)
    return opt, model
opt, model = get_model()
loss_func(model(xb), yb)
tensor(2.2912, grad_fn=<NllLossBackward0>)
for epoch in range(epochs):
    for i in range(0, n, bs):
        s = slice(i, min(i+bs, n))
        xb,yb = x_train[s],y_train[s]
        preds = model(xb)
        loss = loss_func(preds, yb)
        loss.backward()
        opt.step()
        opt.zero_grad()
    report(loss, preds, yb)
loss: 0.15, accuracy: 0.96
loss: 0.11, accuracy: 0.96
loss: 0.06, accuracy: 1.00
# dataset

Dataset

 Dataset (x, y)

Initialize self. See help(type(self)) for accurate signature.
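The Dataset stub above is exported from the notebook, so only fragments of its body appear in the training.py hunk at the bottom of this diff. A minimal sketch of what it amounts to (the __init__ and __getitem__ bodies match that hunk; the __len__ body is an assumption):

class Dataset:
    def __init__(self, x, y):
        self.x, self.y = x, y

    def __len__(self):
        # assumed: dataset length is the number of rows in x
        return len(self.x)

    def __getitem__(self, i):
        # works for both int and slice indices, which the loops below rely on
        return self.x[i], self.y[i]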
train_ds, valid_ds = Dataset(x_train, y_train), Dataset(x_test, y_test)
opt, model = get_model()
for epoch in range(epochs):
    for i in range(0, n, bs):
        xb,yb = train_ds[i: min(i+bs, n)]
        preds = model(xb)
        loss = loss_func(preds, yb)
        loss.backward()
        opt.step()
        opt.zero_grad()
    report(loss, preds, yb)
loss: 0.13, accuracy: 0.96
loss: 0.10, accuracy: 0.98
loss: 0.12, accuracy: 0.96
# data loader
class DataLoader:
    def __init__(self, ds, bs):
        self.ds, self.bs = ds, bs

    def __iter__(self):
        for i in range(0, len(self.ds), self.bs):
            yield self.ds[i:i+self.bs]
train_dl = DataLoader(train_ds, bs)
valid_dl = DataLoader(valid_ds, bs)
xb, yb = next(iter(train_dl))
xb.shape
torch.Size([50, 784])
opt, model = get_model()
def fit():
    for epoch in range(epochs):
        for xb,yb in train_dl:
            preds = model(xb)
            loss = loss_func(preds, yb)
            loss.backward()
            opt.step()
            opt.zero_grad()
        report(loss, preds, yb)
fit()
loss: 0.16, accuracy: 0.96
loss: 0.11, accuracy: 0.98
loss: 0.07, accuracy: 0.98
# random sampling
import random
class Sampler:
    def __init__(self, ds, shuffle=False):
        self.n, self.shuffle = len(ds), shuffle

    def __iter__(self):
        res = list(range(self.n))
        if self.shuffle:
            random.shuffle(res)
        return iter(res)
from itertools import islice
ss = Sampler(train_ds)
list(islice(ss, 5))
[0, 1, 2, 3, 4]
import fastcore.all as fc
class BatchSampler:
    def __init__(self, sampler, bs, drop_last=False):
        fc.store_attr()

    def __iter__(self):
        yield from fc.chunked(iter(self.sampler), self.bs, drop_last=self.drop_last)
batches = BatchSampler(ss, 5)
list(islice(iter(batches), 3))
[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [10, 11, 12, 13, 14]]
def collate(b):
    xs, ys = zip(*b)
    return torch.stack(xs), torch.stack(ys)
class DataLoader:
    def __init__(self, ds, batchs, collate_fn=collate):
        fc.store_attr()

    def __iter__(self):
        yield from (self.collate_fn(self.ds[i] for i in b) for b in self.batchs)
train_sampler = BatchSampler(Sampler(train_ds, shuffle=True), bs)
valid_sampler = BatchSampler(Sampler(valid_ds, shuffle=True), bs)
train_dl = DataLoader(train_ds, train_sampler)
valid_dl = DataLoader(valid_ds, valid_sampler)
xb, yb = next(iter(valid_dl))
xb.shape, yb.shape
(torch.Size([50, 784]), torch.Size([50]))
plt.imshow(xb[0].view(28, 28));
plt.axis('off');
opt, model = get_model()
fit()
loss: 0.11, accuracy: 0.94
loss: 0.27, accuracy: 0.96
loss: 0.03, accuracy: 1.00
# multiprocessing dataloader
import torch.multiprocessing as mp
class DataLoader:
    def __init__(self, ds, batchs, collate_fn=collate, num_workers=1):
        fc.store_attr()

    def __iter__(self):
        with mp.Pool(self.num_workers) as ex:
            yield from ex.map(self.ds.__getitem__, iter(self.batchs))
train_dl = DataLoader(train_ds, batchs=train_sampler)
xb, yb = next(iter(train_dl))
plt.imshow(xb[0].view(28, 28));
plt.axis('off');
# pytorch dataloaders
t = RandomSampler(train_ds)
next(iter(t))
24797
t = BatchSampler(train_ds, batch_size=2, drop_last=False)

k = next(iter(t))
print(len(k))
for ele in k:
    print(ele[0].shape, ele[1])
2
torch.Size([784]) tensor(5)
torch.Size([784]) tensor(0)
t = BatchSampler(RandomSampler(train_ds), batch_size=2, drop_last=False)

k = next(iter(t))
print(len(k))
for ele in k:
    print(ele)
2
33683
36592
train_samp = BatchSampler(RandomSampler(train_ds), bs, drop_last=False)
valid_samp = BatchSampler(RandomSampler(valid_ds), bs, drop_last=False)
train_dl = DataLoader(train_ds, batch_sampler=train_samp, collate_fn=collate)
valid_dl = DataLoader(valid_ds, batch_sampler=valid_samp, collate_fn=collate)
opt, model = get_model()
fit()
loss: 0.20, accuracy: 0.94
loss: 0.11, accuracy: 0.98
loss: 0.20, accuracy: 0.98
train_dl = DataLoader(train_ds, bs, shuffle=True, num_workers=2, drop_last=True)
valid_dl = DataLoader(valid_ds, bs, shuffle=False, num_workers=2)
opt, model = get_model()
fit()
loss: 0.08, accuracy: 0.98
loss: 0.31, accuracy: 0.86
loss: 0.11, accuracy: 0.98
# validation
fit

 fit (epochs, model, loss_func, opt, train_dl, valid_dl)

get_dls

 get_dls (train_ds, valid_ds, bs, **kwargs)
train_dl, valid_dl = get_dls(train_ds, valid_ds, bs)
opt, model = get_model()
0 0.1775239165313542 0.948100003004074
1 0.1179210783354938 0.9646000063419342
2 0.11550588405691087 0.9665000039339066
3 0.10593999677803367 0.9698000079393387
4 0.10098711441038176 0.9727000087499619
CPU times: user 17.8 s, sys: 16.1 s, total: 33.8 s
Wall time: 4.71 s
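The validation-aware fit and get_dls are shown above only as signature stubs; parts of their bodies appear in the training.py hunk at the end of this diff. A rough sketch built on those fragments (the per-batch accumulation and the validation-side DataLoader arguments are assumptions, not the author's exact code):

def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    for epoch in range(epochs):
        model.train()
        for xb, yb in train_dl:
            loss = loss_func(model(xb), yb)
            loss.backward()
            opt.step()
            opt.zero_grad()

        model.eval()
        with torch.no_grad():
            total_loss, total_acc, count = 0.0, 0.0, 0
            for xb, yb in valid_dl:
                preds = model(xb)
                n = len(xb)
                count += n
                total_loss += loss_func(preds, yb).item() * n
                total_acc += accuracy(preds, yb).item() * n
        # matches the epoch / valid loss / valid accuracy lines printed above
        print(epoch, total_loss/count, total_acc/count)
    return total_loss/count, total_acc/count

def get_dls(train_ds, valid_ds, bs, **kwargs):
    # shuffle the training set each epoch; keep validation order fixed (assumed)
    return (
        DataLoader(train_ds, bs, shuffle=True, **kwargs),
        DataLoader(valid_ds, bs, shuffle=False, **kwargs),
    )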
diff --git a/_proc/_docs/search.json b/_proc/_docs/search.json
index 8a189de4..d248b54e 100644
--- a/_proc/_docs/search.json
+++ b/_proc/_docs/search.json
@@ -54,6 +54,6 @@
"href": "mini_batch_training.html",
"title": "simple 2 layer nn",
"section": "",
- "text": "data_path = Path('../data/mnist.pkl.gz')\nwith gzip.open(data_path, 'r') as f:\n ((x_train, y_train), (x_test, y_test), _) = pickle.load(f, encoding='latin') \nx_train, y_train, x_test, y_test = map(torch.tensor, (x_train, y_train, x_test, y_test))\nx_train.shape, y_train.shape, x_test.shape, y_test.shape\n\n(torch.Size([50000, 784]),\n torch.Size([50000]),\n torch.Size([10000, 784]),\n torch.Size([10000]))\n\n\n\nimg = x_train[0]\nimg = img.view(28, 28)\nplt.imshow(img);\nplt.axis('off');\n\n\n\n\n\n\n\n\n\nclass Model(nn.Module):\n def __init__(self, n_in, n_h, n_o):\n super().__init__()\n self.layers = [nn.Linear(n_in, n_h), nn.ReLU(), nn.Linear(n_h, n_o)]\n \n def __call__(self, x):\n for l in self.layers:\n x = l(x)\n return x\n\n\nn_in = x_train.shape[1]\nn_h = 50\nn_o = 10\n\nmodel = Model(n_in, n_h, n_o)\npred = model(x_train)\npred.shape\n\ntorch.Size([50000, 10])\n\n\n# cross entropy loss\n\ndef log_softmax(x):\n return (x.exp()/x.exp().sum(-1, keepdim=True)).log()\n\n\nlog_softmax(pred)\n\ntensor([[-2.3917, -2.3172, -2.1445, ..., -2.3604, -2.4435, -2.3298],\n [-2.3426, -2.2119, -2.2799, ..., -2.3664, -2.4151, -2.2220],\n [-2.3725, -2.2966, -2.2658, ..., -2.2858, -2.3270, -2.3698],\n ...,\n [-2.4004, -2.3082, -2.1309, ..., -2.3633, -2.4319, -2.2571],\n [-2.4322, -2.3229, -2.1224, ..., -2.3613, -2.4487, -2.2554],\n [-2.3660, -2.2850, -2.0563, ..., -2.3602, -2.5124, -2.3140]],\n grad_fn=<LogBackward0>)\n\n\n# log product to sum trick\n\ndef log_softmax(x):\n return x - x.exp().sum(-1, keepdim=True).log()\n\n\nlog_softmax(pred)\n\ntensor([[-2.3917, -2.3172, -2.1445, ..., -2.3604, -2.4435, -2.3298],\n [-2.3426, -2.2119, -2.2799, ..., -2.3664, -2.4151, -2.2220],\n [-2.3725, -2.2966, -2.2658, ..., -2.2858, -2.3270, -2.3698],\n ...,\n [-2.4004, -2.3082, -2.1309, ..., -2.3633, -2.4319, -2.2571],\n [-2.4322, -2.3229, -2.1224, ..., -2.3613, -2.4487, -2.2554],\n [-2.3660, -2.2850, -2.0563, ..., -2.3602, -2.5124, -2.3140]],\n grad_fn=<SubBackward0>)\n\n\n# log sum exp trick\n* normalize with the maximum value, so avoid exploding big activations.\n\ndef logsumexp(x):\n m = x.max(-1)[-1]\n return m + (x-m[:,None]).exp().sum(-1).log()\n\n\ndef log_softmax(x):\n return x - logsumexp(x)[:,None]\n\n\nlog_softmax(pred)\n\ntensor([[-2.3917, -2.3172, -2.1445, ..., -2.3604, -2.4435, -2.3298],\n [-2.3426, -2.2119, -2.2799, ..., -2.3664, -2.4151, -2.2220],\n [-2.3725, -2.2966, -2.2658, ..., -2.2858, -2.3270, -2.3698],\n ...,\n [-2.4004, -2.3082, -2.1309, ..., -2.3633, -2.4319, -2.2571],\n [-2.4322, -2.3229, -2.1224, ..., -2.3613, -2.4487, -2.2554],\n [-2.3660, -2.2850, -2.0563, ..., -2.3602, -2.5124, -2.3140]],\n grad_fn=<SubBackward0>)\n\n\n# pytorch logsumexp function\n\ndef log_softmax(x):\n return x - x.logsumexp(-1, keepdim=True)\n\n\nlog_softmax(pred)\n\ntensor([[-2.3917, -2.3172, -2.1445, ..., -2.3604, -2.4435, -2.3298],\n [-2.3426, -2.2119, -2.2799, ..., -2.3664, -2.4151, -2.2220],\n [-2.3725, -2.2966, -2.2658, ..., -2.2858, -2.3270, -2.3698],\n ...,\n [-2.4004, -2.3082, -2.1309, ..., -2.3633, -2.4319, -2.2571],\n [-2.4322, -2.3229, -2.1224, ..., -2.3613, -2.4487, -2.2554],\n [-2.3660, -2.2850, -2.0563, ..., -2.3602, -2.5124, -2.3140]],\n grad_fn=<SubBackward0>)\n\n\n# negative log likeliehood\n* for one hot input vector, it simplifies to the following formula.\n\ndef nll(inp, targ):\n return - inp[range(targ.shape[0]), targ].mean()\n\n\nsm_pred = log_softmax(pred)\nloss = nll(sm_pred, y_train)\nloss\n\ntensor(2.3028, grad_fn=<NegBackward0>)\n\n\n# compare it with native pytorch 
implementation of nll.\n\nloss_pytorch = F.nll_loss(F.log_softmax(pred, -1), y_train)\nloss_pytorch\n\ntensor(2.3028, grad_fn=<NllLossBackward0>)\n\n\n# nll and softmax combined implementation.\n\nloss_pytorch = F.cross_entropy(pred, y_train)\nloss_pytorch\n\ntensor(2.3028, grad_fn=<NllLossBackward0>)\n\n\n# batch training.\n# accuracy.\n\ndef accuracy(out, yb):\n return (out.argmax(1)==yb).float().mean()\n\nloss_func = F.cross_entropy\n\n\nbs = 50\nxb = x_train[:bs]\nyb = y_train[:bs]\npreds = model(xb)\npreds[0]\n\ntensor([-0.0843, -0.0098, 0.1629, 0.1187, 0.1040, 0.0934, -0.1870, -0.0530,\n -0.1361, -0.0224], grad_fn=<SelectBackward0>)\n\n\n\nloss_func(preds, yb)\n\ntensor(2.2846, grad_fn=<NllLossBackward0>)\n\n\n\naccuracy(preds, yb)\n\ntensor(0.1400)\n\n\n\ndef report(loss, preds, yb):\n print(f\"loss: {loss:.2f}, accuracy: {accuracy(preds, yb):.2f}\")\n\n\nreport(loss, preds, yb)\n\nloss: 2.30, accuracy: 0.14\n\n\n\nn, m = x_train.shape\nlr = 0.5\nepochs = 3\nxb,yb = x_train[:bs], y_train[:bs]\npreds = model(xb)\nloss = loss_func(preds, yb)\nreport(loss, preds, yb)\n\nloss: 2.28, accuracy: 0.14\n\n\n\nfor epoch in range(epochs):\n for i in range(0, n, bs):\n s = slice(i, min(i+bs, n))\n xb,yb = x_train[s],y_train[s]\n preds = model(xb)\n loss = loss_func(preds, yb)\n loss.backward()\n with torch.no_grad():\n for l in model.layers:\n if hasattr(l, 'weight'):\n l.weight -= l.weight.grad * lr\n l.bias -= l.bias.grad * lr\n l.weight.grad.zero_()\n l.bias.grad.zero_()\n report(loss, preds, yb)\n\nloss: 0.17, accuracy: 0.94\nloss: 0.13, accuracy: 0.94\nloss: 0.13, accuracy: 0.96\n\n\n\n# parameters\n\nm1 = nn.Module()\nm1.foo = nn.Linear(3, 4)\nm1.boo = 'hey'\nm1\n\nModule(\n (foo): Linear(in_features=3, out_features=4, bias=True)\n)\n\n\n\nlist(m1.named_children())\n\n[('foo', Linear(in_features=3, out_features=4, bias=True))]\n\n\n\nlist(m1.parameters())\n\n[Parameter containing:\n tensor([[-0.4626, -0.5572, -0.2930],\n [-0.2142, 0.2954, -0.5759],\n [-0.0873, 0.5067, 0.0329],\n [ 0.1627, 0.2251, -0.2415]], requires_grad=True),\n Parameter containing:\n tensor([-0.4074, 0.0654, 0.3297, -0.2555], requires_grad=True)]\n\n\n\nclass MLP(nn.Module):\n def __init__(self, n_in, n_h, n_out):\n super().__init__()\n self.l1 = nn.Linear(n_in, n_h)\n self.relu = nn.ReLU()\n self.l2 = nn.Linear(n_h, n_out)\n \n def forward(self, x):\n return self.l2(self.relu(self.l1(x)))\n\n\nmodel = MLP(n_in, n_h, 10)\nmodel\n\nMLP(\n (l1): Linear(in_features=784, out_features=50, bias=True)\n (relu): ReLU()\n (l2): Linear(in_features=50, out_features=10, bias=True)\n)\n\n\n\nfor name, l in model.named_children():\n print(f\"{name}: {l}\")\n\nl1: Linear(in_features=784, out_features=50, bias=True)\nrelu: ReLU()\nl2: Linear(in_features=50, out_features=10, bias=True)\n\n\n\nfor p in model.parameters():\n print(p.shape)\n\ntorch.Size([50, 784])\ntorch.Size([50])\ntorch.Size([10, 50])\ntorch.Size([10])\n\n\n\ndef fit():\n for epoch in range(epochs):\n for i in range(0, n, bs):\n s = slice(i, min(i+bs, n))\n xb,yb = x_train[s], y_train[s]\n preds = model(xb)\n loss = loss_func(preds, yb)\n loss.backward()\n with torch.no_grad():\n for p in model.parameters():\n p -= p.grad * lr\n model.zero_grad()\n report(loss, preds, yb)\n\n\nfit()\n\nloss: 0.02, accuracy: 1.00\nloss: 0.05, accuracy: 0.98\nloss: 0.03, accuracy: 1.00\n\n\n# nn.Module behind the scene\n\nclass MyModule:\n def __init__(self, n_in, n_h, n_out):\n self._modules = {}\n self.l1 = nn.Linear(n_in, n_h)\n self.l2 = nn.Linear(n_h, n_out)\n self.relu = 
nn.ReLU()\n \n def __setattr__(self, k, v):\n if not k.startswith('_'):\n self._modules[k] = v\n \n super().__setattr__(k, v)\n \n def __repr__(self):\n return f\"{self._modules}\"\n \n def parameters(self):\n for l in self._modules.values():\n yield from l.parameters()\n\n\nmdl = MyModule(n_in, n_h, n_o)\nmdl\n\n{'l1': Linear(in_features=784, out_features=50, bias=True), 'l2': Linear(in_features=50, out_features=10, bias=True), 'relu': ReLU()}\n\n\n\nfor p in mdl.parameters():\n print(p.shape)\n\ntorch.Size([50, 784])\ntorch.Size([50])\ntorch.Size([10, 50])\ntorch.Size([10])\n\n\n# registering modules\n\nfrom functools import reduce\n\n\nlayers = [nn.Linear(n_in, n_h), nn.ReLU(), nn.Linear(n_h, n_o)]\n\n\nclass Model(nn.Module):\n def __init__(self, layers):\n super().__init__()\n self.layers = layers\n for i,l in enumerate(self.layers):\n self.add_module(f\"layer_{i}\", l)\n \n def forward(self, x):\n return reduce(lambda val, layer: layer(val), self.layers, x)\n\n\nmodel = Model(layers)\nmodel\n\nModel(\n (layer_0): Linear(in_features=784, out_features=50, bias=True)\n (layer_1): ReLU()\n (layer_2): Linear(in_features=50, out_features=10, bias=True)\n)\n\n\n\nmodel(xb).shape\n\ntorch.Size([50, 10])\n\n\n# nn.ModuleList\n\nclass SequentialModel(nn.Module):\n def __init__(self, layers):\n super().__init__()\n self.layers = nn.ModuleList(layers)\n \n def forward(self, x):\n for l in self.layers:\n x = l(x)\n return x\n\n\nmodel = SequentialModel(layers)\nmodel(xb).shape\n\ntorch.Size([50, 10])\n\n\n# nn.Sequential\n\nmodel = nn.Sequential(*layers)\n\n\nfit()\n\nloss: 0.14, accuracy: 0.96\nloss: 0.11, accuracy: 0.96\nloss: 0.05, accuracy: 1.00\n\n\n\nmodel\n\nSequential(\n (0): Linear(in_features=784, out_features=50, bias=True)\n (1): ReLU()\n (2): Linear(in_features=50, out_features=10, bias=True)\n)\n\n\n# optim\n\nclass Optimizer:\n def __init__(self, params, lr=0.5):\n self.params, self.lr = list(params), lr\n \n def step(self):\n with torch.no_grad():\n for p in self.params:\n p -= p.grad * self.lr\n\n def zero_grad(self):\n for p in self.params:\n p.grad.data.zero_()\n\n\nmodel = nn.Sequential(nn.Linear(n_in, n_h), nn.ReLU(), nn.Linear(n_h, n_o))\n\n\nopt = Optimizer(model.parameters(), lr=lr)\n\n\nfor epoch in range(epochs):\n for i in range(0, n, bs):\n s = slice(i, min(i+bs, n))\n xb,yb = x_train[s],y_train[s]\n preds = model(xb)\n loss = loss_func(preds, yb)\n loss.backward()\n opt.step()\n opt.zero_grad()\n report(loss, preds, yb)\n\nloss: 0.13, accuracy: 0.96\nloss: 0.12, accuracy: 0.92\nloss: 0.08, accuracy: 0.96\n\n\n\nfrom torch import optim\n\n\ndef get_model():\n model = nn.Sequential(nn.Linear(n_in, n_h), nn.ReLU(), nn.Linear(n_h, n_o))\n opt = optim.SGD(model.parameters(), lr=lr)\n return opt, model\n\n\nopt, model = get_model()\nloss_func(model(xb), yb)\n\ntensor(2.2912, grad_fn=<NllLossBackward0>)\n\n\n\nfor epoch in range(epochs):\n for i in range(0, n, bs):\n s = slice(i, min(i+bs, n))\n xb,yb = x_train[s],y_train[s]\n preds = model(xb)\n loss = loss_func(preds, yb)\n loss.backward()\n opt.step()\n opt.zero_grad()\n report(loss, preds, yb)\n\nloss: 0.15, accuracy: 0.96\nloss: 0.11, accuracy: 0.96\nloss: 0.06, accuracy: 1.00\n\n\n# dataset\n\n\nDataset\n\n Dataset (x, y)\n\nInitialize self. 
See help(type(self)) for accurate signature.\n\ntrain_ds, valid_ds = Dataset(x_train, y_train), Dataset(x_test, y_test)\n\n\nopt, model = get_model()\n\n\nfor epoch in range(epochs):\n for i in range(0, n, bs):\n xb,yb = train_ds[i: min(i+bs, n)]\n preds = model(xb)\n loss = loss_func(preds, yb)\n loss.backward()\n opt.step()\n opt.zero_grad()\n report(loss, preds, yb)\n\nloss: 0.13, accuracy: 0.96\nloss: 0.10, accuracy: 0.98\nloss: 0.12, accuracy: 0.96\n\n\n# data loader\n\nclass DataLoader:\n def __init__(self, ds, bs):\n self.ds, self.bs = ds, bs\n \n def __iter__(self):\n for i in range(0, len(self.ds), self.bs):\n yield self.ds[i:i+self.bs]\n\n\ntrain_dl = DataLoader(train_ds, bs)\nvalid_dl = DataLoader(valid_ds, bs)\n\n\nxb, yb = next(iter(train_dl))\nxb.shape\n\ntorch.Size([50, 784])\n\n\n\nopt, model = get_model()\n\n\ndef fit():\n for epoch in range(epochs):\n for xb,yb in train_dl:\n preds = model(xb)\n loss = loss_func(preds, yb)\n loss.backward()\n opt.step()\n opt.zero_grad()\n report(loss, preds, yb)\n\n\nfit()\n\nloss: 0.16, accuracy: 0.96\nloss: 0.11, accuracy: 0.98\nloss: 0.07, accuracy: 0.98\n\n\n\n# random sampling\n\nimport random\n\n\nclass Sampler:\n def __init__(self, ds, shuffle=False):\n self.n, self.shuffle = len(ds), shuffle\n \n def __iter__(self):\n res = list(range(self.n))\n if self.shuffle:\n random.shuffle(res)\n return iter(res)\n\n\nfrom itertools import islice\n\n\nss = Sampler(train_ds)\n\n\nlist(islice(ss, 5))\n\n[0, 1, 2, 3, 4]\n\n\n\nimport fastcore.all as fc\n\n\nclass BatchSampler:\n def __init__(self, sampler, bs, drop_last=False):\n fc.store_attr()\n \n def __iter__(self):\n yield from fc.chunked(iter(self.sampler), self.bs, drop_last=self.drop_last)\n\n\nbatches = BatchSampler(ss, 5)\nlist(islice(iter(batches), 3))\n\n[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [10, 11, 12, 13, 14]]\n\n\n\ndef collate(b):\n xs, ys = zip(*b)\n return torch.stack(xs), torch.stack(ys)\n\n\nclass DataLoader:\n def __init__(self, ds, batchs, collate_fn=collate):\n fc.store_attr()\n \n def __iter__(self):\n yield from (self.collate_fn(self.ds[i] for i in b) for b in self.batchs)\n\n\ntrain_sampler = BatchSampler(Sampler(train_ds, shuffle=True), bs)\nvalid_sampler = BatchSampler(Sampler(valid_ds, shuffle=True), bs)\n\n\ntrain_dl = DataLoader(train_ds, train_sampler)\nvalid_dl = DataLoader(valid_ds, valid_sampler)\n\n\nxb, yb = next(iter(valid_dl))\nxb.shape, yb.shape\n\n(torch.Size([50, 784]), torch.Size([50]))\n\n\n\nplt.imshow(xb[0].view(28, 28));\nplt.axis('off');\n\n\n\n\n\n\n\n\n\nopt, model = get_model()\n\n\nfit()\n\nloss: 0.11, accuracy: 0.94\nloss: 0.27, accuracy: 0.96\nloss: 0.03, accuracy: 1.00\n\n\n\n# multiprocessing dataloader\n\nimport torch.multiprocessing as mp\n\n\nclass DataLoader:\n def __init__(self, ds, batchs, collate_fn=collate, num_workers=1):\n fc.store_attr()\n \n def __iter__(self):\n with mp.Pool(self.num_workers) as ex:\n yield from ex.map(self.ds.__getitem__, iter(self.batchs))\n\n\ntrain_dl = DataLoader(train_ds, batchs=train_sampler)\n\n\nxb, yb = next(iter(train_dl))\nplt.imshow(xb[0].view(28, 28));\nplt.axis('off');\n\n\n\n\n\n\n\n\n# pytorch dataloaders\n\nt = RandomSampler(train_ds)\n\n\nnext(iter(t))\n\n24797\n\n\n\nt = BatchSampler(train_ds, batch_size=2, drop_last=False)\n\nk = next(iter(t))\nprint(len(k))\nfor ele in k:\n print(ele[0].shape, ele[1])\n\n2\ntorch.Size([784]) tensor(5)\ntorch.Size([784]) tensor(0)\n\n\n\nt = BatchSampler(RandomSampler(train_ds), batch_size=2, drop_last=False)\n\nk = next(iter(t))\nprint(len(k))\nfor ele in 
k:\n print(ele)\n\n2\n33683\n36592\n\n\n\ntrain_samp = BatchSampler(RandomSampler(train_ds), bs, drop_last=False)\nvalid_samp = BatchSampler(RandomSampler(valid_ds), bs, drop_last=False)\n\n\ntrain_dl = DataLoader(train_ds, batch_sampler=train_samp, collate_fn=collate)\nvalid_dl = DataLoader(valid_ds, batch_sampler=valid_samp, collate_fn=collate)\n\n\nopt, model = get_model()\nfit()\n\nloss: 0.20, accuracy: 0.94\nloss: 0.11, accuracy: 0.98\nloss: 0.20, accuracy: 0.98\n\n\n\ntrain_dl = DataLoader(train_ds, bs, shuffle=True, num_workers=2, drop_last=True)\nvalid_dl = DataLoader(valid_ds, bs, shuffle=False, num_workers=2)\n\n\nopt, model = get_model()\nfit()\n\nloss: 0.08, accuracy: 0.98\nloss: 0.31, accuracy: 0.86\nloss: 0.11, accuracy: 0.98\n\n\n\n# validation\n\n\n\nfit\n\n fit (epochs, model, loss_func, opt, train_dl, valid_ld)\n\n\n\n\nget_dls\n\n get_dls (train_ds, valid_ds, bs, **kwargs)\n\n\ntrain_dl, valid_dl = get_dls(train_ds, valid_ds, bs)\nopt, model = get_model()\n\n\n\n\n0 0.1775239165313542 0.948100003004074\n1 0.1179210783354938 0.9646000063419342\n2 0.11550588405691087 0.9665000039339066\n3 0.10593999677803367 0.9698000079393387\n4 0.10098711441038176 0.9727000087499619\nCPU times: user 17.8 s, sys: 16.1 s, total: 33.8 s\nWall time: 4.71 s"
+ "text": "data_path = Path('../data/mnist.pkl.gz')\nwith gzip.open(data_path, 'r') as f:\n ((x_train, y_train), (x_test, y_test), _) = pickle.load(f, encoding='latin') \nx_train, y_train, x_test, y_test = map(torch.tensor, (x_train, y_train, x_test, y_test))\nx_train.shape, y_train.shape, x_test.shape, y_test.shape\n\n(torch.Size([50000, 784]),\n torch.Size([50000]),\n torch.Size([10000, 784]),\n torch.Size([10000]))\n\n\n\nimg = x_train[0]\nimg = img.view(28, 28)\nplt.imshow(img);\nplt.axis('off');\n\n\n\n\n\n\n\n\n\nclass Model(nn.Module):\n def __init__(self, n_in, n_h, n_o):\n super().__init__()\n self.layers = [nn.Linear(n_in, n_h), nn.ReLU(), nn.Linear(n_h, n_o)]\n \n def __call__(self, x):\n for l in self.layers:\n x = l(x)\n return x\n\n\nn_in = x_train.shape[1]\nn_h = 50\nn_o = 10\n\nmodel = Model(n_in, n_h, n_o)\npred = model(x_train)\npred.shape\n\ntorch.Size([50000, 10])\n\n\n# cross entropy loss\n\ndef log_softmax(x):\n return (x.exp()/x.exp().sum(-1, keepdim=True)).log()\n\n\nlog_softmax(pred)\n\ntensor([[-2.3917, -2.3172, -2.1445, ..., -2.3604, -2.4435, -2.3298],\n [-2.3426, -2.2119, -2.2799, ..., -2.3664, -2.4151, -2.2220],\n [-2.3725, -2.2966, -2.2658, ..., -2.2858, -2.3270, -2.3698],\n ...,\n [-2.4004, -2.3082, -2.1309, ..., -2.3633, -2.4319, -2.2571],\n [-2.4322, -2.3229, -2.1224, ..., -2.3613, -2.4487, -2.2554],\n [-2.3660, -2.2850, -2.0563, ..., -2.3602, -2.5124, -2.3140]],\n grad_fn=<LogBackward0>)\n\n\n# log product to sum trick\n\ndef log_softmax(x):\n return x - x.exp().sum(-1, keepdim=True).log()\n\n\nlog_softmax(pred)\n\ntensor([[-2.3917, -2.3172, -2.1445, ..., -2.3604, -2.4435, -2.3298],\n [-2.3426, -2.2119, -2.2799, ..., -2.3664, -2.4151, -2.2220],\n [-2.3725, -2.2966, -2.2658, ..., -2.2858, -2.3270, -2.3698],\n ...,\n [-2.4004, -2.3082, -2.1309, ..., -2.3633, -2.4319, -2.2571],\n [-2.4322, -2.3229, -2.1224, ..., -2.3613, -2.4487, -2.2554],\n [-2.3660, -2.2850, -2.0563, ..., -2.3602, -2.5124, -2.3140]],\n grad_fn=<SubBackward0>)\n\n\n# log sum exp trick\n* normalize with the maximum value, so avoid exploding big activations.\n\ndef logsumexp(x):\n m = x.max(-1)[-1]\n return m + (x-m[:,None]).exp().sum(-1).log()\n\n\ndef log_softmax(x):\n return x - logsumexp(x)[:,None]\n\n\nlog_softmax(pred)\n\ntensor([[-2.3917, -2.3172, -2.1445, ..., -2.3604, -2.4435, -2.3298],\n [-2.3426, -2.2119, -2.2799, ..., -2.3664, -2.4151, -2.2220],\n [-2.3725, -2.2966, -2.2658, ..., -2.2858, -2.3270, -2.3698],\n ...,\n [-2.4004, -2.3082, -2.1309, ..., -2.3633, -2.4319, -2.2571],\n [-2.4322, -2.3229, -2.1224, ..., -2.3613, -2.4487, -2.2554],\n [-2.3660, -2.2850, -2.0563, ..., -2.3602, -2.5124, -2.3140]],\n grad_fn=<SubBackward0>)\n\n\n# pytorch logsumexp function\n\ndef log_softmax(x):\n return x - x.logsumexp(-1, keepdim=True)\n\n\nlog_softmax(pred)\n\ntensor([[-2.3917, -2.3172, -2.1445, ..., -2.3604, -2.4435, -2.3298],\n [-2.3426, -2.2119, -2.2799, ..., -2.3664, -2.4151, -2.2220],\n [-2.3725, -2.2966, -2.2658, ..., -2.2858, -2.3270, -2.3698],\n ...,\n [-2.4004, -2.3082, -2.1309, ..., -2.3633, -2.4319, -2.2571],\n [-2.4322, -2.3229, -2.1224, ..., -2.3613, -2.4487, -2.2554],\n [-2.3660, -2.2850, -2.0563, ..., -2.3602, -2.5124, -2.3140]],\n grad_fn=<SubBackward0>)\n\n\n# negative log likeliehood\n* for one hot input vector, it simplifies to the following formula.\n\ndef nll(inp, targ):\n return - inp[range(targ.shape[0]), targ].mean()\n\n\nsm_pred = log_softmax(pred)\nloss = nll(sm_pred, y_train)\nloss\n\ntensor(2.3028, grad_fn=<NegBackward0>)\n\n\n# compare it with native pytorch 
implementation of nll.\n\nloss_pytorch = F.nll_loss(F.log_softmax(pred, -1), y_train)\nloss_pytorch\n\ntensor(2.3028, grad_fn=<NllLossBackward0>)\n\n\n# nll and softmax combined implementation.\n\nloss_pytorch = F.cross_entropy(pred, y_train)\nloss_pytorch\n\ntensor(2.3028, grad_fn=<NllLossBackward0>)\n\n\n# batch training.\n# accuracy.\n\n\naccuracy\n\n accuracy (out, yb)\n\n\nloss_func = F.cross_entropy\n\n\nbs = 50\nxb = x_train[:bs]\nyb = y_train[:bs]\npreds = model(xb)\npreds[0]\n\ntensor([-0.0843, -0.0098, 0.1629, 0.1187, 0.1040, 0.0934, -0.1870, -0.0530,\n -0.1361, -0.0224], grad_fn=<SelectBackward0>)\n\n\n\nloss_func(preds, yb)\n\ntensor(2.2846, grad_fn=<NllLossBackward0>)\n\n\n\naccuracy(preds, yb)\n\ntensor(0.1400)\n\n\n\n\n\nreport\n\n report (loss, preds, yb)\n\n\nreport(loss, preds, yb)\n\nloss: 2.30, accuracy: 0.14\n\n\n\nn, m = x_train.shape\nlr = 0.5\nepochs = 3\nxb,yb = x_train[:bs], y_train[:bs]\npreds = model(xb)\nloss = loss_func(preds, yb)\nreport(loss, preds, yb)\n\nloss: 2.28, accuracy: 0.14\n\n\n\nfor epoch in range(epochs):\n for i in range(0, n, bs):\n s = slice(i, min(i+bs, n))\n xb,yb = x_train[s],y_train[s]\n preds = model(xb)\n loss = loss_func(preds, yb)\n loss.backward()\n with torch.no_grad():\n for l in model.layers:\n if hasattr(l, 'weight'):\n l.weight -= l.weight.grad * lr\n l.bias -= l.bias.grad * lr\n l.weight.grad.zero_()\n l.bias.grad.zero_()\n report(loss, preds, yb)\n\nloss: 0.17, accuracy: 0.94\nloss: 0.13, accuracy: 0.94\nloss: 0.13, accuracy: 0.96\n\n\n\n# parameters\n\nm1 = nn.Module()\nm1.foo = nn.Linear(3, 4)\nm1.boo = 'hey'\nm1\n\nModule(\n (foo): Linear(in_features=3, out_features=4, bias=True)\n)\n\n\n\nlist(m1.named_children())\n\n[('foo', Linear(in_features=3, out_features=4, bias=True))]\n\n\n\nlist(m1.parameters())\n\n[Parameter containing:\n tensor([[-0.4626, -0.5572, -0.2930],\n [-0.2142, 0.2954, -0.5759],\n [-0.0873, 0.5067, 0.0329],\n [ 0.1627, 0.2251, -0.2415]], requires_grad=True),\n Parameter containing:\n tensor([-0.4074, 0.0654, 0.3297, -0.2555], requires_grad=True)]\n\n\n\nclass MLP(nn.Module):\n def __init__(self, n_in, n_h, n_out):\n super().__init__()\n self.l1 = nn.Linear(n_in, n_h)\n self.relu = nn.ReLU()\n self.l2 = nn.Linear(n_h, n_out)\n \n def forward(self, x):\n return self.l2(self.relu(self.l1(x)))\n\n\nmodel = MLP(n_in, n_h, 10)\nmodel\n\nMLP(\n (l1): Linear(in_features=784, out_features=50, bias=True)\n (relu): ReLU()\n (l2): Linear(in_features=50, out_features=10, bias=True)\n)\n\n\n\nfor name, l in model.named_children():\n print(f\"{name}: {l}\")\n\nl1: Linear(in_features=784, out_features=50, bias=True)\nrelu: ReLU()\nl2: Linear(in_features=50, out_features=10, bias=True)\n\n\n\nfor p in model.parameters():\n print(p.shape)\n\ntorch.Size([50, 784])\ntorch.Size([50])\ntorch.Size([10, 50])\ntorch.Size([10])\n\n\n\ndef fit():\n for epoch in range(epochs):\n for i in range(0, n, bs):\n s = slice(i, min(i+bs, n))\n xb,yb = x_train[s], y_train[s]\n preds = model(xb)\n loss = loss_func(preds, yb)\n loss.backward()\n with torch.no_grad():\n for p in model.parameters():\n p -= p.grad * lr\n model.zero_grad()\n report(loss, preds, yb)\n\n\nfit()\n\nloss: 0.02, accuracy: 1.00\nloss: 0.05, accuracy: 0.98\nloss: 0.03, accuracy: 1.00\n\n\n# nn.Module behind the scene\n\nclass MyModule:\n def __init__(self, n_in, n_h, n_out):\n self._modules = {}\n self.l1 = nn.Linear(n_in, n_h)\n self.l2 = nn.Linear(n_h, n_out)\n self.relu = nn.ReLU()\n \n def __setattr__(self, k, v):\n if not k.startswith('_'):\n self._modules[k] = v\n 
\n super().__setattr__(k, v)\n \n def __repr__(self):\n return f\"{self._modules}\"\n \n def parameters(self):\n for l in self._modules.values():\n yield from l.parameters()\n\n\nmdl = MyModule(n_in, n_h, n_o)\nmdl\n\n{'l1': Linear(in_features=784, out_features=50, bias=True), 'l2': Linear(in_features=50, out_features=10, bias=True), 'relu': ReLU()}\n\n\n\nfor p in mdl.parameters():\n print(p.shape)\n\ntorch.Size([50, 784])\ntorch.Size([50])\ntorch.Size([10, 50])\ntorch.Size([10])\n\n\n# registering modules\n\nfrom functools import reduce\n\n\nlayers = [nn.Linear(n_in, n_h), nn.ReLU(), nn.Linear(n_h, n_o)]\n\n\nclass Model(nn.Module):\n def __init__(self, layers):\n super().__init__()\n self.layers = layers\n for i,l in enumerate(self.layers):\n self.add_module(f\"layer_{i}\", l)\n \n def forward(self, x):\n return reduce(lambda val, layer: layer(val), self.layers, x)\n\n\nmodel = Model(layers)\nmodel\n\nModel(\n (layer_0): Linear(in_features=784, out_features=50, bias=True)\n (layer_1): ReLU()\n (layer_2): Linear(in_features=50, out_features=10, bias=True)\n)\n\n\n\nmodel(xb).shape\n\ntorch.Size([50, 10])\n\n\n# nn.ModuleList\n\nclass SequentialModel(nn.Module):\n def __init__(self, layers):\n super().__init__()\n self.layers = nn.ModuleList(layers)\n \n def forward(self, x):\n for l in self.layers:\n x = l(x)\n return x\n\n\nmodel = SequentialModel(layers)\nmodel(xb).shape\n\ntorch.Size([50, 10])\n\n\n# nn.Sequential\n\nmodel = nn.Sequential(*layers)\n\n\nfit()\n\nloss: 0.14, accuracy: 0.96\nloss: 0.11, accuracy: 0.96\nloss: 0.05, accuracy: 1.00\n\n\n\nmodel\n\nSequential(\n (0): Linear(in_features=784, out_features=50, bias=True)\n (1): ReLU()\n (2): Linear(in_features=50, out_features=10, bias=True)\n)\n\n\n# optim\n\nclass Optimizer:\n def __init__(self, params, lr=0.5):\n self.params, self.lr = list(params), lr\n \n def step(self):\n with torch.no_grad():\n for p in self.params:\n p -= p.grad * self.lr\n\n def zero_grad(self):\n for p in self.params:\n p.grad.data.zero_()\n\n\nmodel = nn.Sequential(nn.Linear(n_in, n_h), nn.ReLU(), nn.Linear(n_h, n_o))\n\n\nopt = Optimizer(model.parameters(), lr=lr)\n\n\nfor epoch in range(epochs):\n for i in range(0, n, bs):\n s = slice(i, min(i+bs, n))\n xb,yb = x_train[s],y_train[s]\n preds = model(xb)\n loss = loss_func(preds, yb)\n loss.backward()\n opt.step()\n opt.zero_grad()\n report(loss, preds, yb)\n\nloss: 0.13, accuracy: 0.96\nloss: 0.12, accuracy: 0.92\nloss: 0.08, accuracy: 0.96\n\n\n\nfrom torch import optim\n\n\ndef get_model():\n model = nn.Sequential(nn.Linear(n_in, n_h), nn.ReLU(), nn.Linear(n_h, n_o))\n opt = optim.SGD(model.parameters(), lr=lr)\n return opt, model\n\n\nopt, model = get_model()\nloss_func(model(xb), yb)\n\ntensor(2.2912, grad_fn=<NllLossBackward0>)\n\n\n\nfor epoch in range(epochs):\n for i in range(0, n, bs):\n s = slice(i, min(i+bs, n))\n xb,yb = x_train[s],y_train[s]\n preds = model(xb)\n loss = loss_func(preds, yb)\n loss.backward()\n opt.step()\n opt.zero_grad()\n report(loss, preds, yb)\n\nloss: 0.15, accuracy: 0.96\nloss: 0.11, accuracy: 0.96\nloss: 0.06, accuracy: 1.00\n\n\n# dataset\n\n\n\nDataset\n\n Dataset (x, y)\n\nInitialize self. 
See help(type(self)) for accurate signature.\n\ntrain_ds, valid_ds = Dataset(x_train, y_train), Dataset(x_test, y_test)\n\n\nopt, model = get_model()\n\n\nfor epoch in range(epochs):\n for i in range(0, n, bs):\n xb,yb = train_ds[i: min(i+bs, n)]\n preds = model(xb)\n loss = loss_func(preds, yb)\n loss.backward()\n opt.step()\n opt.zero_grad()\n report(loss, preds, yb)\n\nloss: 0.13, accuracy: 0.96\nloss: 0.10, accuracy: 0.98\nloss: 0.12, accuracy: 0.96\n\n\n# data loader\n\nclass DataLoader:\n def __init__(self, ds, bs):\n self.ds, self.bs = ds, bs\n \n def __iter__(self):\n for i in range(0, len(self.ds), self.bs):\n yield self.ds[i:i+self.bs]\n\n\ntrain_dl = DataLoader(train_ds, bs)\nvalid_dl = DataLoader(valid_ds, bs)\n\n\nxb, yb = next(iter(train_dl))\nxb.shape\n\ntorch.Size([50, 784])\n\n\n\nopt, model = get_model()\n\n\ndef fit():\n for epoch in range(epochs):\n for xb,yb in train_dl:\n preds = model(xb)\n loss = loss_func(preds, yb)\n loss.backward()\n opt.step()\n opt.zero_grad()\n report(loss, preds, yb)\n\n\nfit()\n\nloss: 0.16, accuracy: 0.96\nloss: 0.11, accuracy: 0.98\nloss: 0.07, accuracy: 0.98\n\n\n\n# random sampling\n\nimport random\n\n\nclass Sampler:\n def __init__(self, ds, shuffle=False):\n self.n, self.shuffle = len(ds), shuffle\n \n def __iter__(self):\n res = list(range(self.n))\n if self.shuffle:\n random.shuffle(res)\n return iter(res)\n\n\nfrom itertools import islice\n\n\nss = Sampler(train_ds)\n\n\nlist(islice(ss, 5))\n\n[0, 1, 2, 3, 4]\n\n\n\nimport fastcore.all as fc\n\n\nclass BatchSampler:\n def __init__(self, sampler, bs, drop_last=False):\n fc.store_attr()\n \n def __iter__(self):\n yield from fc.chunked(iter(self.sampler), self.bs, drop_last=self.drop_last)\n\n\nbatches = BatchSampler(ss, 5)\nlist(islice(iter(batches), 3))\n\n[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [10, 11, 12, 13, 14]]\n\n\n\ndef collate(b):\n xs, ys = zip(*b)\n return torch.stack(xs), torch.stack(ys)\n\n\nclass DataLoader:\n def __init__(self, ds, batchs, collate_fn=collate):\n fc.store_attr()\n \n def __iter__(self):\n yield from (self.collate_fn(self.ds[i] for i in b) for b in self.batchs)\n\n\ntrain_sampler = BatchSampler(Sampler(train_ds, shuffle=True), bs)\nvalid_sampler = BatchSampler(Sampler(valid_ds, shuffle=True), bs)\n\n\ntrain_dl = DataLoader(train_ds, train_sampler)\nvalid_dl = DataLoader(valid_ds, valid_sampler)\n\n\nxb, yb = next(iter(valid_dl))\nxb.shape, yb.shape\n\n(torch.Size([50, 784]), torch.Size([50]))\n\n\n\nplt.imshow(xb[0].view(28, 28));\nplt.axis('off');\n\n\n\n\n\n\n\n\n\nopt, model = get_model()\n\n\nfit()\n\nloss: 0.11, accuracy: 0.94\nloss: 0.27, accuracy: 0.96\nloss: 0.03, accuracy: 1.00\n\n\n\n# multiprocessing dataloader\n\nimport torch.multiprocessing as mp\n\n\nclass DataLoader:\n def __init__(self, ds, batchs, collate_fn=collate, num_workers=1):\n fc.store_attr()\n \n def __iter__(self):\n with mp.Pool(self.num_workers) as ex:\n yield from ex.map(self.ds.__getitem__, iter(self.batchs))\n\n\ntrain_dl = DataLoader(train_ds, batchs=train_sampler)\n\n\nxb, yb = next(iter(train_dl))\nplt.imshow(xb[0].view(28, 28));\nplt.axis('off');\n\n\n\n\n\n\n\n\n# pytorch dataloaders\n\nt = RandomSampler(train_ds)\n\n\nnext(iter(t))\n\n24797\n\n\n\nt = BatchSampler(train_ds, batch_size=2, drop_last=False)\n\nk = next(iter(t))\nprint(len(k))\nfor ele in k:\n print(ele[0].shape, ele[1])\n\n2\ntorch.Size([784]) tensor(5)\ntorch.Size([784]) tensor(0)\n\n\n\nt = BatchSampler(RandomSampler(train_ds), batch_size=2, drop_last=False)\n\nk = next(iter(t))\nprint(len(k))\nfor ele in 
k:\n print(ele)\n\n2\n33683\n36592\n\n\n\ntrain_samp = BatchSampler(RandomSampler(train_ds), bs, drop_last=False)\nvalid_samp = BatchSampler(RandomSampler(valid_ds), bs, drop_last=False)\n\n\ntrain_dl = DataLoader(train_ds, batch_sampler=train_samp, collate_fn=collate)\nvalid_dl = DataLoader(valid_ds, batch_sampler=valid_samp, collate_fn=collate)\n\n\nopt, model = get_model()\nfit()\n\nloss: 0.20, accuracy: 0.94\nloss: 0.11, accuracy: 0.98\nloss: 0.20, accuracy: 0.98\n\n\n\ntrain_dl = DataLoader(train_ds, bs, shuffle=True, num_workers=2, drop_last=True)\nvalid_dl = DataLoader(valid_ds, bs, shuffle=False, num_workers=2)\n\n\nopt, model = get_model()\nfit()\n\nloss: 0.08, accuracy: 0.98\nloss: 0.31, accuracy: 0.86\nloss: 0.11, accuracy: 0.98\n\n\n\n# validation\n\n\n\nfit\n\n fit (epochs, model, loss_func, opt, train_dl, valid_dl)\n\n\n\n\nget_dls\n\n get_dls (train_ds, valid_ds, bs, **kwargs)\n\n\ntrain_dl, valid_dl = get_dls(train_ds, valid_ds, bs)\nopt, model = get_model()\n\n\n\n\n0 0.1775239165313542 0.948100003004074\n1 0.1179210783354938 0.9646000063419342\n2 0.11550588405691087 0.9665000039339066\n3 0.10593999677803367 0.9698000079393387\n4 0.10098711441038176 0.9727000087499619\nCPU times: user 17.8 s, sys: 16.1 s, total: 33.8 s\nWall time: 4.71 s"
}
]
\ No newline at end of file
diff --git a/_proc/_docs/sitemap.xml b/_proc/_docs/sitemap.xml
index b6ac0702..6a4285d9 100644
--- a/_proc/_docs/sitemap.xml
+++ b/_proc/_docs/sitemap.xml
@@ -10,6 +10,6 @@
https://arun477.github.io/practice_deep_learning/mini_batch_training.html
- 2024-02-19T15:03:49.698Z
+ 2024-02-19T15:14:43.526Z
diff --git a/nbs/.ipynb_checkpoints/04_mini_batch_training-checkpoint.ipynb b/nbs/.ipynb_checkpoints/04_mini_batch_training-checkpoint.ipynb
index 14e5e454..fade6c19 100644
--- a/nbs/.ipynb_checkpoints/04_mini_batch_training-checkpoint.ipynb
+++ b/nbs/.ipynb_checkpoints/04_mini_batch_training-checkpoint.ipynb
@@ -489,9 +489,19 @@
"metadata": {},
"outputs": [],
"source": [
- "def accuracy(out, yb):\n",
- " return (out.argmax(1)==yb).float().mean()\n",
+ "#|export\n",
"\n",
+ "def accuracy(out, yb):\n",
+ " return (out.argmax(1)==yb).float().mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dcca7a8d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
"loss_func = F.cross_entropy"
]
},
@@ -570,6 +580,8 @@
"metadata": {},
"outputs": [],
"source": [
+ "#|export\n",
+ "\n",
"def report(loss, preds, yb):\n",
" print(f\"loss: {loss:.2f}, accuracy: {accuracy(preds, yb):.2f}\")"
]
@@ -2029,7 +2041,7 @@
"source": [
"#|export\n",
"\n",
- "def fit(epochs, model, loss_func, opt, train_dl, valid_ld):\n",
+ "def fit(epochs, model, loss_func, opt, train_dl, valid_dl):\n",
" for epoch in range(epochs):\n",
" model.train()\n",
" for xb, yb in train_dl:\n",
diff --git a/nbs/04_mini_batch_training.ipynb b/nbs/04_mini_batch_training.ipynb
index 14e5e454..fade6c19 100644
--- a/nbs/04_mini_batch_training.ipynb
+++ b/nbs/04_mini_batch_training.ipynb
@@ -489,9 +489,19 @@
"metadata": {},
"outputs": [],
"source": [
- "def accuracy(out, yb):\n",
- " return (out.argmax(1)==yb).float().mean()\n",
+ "#|export\n",
"\n",
+ "def accuracy(out, yb):\n",
+ " return (out.argmax(1)==yb).float().mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dcca7a8d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
"loss_func = F.cross_entropy"
]
},
@@ -570,6 +580,8 @@
"metadata": {},
"outputs": [],
"source": [
+ "#|export\n",
+ "\n",
"def report(loss, preds, yb):\n",
" print(f\"loss: {loss:.2f}, accuracy: {accuracy(preds, yb):.2f}\")"
]
@@ -2029,7 +2041,7 @@
"source": [
"#|export\n",
"\n",
- "def fit(epochs, model, loss_func, opt, train_dl, valid_ld):\n",
+ "def fit(epochs, model, loss_func, opt, train_dl, valid_dl):\n",
" for epoch in range(epochs):\n",
" model.train()\n",
" for xb, yb in train_dl:\n",
diff --git a/practice_deep_learning/__pycache__/__init__.cpython-39.pyc b/practice_deep_learning/__pycache__/__init__.cpython-39.pyc
index 2724c842..27306e92 100644
Binary files a/practice_deep_learning/__pycache__/__init__.cpython-39.pyc and b/practice_deep_learning/__pycache__/__init__.cpython-39.pyc differ
diff --git a/practice_deep_learning/__pycache__/training.cpython-39.pyc b/practice_deep_learning/__pycache__/training.cpython-39.pyc
new file mode 100644
index 00000000..d40ca636
Binary files /dev/null and b/practice_deep_learning/__pycache__/training.cpython-39.pyc differ
diff --git a/practice_deep_learning/_modidx.py b/practice_deep_learning/_modidx.py
index a18489cf..112ad008 100644
--- a/practice_deep_learning/_modidx.py
+++ b/practice_deep_learning/_modidx.py
@@ -13,7 +13,11 @@
'practice_deep_learning/training.py'),
'practice_deep_learning.training.Dataset.__len__': ( 'mini_batch_training.html#dataset.__len__',
'practice_deep_learning/training.py'),
+ 'practice_deep_learning.training.accuracy': ( 'mini_batch_training.html#accuracy',
+ 'practice_deep_learning/training.py'),
'practice_deep_learning.training.fit': ( 'mini_batch_training.html#fit',
'practice_deep_learning/training.py'),
'practice_deep_learning.training.get_dls': ( 'mini_batch_training.html#get_dls',
- 'practice_deep_learning/training.py')}}}
+ 'practice_deep_learning/training.py'),
+ 'practice_deep_learning.training.report': ( 'mini_batch_training.html#report',
+ 'practice_deep_learning/training.py')}}}
diff --git a/practice_deep_learning/training.py b/practice_deep_learning/training.py
index 1f4024d4..fbda8698 100644
--- a/practice_deep_learning/training.py
+++ b/practice_deep_learning/training.py
@@ -1,7 +1,7 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/04_mini_batch_training.ipynb.
# %% auto 0
-__all__ = ['Dataset', 'fit', 'get_dls']
+__all__ = ['accuracy', 'report', 'Dataset', 'fit', 'get_dls']
# %% ../nbs/04_mini_batch_training.ipynb 1
import torch, torch.nn as nn
@@ -9,7 +9,15 @@
from pathlib import Path
import gzip, pickle, matplotlib.pyplot as plt
-# %% ../nbs/04_mini_batch_training.ipynb 74
+# %% ../nbs/04_mini_batch_training.ipynb 29
+def accuracy(out, yb):
+ return (out.argmax(1)==yb).float().mean()
+
+# %% ../nbs/04_mini_batch_training.ipynb 34
+def report(loss, preds, yb):
+ print(f"loss: {loss:.2f}, accuracy: {accuracy(preds, yb):.2f}")
+
+# %% ../nbs/04_mini_batch_training.ipynb 75
class Dataset:
def __init__(self, x, y):
self.x, self.y = x, y
@@ -20,11 +28,11 @@ def __len__(self):
def __getitem__(self, i):
return self.x[i], self.y[i]
-# %% ../nbs/04_mini_batch_training.ipynb 108
+# %% ../nbs/04_mini_batch_training.ipynb 109
from torch.utils.data import DataLoader, SequentialSampler, RandomSampler, BatchSampler
-# %% ../nbs/04_mini_batch_training.ipynb 119
-def fit(epochs, model, loss_func, opt, train_dl, valid_ld):
+# %% ../nbs/04_mini_batch_training.ipynb 120
+def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
for epoch in range(epochs):
model.train()
for xb, yb in train_dl:
@@ -47,7 +55,7 @@ def fit(epochs, model, loss_func, opt, train_dl, valid_ld):
return total_loss/count, total_acc/count
-# %% ../nbs/04_mini_batch_training.ipynb 120
+# %% ../nbs/04_mini_batch_training.ipynb 121
def get_dls(train_ds, valid_ds, bs, **kwargs):
return (
DataLoader(train_ds, bs, shuffle=True, **kwargs),