---
title: "Session 5: Self-Supervised Learning"
author: "Alexandra Strassmann, Samuel Pawel"
date: "29. April 2020"
output: html_document
---
```{r setup, include = FALSE}
knitr::opts_chunk$set(warning = FALSE,
message = FALSE,
fig.align = "center",
cache = TRUE)
```

## Plan

* **Supervised learning**
    1. *Benchmark*: Fit a neural network to classify images on a large data set with 60000 labelled images as a benchmark
    2. *Benchmark with few labels*: Mimic the situation where only very few labels (150 images) are available and fit the same model again
* **Self-supervised learning**
    1. *Pretext task*: Perform a rotation task on the full, unlabelled data set (60000 images), using a random rotation of each image as pseudo-label
    2. *Downstream task with few labels*: Transfer the weights from the rotation task and try classification again with a small data set (150 images)

## Libraries and data
```{r fig.height = 5}
## Libraries
## ------------------------------------------------------------------
library(keras)
library(OpenImageR) # for rotation of images with rotateImage()
set.seed(42)
## Data
## ------------------------------------------------------------------
## loading data
mnist <- dataset_mnist()
str(mnist)
ntrain <- length(mnist$train$y)
ntest <- length(mnist$test$y)
## normalize and reshape images
mnist$train$x <- mnist$train$x / 255
mnist$train$x <- array(mnist$train$x, dim = c(ntrain, 28, 28, 1))
mnist$test$x <- mnist$test$x / 255
mnist$test$x <- array(mnist$test$x, dim = c(ntest, 28, 28, 1))
## one-hot encode labels
mnist$train$yBin <- to_categorical(mnist$train$y, 10)
mnist$test$yBin <- to_categorical(mnist$test$y, 10)
## some plots of the data
par(mfrow = c(4, 4), mai = rep(0.15, 4))
for (i in seq(1, 16)) {
image(t(apply(mnist$train$x[i,,,1], 2, rev)),
col = hcl.colors(256, palette = "inferno"),
axes = FALSE, main = mnist$train$y[i])
}
```
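
As a quick additional sanity check (an added sketch, not part of the original session code), one could also tabulate how often each digit occurs in the labels:

```{r}
## Added sketch: frequency of each digit in the training and test labels
table(mnist$train$y)
table(mnist$test$y)
```
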
## Fitting a convolutional neural network (CNN) to all training data
```{r}
## CNN model
## ------------------------------------------------------------------
## adapted from:
## https://blog.tensorflow.org/2018/04/fashion-mnist-with-tfkeras.html
model <- keras_model_sequential(name = "MNIST-CNN") %>%
layer_conv_2d(filters = 64, kernel_size = 2, padding = "same",
activation = "relu", input_shape = c(28, 28, 1)) %>%
layer_max_pooling_2d(pool_size = 2) %>%
layer_dropout(rate = 0.3) %>%
layer_conv_2d(filters = 32, kernel_size = 2, padding = "same",
activation = "relu") %>%
layer_max_pooling_2d(pool_size = 2) %>%
layer_dropout(rate = 0.3) %>%
layer_flatten() %>%
layer_dense(units = 256, activation = "relu") %>%
layer_dropout(rate = 0.5) %>%
layer_dense(units = 10, activation = "softmax")
summary(model)
## compile model
model %>%
compile(loss = "categorical_crossentropy",
optimizer = "adam",
metrics = c("accuracy"))
## fit model
history <- model %>%
fit(x = mnist$train$x,
y = mnist$train$yBin,
batch_size = 64,
epochs = 10,
validation_split = 0.2)
## plot change of loss/accuracy over course of optimization
plot(history)
## evaluate on test data
model %>%
evaluate(x = mnist$test$x,
y = mnist$test$yBin,
verbose = 0)
```
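
Beyond overall loss and accuracy, it can be instructive to see which digits the benchmark model confuses. The following added sketch (not part of the original benchmark) derives class predictions from the softmax output and cross-tabulates them against the true test labels:

```{r}
## Added sketch: class predictions and confusion matrix on the test set.
## The softmax output has one column per digit (0-9), so the predicted class
## is the index of the largest probability minus one.
predProb <- predict(model, mnist$test$x)
predClass <- apply(predProb, 1, which.max) - 1
table(predicted = predClass, truth = mnist$test$y)
```
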
## Fitting the same model on only a small part of the training data
```{r}
## randomly sample a small fraction (default 0.25%, i.e. 150 images) of the
## training data to fit the model on
smallData <- function(perc = 0.0025) {
  smallerN <- round(perc * ntrain)
  ind <- sample(x = seq(1, ntrain), size = smallerN, replace = FALSE)
  x <- array(mnist$train$x[ind, , , ], dim = c(smallerN, 28, 28, 1))
  yBin <- mnist$train$yBin[ind, ]
  return(list(x = x, yBin = yBin))
}
## formulating same model
modelSmall <- keras_model_sequential(name = "MNIST-CNN-Small") %>%
layer_conv_2d(filters = 64, kernel_size = 2, padding = "same",
activation = "relu", input_shape = c(28, 28, 1)) %>%
layer_max_pooling_2d(pool_size = 2) %>%
layer_dropout(rate = 0.3) %>%
layer_conv_2d(filters = 32, kernel_size = 2, padding = "same",
activation = "relu") %>%
layer_max_pooling_2d(pool_size = 2) %>%
layer_dropout(rate = 0.3) %>%
layer_flatten() %>%
layer_dense(units = 256, activation = "relu") %>%
layer_dropout(rate = 0.5) %>%
layer_dense(units = 10, activation = "softmax")
## compile model
modelSmall %>%
compile(loss = "categorical_crossentropy",
optimizer = "adam",
metrics = c("accuracy"))
## fit model M times with different small data sets and get test error
M <- 20
weightsModelSmall <- get_weights(modelSmall)
testerrSuper <- t(replicate(n = M, expr = {
## overwrite weights with initialization weights
modelSmall %>%
set_weights(weights = weightsModelSmall)
## fit to a small data set
sdat <- smallData(perc = 0.0025)
modelSmall %>%
fit(x = sdat$x,
y = sdat$yBin,
batch_size = 1,
epochs = 10,
validation_split = 0.2,
verbose = FALSE)
## get test error
testErr <- modelSmall %>%
evaluate(x = mnist$test$x,
y = mnist$test$yBin,
verbose = 0)
return(c(loss = testErr$loss, accuracy = testErr$accuracy))
}, simplify = TRUE))
par(mfrow = c(1, 2), mai = rep(0.5, 4))
boxplot(testerrSuper[,2], main = "Accuracy")
stripchart(testerrSuper[,2], add = TRUE, vertical = TRUE, jitter = 0.1,
pch = 1, method = "jitter")
boxplot(testerrSuper[,1], main = "Loss")
stripchart(testerrSuper[,1], add = TRUE, vertical = TRUE, jitter = 0.1,
pch = 1, method = "jitter")
```
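
To complement the boxplots, here is a short added numerical summary of the repeated small-data runs (assuming `testerrSuper` from the chunk above, with columns `loss` and `accuracy`):

```{r}
## Added sketch: numerical summary of test accuracy and loss over the M runs
summary(testerrSuper[, "accuracy"])
summary(testerrSuper[, "loss"])
```
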
## Self-Supervised Learning: Pretext Task
```{r}
## Pretext task
## ----------------------------------------------------------------------------
## Pseudolabel p: random rotation of image
## 0: 0°, 1: 90°, 2: 180°, 3: 270°
mnist$train$p <- sample(x = c(0, 1, 2, 3), size = ntrain, replace = TRUE)
mnist$test$p <- sample(x = c(0, 1, 2, 3), size = ntest, replace = TRUE)
mnist$train$pBin <- to_categorical(mnist$train$p)
mnist$test$pBin <- to_categorical(mnist$test$p)
## rotating image with OpenImageR::rotateImage()
mnist$train$xRot <- array(dim = c(ntrain, 28, 28, 1))
for (i in seq(1, ntrain)) {
angle <- mnist$train$p[i]*90
mnist$train$xRot[i,,,] <- rotateImage(image = mnist$train$x[i,,,],
angle = angle)
}
mnist$test$xRot <- array(dim = c(ntest, 28, 28, 1))
for (i in seq(1, ntest)) {
angle <- mnist$test$p[i]*90
mnist$test$xRot[i,,,] <- rotateImage(image = mnist$test$x[i,,,],
angle = angle)
}
## some plots of the rotated data with pseudolabels
par(mfrow = c(4, 4), mai = rep(0.15, 4))
for (i in seq(1, 16)) {
image(t(apply(mnist$train$xRot[i,,,1], 2, rev)),
col = hcl.colors(256, palette = "inferno"),
axes = FALSE,
main = paste(mnist$train$y[i], ": ", mnist$train$p[i]*90, "degrees"))
}
## CNN model for rotation classification
modelRot <- keras_model_sequential(name = "MNIST-CNN-Rotation") %>%
layer_conv_2d(filters = 64, kernel_size = 2, padding = "same",
activation = "relu", input_shape = c(28, 28, 1)) %>%
layer_max_pooling_2d(pool_size = 2) %>%
layer_dropout(rate = 0.3) %>%
layer_conv_2d(filters = 32, kernel_size = 2, padding = "same",
activation = "relu") %>%
layer_max_pooling_2d(pool_size = 2) %>%
layer_dropout(rate = 0.3) %>%
layer_flatten() %>%
layer_dense(units = 256, activation = "relu") %>%
layer_dropout(rate = 0.5) %>%
layer_dense(units = 4, activation = "softmax")
## compile model
modelRot %>%
compile(loss = "categorical_crossentropy",
optimizer = "adam",
metrics = c("accuracy"))
## fit model
historyRot <- modelRot %>%
fit(x = mnist$train$xRot,
y = mnist$train$pBin,
batch_size = 64,
epochs = 10,
validation_split = 0.2)
## plot change of loss/accuracy over course of optimization
plot(historyRot)
## evaluate on test data
modelRot %>%
evaluate(x = mnist$test$xRot,
y = mnist$test$pBin,
verbose = 0)
## extract weights from fitted model
weights_rotation <- get_weights(modelRot)
str(weights_rotation)
```
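
As an added illustration (not in the original code), one can inspect what the pretext model has learned by predicting the rotation of a few rotated test images and comparing it to the true pseudo-label:

```{r}
## Added sketch: predicted vs. true rotation (in degrees) for the first five
## rotated test images
rotProb <- predict(modelRot, mnist$test$xRot[1:5, , , , drop = FALSE])
data.frame(predicted = (apply(rotProb, 1, which.max) - 1) * 90,
           truth = mnist$test$p[1:5] * 90)
```
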
## Self-Supervised Learning: Downstream Task
```{r}
## Downstream task
## ----------------------------------------------------------------------------
## CNN model for image classification
modelDown <- keras_model_sequential(name = "MNIST-CNN-Downstream") %>%
layer_conv_2d(filters = 64, kernel_size = 2, padding = "same",
activation = "relu", input_shape = c(28, 28, 1)) %>%
layer_max_pooling_2d(pool_size = 2) %>%
layer_dropout(rate = 0.3) %>%
layer_conv_2d(filters = 32, kernel_size = 2, padding = "same",
activation = "relu") %>%
layer_max_pooling_2d(pool_size = 2) %>%
layer_dropout(rate = 0.3) %>%
layer_flatten() %>%
layer_dense(units = 256, activation = "relu") %>%
layer_dropout(rate = 0.5) %>%
layer_dense(units = 10, activation = "softmax")
## take over weights from pretext task for first few layers
weights_down <- get_weights(modelDown)
weights_down[[1]] <- weights_rotation[[1]]
weights_down[[2]] <- weights_rotation[[2]]
weights_down[[3]] <- weights_rotation[[3]]
weights_down[[4]] <- weights_rotation[[4]]
weights_down[[5]] <- weights_rotation[[5]]
weights_down[[6]] <- weights_rotation[[6]]
## fit model M times with different small data sets and get test error
M <- 20
testerrSelf <- t(replicate(n = M, expr = {
## overwrite weights with initialization weights from pretext task
modelDown %>%
set_weights(weights = weights_down)
## freeze weights from convolution layers
modelDown %>%
freeze_weights(from = 1, to = 4)
# summary(modelDown)
## compile model
modelDown %>%
compile(loss = "categorical_crossentropy",
optimizer = "adam",
metrics = c("accuracy"))
## draw a small data set
sdat <- smallData(perc = 0.0025)
## calibrate parameters in last layers first ("model head")
modelDown %>%
fit(x = sdat$x,
y = sdat$yBin,
batch_size = 1,
epochs = 10,
validation_split = 0.2)
## unfreeze weights
modelDown %>%
unfreeze_weights()
## compile model again
modelDown %>%
compile(loss = "categorical_crossentropy",
optimizer = "adam",
metrics = c("accuracy"))
## fit the whole model including parameters from conv layers
modelDown %>%
fit(x = sdat$x,
y = sdat$yBin,
batch_size = 1,
epochs = 10,
validation_split = 0.2)
## evaluate on test data
testErr <- modelDown %>%
evaluate(x = mnist$test$x,
y = mnist$test$yBin,
verbose = 0)
return(c(loss = testErr$loss, accuracy = testErr$accuracy))
}, simplify = TRUE))
## Plot testerror
library(ggplot2)
library(dplyr)
library(tidyr)
rbind(data.frame(testerrSuper, method = "Supervised"),
data.frame(testerrSelf, method = "Self-supervised")) %>%
gather(key = "variable", value = "value", accuracy, loss) %>%
ggplot(aes(x = method , y = value)) +
geom_boxplot() +
geom_jitter(alpha = 0.7, width = 0.3) +
facet_wrap(~ variable, scales = "free")
```
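
To put numbers on the visual comparison, here is an added summary of the mean and standard deviation of the test accuracy for both approaches (assuming `testerrSuper` and `testerrSelf` from the chunks above):

```{r}
## Added sketch: mean and standard deviation of test accuracy over the M
## repetitions, supervised vs. self-supervised
data.frame(method = c("Supervised", "Self-supervised"),
           meanAccuracy = c(mean(testerrSuper[, "accuracy"]),
                            mean(testerrSelf[, "accuracy"])),
           sdAccuracy = c(sd(testerrSuper[, "accuracy"]),
                          sd(testerrSelf[, "accuracy"])))
```
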
## Important things we learned

* Definition of the pretext task is crucial
    * We should perhaps not frame the rotation task as classification, or should use a different loss function for it
* First freeze the transferred weights and calibrate the remaining part of the model
    * Afterwards, train the whole model again for a bit (but it is easy to overfit!); see the sketch below for one way to mitigate this
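
One common way to make the second, unfrozen fine-tuning stage less prone to overfitting is to recompile the model with a smaller learning rate before fitting again. This is a hypothetical variation, not part of the code above, and the learning rate of 1e-4 is just an illustrative choice:

```{r eval = FALSE}
## Hypothetical variation: recompile the unfrozen model with a reduced
## learning rate before the second fit.
## Note: older versions of the keras R package use `lr` instead of
## `learning_rate` in optimizer_adam().
modelDown %>%
  compile(loss = "categorical_crossentropy",
          optimizer = optimizer_adam(learning_rate = 1e-4),
          metrics = c("accuracy"))
```
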
## Discussion points

* Could self-supervised learning be used in your field?
    * What could a pretext task look like?
* In which other fields do you see applications?

## Useful links
* [List](https://github.com/jason718/awesome-self-supervised-learning) with many relevant papers from arXiv
* [Review](https://arxiv.org/pdf/1902.06162.pdf) paper on self-supervised learning in computer vision
* [Talk](https://www.youtube.com/watch?v=SaJL4SLfrcY) by Yann LeCun on self-supervised learning

## SessionInfo
```{r session info}
sessionInfo()
```