forked from facebookarchive/fb.resnet.torch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
checkpoints.lua
86 lines (73 loc) · 2.66 KB
/
checkpoints.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
--
-- Copyright (c) 2016, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
--
-- Module table: helpers for saving/loading training checkpoints and plots.
local checkpoint = {}
--- Load the most recent checkpoint metadata from opt.resume.
-- Returns the saved `latest` table and its optimizer state, or nil when
-- resuming is disabled ('none') or no latest.t7 file exists on disk.
function checkpoint.latest(opt)
   -- Resuming disabled: nothing to load.
   if opt.resume == 'none' then return nil end

   local metaPath = paths.concat(opt.resume, 'latest.t7')
   -- No checkpoint written yet for this run.
   if not paths.filep(metaPath) then return nil end

   print('=> Loading checkpoint ' .. metaPath)
   local meta = torch.load(metaPath)
   -- meta.optimFile names the optimizer-state file saved alongside the model.
   return meta, torch.load(paths.concat(opt.resume, meta.optimFile))
end
--- Persist the model, optimizer state, and training history for one epoch.
-- Writes model_<epoch>.t7, optimState_<epoch>.t7, and latest.t7 under
-- opt.save; additionally writes model_best.t7 when isBestModel is true.
function checkpoint.save(epoch, model, optimState, isBestModel, trainLosses, testLosses, trainAccs, testAccs, opt)
   -- Remove temporary buffers to reduce checkpoint size
   model:clearState()

   -- Don't save the DataParallelTable for easier loading on other machines:
   -- serialize the wrapped module instead.
   local toSave = model
   if torch.type(model) == 'nn.DataParallelTable' then
      toSave = model:get(1)
   end

   local modelFile = 'model_' .. epoch .. '.t7'
   local optimFile = 'optimState_' .. epoch .. '.t7'
   torch.save(paths.concat(opt.save, modelFile), toSave)
   torch.save(paths.concat(opt.save, optimFile), optimState)
   -- latest.t7 records where to find this epoch's files plus the full
   -- loss/accuracy history, so checkpoint.latest() can resume from it.
   torch.save(paths.concat(opt.save, 'latest.t7'), {
      epoch = epoch,
      modelFile = modelFile,
      optimFile = optimFile,
      trainLosses = trainLosses,
      testLosses = testLosses,
      trainAccs = trainAccs,
      testAccs = testAccs,
   })
   if isBestModel then
      torch.save(paths.concat(opt.save, 'model_best.t7'), toSave)
   end

   -- clearState() above destroys gradInput sharing; re-establish it when
   -- the option requests shared buffers.
   if opt.shareGradInput then
      local models = require 'models/init'
      models.shareGradInput(model)
   end
end
--- Render train/val curves to two PDFs under opt.save: one linear-scale
-- and one log-scale plot, named from opt.expID and the given id label.
function checkpoint.saveplot(trainY, testY, opt, id)
   local trainCurve = torch.Tensor(trainY)
   local valCurve = torch.Tensor(testY)

   -- linear scale
   local linFig = gnuplot.pdffigure(paths.concat(opt.save, opt.expID .. id .. '.pdf'))
   gnuplot.plot({'train', trainCurve, '-'}, {'val', valCurve, '-'})
   gnuplot.grid(true)
   gnuplot.xlabel('Iteration')
   gnuplot.ylabel(id)
   gnuplot.plotflush(linFig)

   -- log scale
   local logFig = gnuplot.pdffigure(paths.concat(opt.save, opt.expID .. id .. '_logscale.pdf'))
   gnuplot.plot({'train', torch.log(trainCurve), '-'}, {'val', torch.log(valCurve), '-'})
   gnuplot.grid(true)
   gnuplot.xlabel('Iteration')
   gnuplot.ylabel(id .. ' (log-scale)')
   gnuplot.plotflush(logFig)
end
-- Export the module table (standard post-5.1 Lua module idiom).
return checkpoint