-
Notifications
You must be signed in to change notification settings - Fork 2
/
train.py
56 lines (46 loc) · 1.83 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os
import socket
import time
import pytorch_lightning as pl
import torch
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.plugins import DDPPlugin
from stretchbev.config import get_parser, get_cfg
from stretchbev.data import prepare_dataloaders
from stretchbev.trainer import TrainingModule
def main():
    """Run a training job configured from the command line.

    Steps, in order:

    1. Parse the CLI arguments and turn them into a config object.
    2. Build the train/validation dataloaders and the ``TrainingModule``,
       and record the number of training batches on ``model.len_loader``.
    3. If ``cfg.PRETRAINED.LOAD_WEIGHTS`` is set, load the checkpoint at
       ``cfg.PRETRAINED.PATH`` (relative to the current directory), drop
       every entry whose key contains ``'decoder'``, and load the rest
       non-strictly into the model.
    4. Create a TensorBoard logger, a per-epoch checkpoint callback and a
       DDP ``pl.Trainer``, then call ``fit``.
    """
    cli_args = get_parser().parse_args()
    cfg = get_cfg(cli_args)

    trainloader, valloader = prepare_dataloaders(cfg)
    model = TrainingModule(cfg.convert_to_dict())
    model.len_loader = len(trainloader)

    if cfg.PRETRAINED.LOAD_WEIGHTS:
        # Load single-image instance segmentation model.
        checkpoint_path = os.path.join('.', cfg.PRETRAINED.PATH)
        checkpoint = torch.load(checkpoint_path, map_location='cpu')

        # Keep everything except the entries whose key mentions 'decoder'.
        filtered_state = {}
        for name, tensor in checkpoint['state_dict'].items():
            if 'decoder' in name:
                continue
            filtered_state[name] = tensor

        # strict=False: the filtered dict intentionally lacks some keys.
        model.load_state_dict(filtered_state, strict=False)
        print(f'Loaded single-image model weights from {cfg.PRETRAINED.PATH}')

    # Log directory name: <timestamp>_<hostname>_<tag> under cfg.LOG_DIR.
    run_name = '_'.join(
        (time.strftime('%d%B%Yat%H:%M:%S%Z'), socket.gethostname(), cfg.TAG)
    )
    save_dir = os.path.join(cfg.LOG_DIR, run_name)

    # Checkpoints go to ./weights (not under save_dir); save_top_k=-1 keeps
    # every saved checkpoint instead of only the best k.
    checkpoint_callback = ModelCheckpoint(
        dirpath='weights',
        filename='stretchbev-{epoch:02d}',
        save_top_k=-1,
    )
    tb_logger = pl.loggers.TensorBoardLogger(save_dir=save_dir)

    # NOTE(review): `gpus`, `accelerator='ddp'`, `weights_summary` and
    # `DDPPlugin` look like Lightning 1.x-era arguments -- confirm the pinned
    # pytorch_lightning version before upgrading.
    trainer_options = {
        'gpus': cfg.GPUS,
        'accelerator': 'ddp',
        'precision': cfg.PRECISION,
        'sync_batchnorm': True,
        'gradient_clip_val': cfg.GRAD_NORM_CLIP,
        'max_epochs': cfg.EPOCHS,
        'weights_summary': 'full',
        'logger': tb_logger,
        'log_every_n_steps': cfg.LOGGING_INTERVAL,
        'plugins': DDPPlugin(find_unused_parameters=True),
        'profiler': 'simple',
        'callbacks': [checkpoint_callback],
    }
    trainer = pl.Trainer(**trainer_options)
    trainer.fit(model, trainloader, valloader)
# Start training only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()