-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathtrain.py
125 lines (109 loc) · 4.1 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp
from mmdet.utils import setup_cache_size_limit_of_dynamo
from mmengine.config import Config, DictAction
from mmengine.registry import RUNNERS
from mmengine.runner import Runner
def parse_args():
parser = argparse.ArgumentParser(description="Train a detector")
parser.add_argument(
"config",
help="train config file path",
)
parser.add_argument(
"--work-dir",
help="the dir to save logs and models",
)
parser.add_argument(
"--amp",
action="store_true",
default=False,
help="enable automatic-mixed-precision training",
)
parser.add_argument(
"--auto-scale-lr",
action="store_true",
help="enable automatically scaling LR.",
)
parser.add_argument(
"--resume",
nargs="?",
type=str,
const="auto",
help="If specify checkpoint path, resume from it, while if not "
"specify, try to auto resume from the latest checkpoint "
"in the work directory.",
)
parser.add_argument(
"--cfg-options",
nargs="+",
action=DictAction,
help="override some settings in the used config, the key-value pair "
"in xxx=yyy format will be merged into config file. If the value to "
'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
"Note that the quotation marks are necessary and that no white space "
"is allowed.",
)
parser.add_argument("--launcher", choices=["none", "pytorch", "slurm", "mpi"], default="none", help="job launcher")
# When using PyTorch version >= 2.0.0, the `torch.distributed.launch`
# will pass the `--local-rank` parameter to `tools/train.py` instead
# of `--local_rank`.
parser.add_argument("--local_rank", "--local-rank", type=int, default=0)
args = parser.parse_args()
if "LOCAL_RANK" not in os.environ:
os.environ["LOCAL_RANK"] = str(args.local_rank)
return args
def main():
args = parse_args()
# Reduce the number of repeated compilations and improve
# training speed.
setup_cache_size_limit_of_dynamo()
# load config
cfg = Config.fromfile(args.config)
cfg.launcher = args.launcher
if args.cfg_options is not None:
cfg.merge_from_dict(args.cfg_options)
# work_dir is determined in this priority: CLI > segment in file > filename
if args.work_dir is not None:
# update configs according to CLI args if args.work_dir is not None
cfg.work_dir = args.work_dir
elif cfg.get("work_dir", None) is None:
# use config filename as default work_dir if cfg.work_dir is None
cfg.work_dir = osp.join("./work_dirs", osp.splitext(osp.basename(args.config))[0])
# enable automatic-mixed-precision training
if args.amp is True:
cfg.optim_wrapper.type = "AmpOptimWrapper"
cfg.optim_wrapper.loss_scale = "dynamic"
# enable automatically scaling LR
if args.auto_scale_lr:
if "auto_scale_lr" in cfg and "enable" in cfg.auto_scale_lr and "base_batch_size" in cfg.auto_scale_lr:
cfg.auto_scale_lr.enable = True
else:
raise RuntimeError(
'Can not find "auto_scale_lr" or '
'"auto_scale_lr.enable" or '
'"auto_scale_lr.base_batch_size" in your'
" configuration file."
)
# resume is determined in this priority: resume from > auto_resume
if args.resume == "auto":
cfg.resume = True
cfg.load_from = None
elif args.resume is not None:
cfg.resume = True
cfg.load_from = args.resume
# build the runner from config
if "runner_type" not in cfg:
# build the default runner
runner = Runner.from_cfg(cfg)
else:
# build customized runner from the registry
# if 'runner_type' is set in the cfg
runner = RUNNERS.build(cfg)
# start training
runner.train()
if __name__ == "__main__":
main()