-
Notifications
You must be signed in to change notification settings - Fork 38
/
Copy pathsvtrv2_smtr_gtc_rctc_infer.yml
151 lines (143 loc) · 4.1 KB
/
svtrv2_smtr_gtc_rctc_infer.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
Global:
device: gpu
epoch_num: 20
log_smooth_window: 20
print_batch_step: 10
output_dir: ./output/rec/u14m_filter/svtrv2_smtr_gtc_rctc
save_epoch_step: 1
# evaluation is run every 2000 iterations
eval_batch_step: [0, 500]
eval_epoch_step: [0, 1]
cal_metric_during_train: True
pretrained_model:
# ./output/rec/u14m_filter/svtrv2_rctc/best.pth
checkpoints:
use_tensorboard: false
infer_img:
# for data or label process
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
# ./tools/utils/ppocr_keys_v1.txt # ch
max_text_length: &max_text_length 25
use_space_char: &use_space_char False
save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_smtr_gtc_rctc.txt
use_amp: True
Optimizer:
name: AdamW
lr: 0.000325 # for 4gpus bs128/gpu
weight_decay: 0.05
filter_bias_and_bn: True
LRScheduler:
name: OneCycleLR
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
cycle_momentum: False
Architecture:
model_type: rec
algorithm: SVTRv2
in_channels: 3
Transform:
Encoder:
name: SVTRv2LNConvTwo33
use_pos_embed: False
dims: [128, 256, 384]
depths: [6, 6, 6]
num_heads: [4, 8, 12]
mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
local_k: [[5, 5], [5, 5], [-1, -1]]
sub_k: [[1, 1], [2, 1], [-1, -1]]
last_stage: false
feat2d: True
Decoder:
name: GTCDecoder
infer_gtc: False
detach: False
gtc_decoder:
name: SMTRDecoder
num_layer: 1
ds: True
max_len: *max_text_length
next_mode: &next True
sub_str_len: &subsl 5
ctc_decoder:
name: RCTCDecoder
Loss:
name: CTCLoss
zero_infinity: True
PostProcess:
name: CTCLabelDecode
character_dict_path: *character_dict_path
use_space_char: *use_space_char
Metric:
name: RecMetric
main_indicator: acc
is_filter: True
Train:
dataset:
name: RatioDataSetTVResize
ds_width: True
padding: false
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
'../Union14M-L-LMDB-Filtered/filter_train_hard',
'../Union14M-L-LMDB-Filtered/filter_train_medium',
'../Union14M-L-LMDB-Filtered/filter_train_normal',
'../Union14M-L-LMDB-Filtered/filter_train_easy',
]
transforms:
- DecodeImagePIL: # load image
img_mode: RGB
- PARSeqAugPIL:
- CTCLabelEncode: # Class handling label
character_dict_path: *character_dict_path
use_space_char: *use_space_char
max_text_length: *max_text_length
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
sampler:
name: RatioSampler
scales: [[128, 32]] # w, h
# divide_factor: to ensure the width and height dimensions can be devided by downsampling multiple
first_bs: &bs 128
fix_bs: false
divided_factor: [4, 16] # w, h
is_training: True
loader:
shuffle: True
batch_size_per_card: *bs
drop_last: True
max_ratio: &max_ratio 12
num_workers: 4
Eval:
dataset:
name: RatioDataSetTVResize
ds_width: True
padding: False
data_dir_list: [
'../evaluation/CUTE80',
'../evaluation/IC13_857',
'../evaluation/IC15_1811',
'../evaluation/IIIT5k',
'../evaluation/SVT',
'../evaluation/SVTP',
]
transforms:
- DecodeImagePIL: # load image
img_mode: RGB
- CTCLabelEncode: # Class handling label
character_dict_path: *character_dict_path
use_space_char: *use_space_char
max_text_length: *max_text_length
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
sampler:
name: RatioSampler
scales: [[128, 32]] # w, h
# divide_factor: to ensure the width and height dimensions can be devided by downsampling multiple
first_bs: *bs
fix_bs: false
divided_factor: [4, 16] # w, h
is_training: False
loader:
shuffle: False
drop_last: False
batch_size_per_card: *bs
max_ratio: *max_ratio
num_workers: 4