-
Notifications
You must be signed in to change notification settings - Fork 158
/
Copy pathmodel.proto
236 lines (217 loc) · 11.4 KB
/
model.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
// Copyright 2023 The Deeplab2 Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package deeplab2;
option java_multiple_files = true;
/********** Submessages used to config model options **********/
// Options for the decoder model.
message DecoderOptions {
  // Key of the high-level features fed to the decoder, e.g. 'res5'.
  optional string feature_key = 1;

  // Number of filters in every convolution of the decoder.
  optional int32 decoder_channels = 2 [default = 256];

  // Convolution type used by the decoder. Supported values are
  // 'depthwise_separable_conv' and 'standard_conv'.
  optional string decoder_conv_type = 5 [default = "depthwise_separable_conv"];

  // Number of filters in every convolution of the ASPP.
  optional int32 aspp_channels = 3 [default = 256];

  // Atrous rates used in the ASPP. The list has to contain exactly three
  // entries (one for each of the three 3x3 atrous convolutions in the ASPP),
  // and it only takes effect when `aspp_use_only_1x1_proj_conv` is false.
  repeated int32 atrous_rates = 4;

  // When true, the ASPP module keeps only its 1x1 projection convolution: the
  // five ASPP branches (one 1x1 convolution, three 3x3 atrous convolutions
  // with the specified `atrous_rates`, and the global average pooling) are
  // turned off. This is equivalent to applying a single 1x1 convolution that
  // reduces the feature map channels obtained from the encoder backbone to
  // the specified `aspp_channels`. Mainly set to true when the encoder
  // backbone is already able to capture long-range information efficiently,
  // e.g., by axial attention blocks (for reference, see
  // configs/cityscapes/axial_deeplab).
  optional bool aspp_use_only_1x1_proj_conv = 6 [default = false];
}
// Options selecting which low-level features to use.
message LowLevelOptions {
  // Name of the low-level feature, e.g. 'res2'.
  optional string feature_key = 1;

  // Number of filters of the 1x1 projection convolution.
  optional int32 channels_project = 2;
}
// Options for a prediction head.
message HeadOptions {
  // Number of filters in the last convolution, e.g. 1 or NUM_CLASSES.
  optional int32 output_channels = 1;

  // Number of filters in the 5x5 convolution, e.g. 256 or 32.
  optional int32 head_channels = 2;

  // Convolution type used by the head. Supported values are
  // 'depthwise_separable_conv' and 'standard_conv'.
  optional string head_conv_type = 3 [default = "depthwise_separable_conv"];

  // Maximum value after activation. Together with
  // `min_value_after_activation`, it is used to shift the head outputs to a
  // range. One example is using sigmoid to shift the range for depth
  // prediction.
  optional float max_value_after_activation = 4 [default = 0];

  // Minimum value after activation. Together with
  // `max_value_after_activation`, it is used to shift the head outputs to a
  // range. One example is using sigmoid to shift the range for depth
  // prediction.
  optional float min_value_after_activation = 5 [default = 0];
}
// Options for the instance branch.
message InstanceOptions {
  // Whether the instance branch is used.
  optional bool enable = 1 [default = true];

  // Low level options used in the instance branch. The list of
  // LowLevelOptions must be ordered from lower resolution to higher
  // resolution. When left empty, the instance branch uses the same low level
  // options as the semantic branch.
  repeated LowLevelOptions low_level_override = 2;

  // Decoder options of the instance branch. When left empty, the instance
  // branch uses the same decoder options as the semantic branch.
  optional DecoderOptions instance_decoder_override = 3;

  // Options of the instance center head.
  optional HeadOptions center_head = 4;

  // Options of the instance regression head.
  optional HeadOptions regression_head = 5;

  // Options of the next-frame instance regression head.
  optional HeadOptions next_regression_head = 6;
}
// Top-level options describing the model.
// Next ID: 12
message ModelOptions {
  // Options for the model backbone.
  message BackboneOptions {
    // Name of the specific architecture of the family.
    optional string name = 1 [default = "resnet50"];

    // Output stride of the encoder.
    optional int32 output_stride = 2 [default = 32];

    // Path to the pretrained weights to load.
    optional string pretrained_weights = 3;

    // Whether to use the squeeze-and-excite operation.
    optional bool use_squeeze_and_excite = 4 [default = false];

    // Drop path keep probability for training. By default drop path is not
    // used.
    optional float drop_path_keep_prob = 5 [default = 1.0];

    // Drop path schedule. Currently supported: (1) 'constant': the same drop
    // path probability is used for all blocks, and (2) 'linear': the drop
    // path probability decreases linearly from 1.0 at the 0-th stage (or
    // STEM) to drop_path_keep_prob at the last block.
    optional string drop_path_schedule = 6 [default = "constant"];

    // STEM width_multiplier, controlling the STEM convolution channels.
    optional float stem_width_multiplier = 7 [default = 1.0];

    // Backbone (except STEM) width_multiplier, controlling the convolution
    // channels of the backbone (except STEM).
    optional float backbone_width_multiplier = 8 [default = 1.0];

    // Backbone (except STEM) layer_multiplier, controlling the number of
    // layers in the backbone (except STEM).
    optional float backbone_layer_multiplier = 9 [default = 1.0];

    // Apply the Switchable Atrous Convolution (SAC) beyond the specified
    // stride. For example, with use_sac_beyond_stride = 16, SAC is applied to
    // the network stages whose original output stride >= 16 (i.e., 16 and 32,
    // or the last two stages). Set to -1 to disable it.
    optional int32 use_sac_beyond_stride = 10 [default = -1];
  }

  // Model options for the backbone encoder model.
  optional BackboneOptions backbone = 1;

  // Decoder settings shared across the different meta architectures.
  optional DecoderOptions decoder = 2;

  // Meta-architecture specific settings follow.

  // Settings specific to DeepLabV3.
  message DeeplabV3Options {
    // Number of classes for the last convolution to predict logits.
    optional int32 num_classes = 1;
  }

  // Settings specific to DeepLabV3+.
  message DeeplabV3PlusOptions {
    // Low level options used in this decoder.
    // NOTE(review): the previous comment described an ordered list of
    // LowLevelOptions ("ordered from higher to lower levels"), but this field
    // is singular -- confirm the intended wording.
    optional LowLevelOptions low_level = 1;

    // Number of classes for the last convolution to predict logits.
    optional int32 num_classes = 2;
  }

  // Settings specific to Panoptic-DeepLab.
  message PanopticDeeplabOptions {
    // Low level options used in this decoder. The list of LowLevelOptions
    // must be ordered from lower resolution to higher resolution.
    repeated LowLevelOptions low_level = 1;

    // Model options for the instance branch.
    optional InstanceOptions instance = 2;

    // Model options of the semantic head.
    optional HeadOptions semantic_head = 3;

    // Model options of the depth head.
    optional HeadOptions depth_head = 4;
  }

  // Settings specific to Motion-DeepLab.
  message MotionDeepLabOptions {
    // Low level options used in this decoder. The list of LowLevelOptions
    // must be ordered from lower resolution to higher resolution.
    repeated LowLevelOptions low_level = 1;

    // Model options for the instance branch.
    optional InstanceOptions instance = 2;

    // Model options of the semantic head.
    optional HeadOptions semantic_head = 3;

    // Model options for the motion head.
    optional HeadOptions motion_head = 4;
  }

  // Settings specific to MaX-DeepLab.
  message MaXDeepLabOptions {
    // Head options of the mask head.
    optional HeadOptions pixel_space_head = 1;

    // Low level options used in the semantic decoder. The list of
    // LowLevelOptions must be ordered from lower resolution to higher
    // resolution.
    repeated LowLevelOptions auxiliary_low_level = 2;

    // Head options of the semantic head.
    optional HeadOptions auxiliary_semantic_head = 3;

    // Number of points or features sampled to perform the pixel contrastive
    // instance discrimination loss. Only valid for MaX-DeepLab. When set to
    // 0, the original MaX-DeepLab instance discrimination loss is used;
    // otherwise it is replaced by a pixel contrastive instance discrimination
    // loss. Defaults to 0 as in the original MaX-DeepLab. 16384 is
    // recommended for Cityscapes.
    optional int32 instance_discrimination_sample_k = 4 [default = 0];

    // Sample temperature of the points or features for the pixel contrastive
    // instance discrimination loss. The sampling is based on object scale: a
    // temperature > 1 biases the sampling towards smaller objects, while a
    // value < 1 biases it towards larger objects. Only used when
    // instance_discrimination_sample_k > 0. 0.6 is recommended on Cityscapes.
    optional float instance_discrimination_sample_temperature = 5
        [default = 0.6];

    // Number of points or features sampled to perform the semantic loss. Only
    // valid for MaX-DeepLab. When set to 0, the original semantic loss over
    // all pixels is used; otherwise a subset of pixels is sampled for the
    // semantic loss. Defaults to 0.
    optional int32 semantic_sample_k = 6 [default = 0];

    // Sample temperature of the points or features for the semantic loss. The
    // sampling is based on object scale: a temperature > 1 biases the
    // sampling towards smaller objects, while a value < 1 biases it towards
    // larger objects. Only used when semantic_sample_k > 0. Defaults to 1.0.
    optional float semantic_sample_temperature = 7 [default = 1.0];
  }

  // The selected meta architecture; at most one member can be set at a time.
  // `vip_deeplab` is configured with PanopticDeeplabOptions.
  oneof meta_architecture {
    DeeplabV3Options deeplab_v3 = 3;
    DeeplabV3PlusOptions deeplab_v3_plus = 4;
    PanopticDeeplabOptions panoptic_deeplab = 5;
    MotionDeepLabOptions motion_deeplab = 7;
    MaXDeepLabOptions max_deeplab = 10;
    PanopticDeeplabOptions vip_deeplab = 11;
  }

  // Checkpoint to load.
  optional string initial_checkpoint = 6;

  // Whether to restore the last convolution of the semantic head when loading
  // from the initial checkpoint. Setting this flag to false is useful when
  // the initial checkpoint was trained on a dataset with different classes.
  optional bool restore_semantic_last_layer_from_initial_checkpoint = 8
      [default = true];

  // Whether to restore the last convolution of the instance heads when
  // loading from the initial checkpoint. Depending on the meta architecture,
  // this includes center heatmap, center regression and motion regression.
  optional bool restore_instance_last_layer_from_initial_checkpoint = 9
      [default = true];
}