@@ -25,35 +25,41 @@ def clip_resnet50x4_image(
25
25
26
26
Note that the model was trained on inputs with a shape of: [B, 3, 288, 288].
27
27
28
+ Example::
29
+
30
+ >>> model = opt.models.clip_resnet50x4_image(pretrained=True)
31
+ >>> output = model(torch.zeros(1, 3, 288, 288))
32
+
28
33
See here for more details:
29
34
https://github.com/openai/CLIP
30
35
https://github.com/mlfoundations/open_clip
31
36
32
37
Args:
33
38
34
- pretrained (bool, optional): If True, returns a pre-trained model.
35
- Default: False
36
- progress (bool, optional): If True, displays a progress bar of the download to
37
- stderr
38
- Default: True
39
+ pretrained (bool, optional): If ``True``, returns a pre-trained model.
40
+ Default: ``False``
41
+ progress (bool, optional): If ``True``, displays a progress bar of the download
42
+ to stderr.
43
+ Default: ``True``
39
44
model_path (str, optional): Optional path for the model file.
40
- Default: None
41
- replace_relus_with_redirectedrelu (bool, optional): If True, return pretrained
42
- model with Redirected ReLU in place of ReLU layers.
43
- Default: *True* when pretrained is True otherwise *False*
44
- use_linear_modules_only (bool, optional): If True, return model
45
+ Default: ``None``
46
+ replace_relus_with_redirectedrelu (bool, optional): If ``True``, return
47
+ pretrained model with Redirected ReLU in place of ReLU layers.
48
+ Default: *``True``* when ``pretrained`` is ``True`` otherwise *``False``*
49
+ use_linear_modules_only (bool, optional): If ``True``, return model
45
50
with all nonlinear layers replaced with linear equivalents.
46
- Default: False
47
- transform_input (bool, optional): If True, preprocesses the input according to
48
- the method with which it was trained.
49
- Default: *True* when pretrained is True otherwise *False*
50
- use_attnpool (bool, optional): Whether or not to use the final AttentionPool2d
51
- layer in the forward function. If set to True, model inputs are required
52
- to have a shape of: [B, 3, 288, 288] or [3, 288, 288].
53
- Default: False
51
+ Default: ``False``
52
+ transform_input (bool, optional): If ``True``, preprocesses the input according
53
+ to the method with which it was trained.
54
+ Default: *``True``* when ``pretrained`` is ``True`` otherwise *``False``*
55
+ use_attnpool (bool, optional): Whether or not to use the final
56
+ ``AttentionPool2d`` layer in the forward function. If set to ``True``,
57
+ model inputs are required to have a shape of: [B, 3, 288, 288] or
58
+ [3, 288, 288].
59
+ Default: ``False``
54
60
55
61
Returns:
56
- **CLIP_ResNet50x4Image ** (CLIP_ResNet50x4Image): A CLIP ResNet 50x4 model's
62
+ **model** (CLIP_ResNet50x4Image): An instance of a CLIP ResNet 50x4 model's
57
63
image portion.
58
64
"""
59
65
if pretrained :
@@ -98,20 +104,20 @@ def __init__(
98
104
"""
99
105
Args:
100
106
101
- replace_relus_with_redirectedrelu (bool, optional): If True, return
107
+ replace_relus_with_redirectedrelu (bool, optional): If ``True``, return
102
108
model with Redirected ReLU in place of ReLU layers.
103
109
Default: False
104
- use_linear_modules_only (bool, optional): If True, return model with
110
+ use_linear_modules_only (bool, optional): If ``True``, return model with
105
111
all nonlinear layers replaced with linear equivalents.
106
- Default: False
107
- transform_input (bool, optional): If True, preprocesses the input according
108
- to the method with which it was trained on.
109
- Default: False
112
+ Default: ``False``
113
+ transform_input (bool, optional): If ``True``, preprocesses the input
114
+ according to the method with which it was trained on.
115
+ Default: ``False``
110
116
use_attnpool (bool, optional): Whether or not to use the final
111
- AttentionPool2d layer in the forward function. If set to True, model
112
- inputs are required to have a shape of: [B, 3, 288, 288] or
117
+ ``AttentionPool2d`` layer in the forward function. If set to ``True``,
118
+ model inputs are required to have a shape of: [B, 3, 288, 288] or
113
119
[3, 288, 288].
114
- Default: True
120
+ Default: ``True``
115
121
"""
116
122
super ().__init__ ()
117
123
if use_linear_modules_only :
@@ -161,21 +167,21 @@ def _build_layer(
161
167
162
168
inplanes (int, optional): The number of input channels / features to use
163
169
for the first layer.
164
- Default: 80
170
+ Default: ``80``
165
171
planes (int, optional): The number of output channels / features to use
166
172
for the first layer. This variable is then multiplied by 4 to get the
167
173
number of input channels / features to use for the subsequent layers.
168
- Default: 80
174
+ Default: ``80``
169
175
blocks (int, optional): The number of Bottleneck layers to create.
170
- Default: 4
176
+ Default: ``4``
171
177
stride (int, optional): The stride value to use for the Bottleneck layers.
172
- Default: 1
178
+ Default: ``1``
173
179
activ (type of nn.Module, optional): The nn.Module class type to use for
174
180
activation layers.
175
- Default: nn.ReLU
181
+ Default: ``nn.ReLU``
176
182
177
183
Returns:
178
- residual_layer (nn.Sequential): A full residual layer.
184
+ residual_layer (nn.Sequential): A full residual layer instance.
179
185
"""
180
186
layers = [Bottleneck (inplanes , planes , stride , activ = activ )]
181
187
for _ in range (blocks - 1 ):
@@ -246,15 +252,15 @@ def __init__(
246
252
247
253
inplanes (int, optional): The number of input channels / features to use
248
254
for the first layer.
249
- Default: 80
255
+ Default: ``80``
250
256
planes (int, optional): The number of output channels / features to use
251
257
for the subsequent layers.
252
- Default: 80
258
+ Default: ``80``
253
259
stride (int, optional): The stride value to use for the Bottleneck layers.
254
- Default: 1
260
+ Default: ``1``
255
261
activ (type of nn.Module, optional): The nn.Module class type to use for
256
262
activation layers.
257
- Default: nn.ReLU
263
+ Default: ``nn.ReLU``
258
264
"""
259
265
super ().__init__ ()
260
266
self .conv1 = nn .Conv2d (inplanes , planes , kernel_size = 1 , bias = False )
@@ -317,14 +323,15 @@ def __init__(
317
323
318
324
spacial_size (int, optional): The desired size to user for the positional
319
325
embedding.
320
- Default: 9
326
+ Default: ``9``
321
327
in_features (int, optional): The desired input size for the nn.Linear
322
328
layers.
323
- Default: 2560
329
+ Default: ``2560``
324
330
out_features (int, optional): The desired output size for the nn.Linear
325
331
layers.
332
+ Default: ``640``
326
333
num_heads (int, optional): The number of heads to use.
327
- Default: 40
334
+ Default: ``40``
328
335
"""
329
336
super ().__init__ ()
330
337
self .positional_embedding = nn .Parameter (
0 commit comments