Skip to content

Commit 0e0bc8f

Browse files
authored
Improve CLIP model docs for Sphinx
1 parent 981c763 commit 0e0bc8f

File tree

2 files changed

+76
-62
lines changed

2 files changed

+76
-62
lines changed

captum/optim/models/_image/clip_resnet50x4_image.py

+48-41
Original file line numberDiff line numberDiff line change
@@ -25,35 +25,41 @@ def clip_resnet50x4_image(
2525
2626
Note that the model was trained on inputs with a shape of: [B, 3, 288, 288].
2727
28+
Example::
29+
30+
>>> model = opt.models.clip_resnet50x4_image(pretrained=True)
31+
>>> output = model(torch.zeros(1, 3, 288, 288))
32+
2833
See here for more details:
2934
https://github.com/openai/CLIP
3035
https://github.com/mlfoundations/open_clip
3136
3237
Args:
3338
34-
pretrained (bool, optional): If True, returns a pre-trained model.
35-
Default: False
36-
progress (bool, optional): If True, displays a progress bar of the download to
37-
stderr
38-
Default: True
39+
pretrained (bool, optional): If ``True``, returns a pre-trained model.
40+
Default: ``False``
41+
progress (bool, optional): If ``True``, displays a progress bar of the download
42+
to stderr.
43+
Default: ``True``
3944
model_path (str, optional): Optional path for the model file.
40-
Default: None
41-
replace_relus_with_redirectedrelu (bool, optional): If True, return pretrained
42-
model with Redirected ReLU in place of ReLU layers.
43-
Default: *True* when pretrained is True otherwise *False*
44-
use_linear_modules_only (bool, optional): If True, return model
45+
Default: ``None``
46+
replace_relus_with_redirectedrelu (bool, optional): If ``True``, return
47+
pretrained model with Redirected ReLU in place of ReLU layers.
48+
Default: ``True`` when ``pretrained`` is ``True``, otherwise ``False``
49+
use_linear_modules_only (bool, optional): If ``True``, return model
4550
with all nonlinear layers replaced with linear equivalents.
46-
Default: False
47-
transform_input (bool, optional): If True, preprocesses the input according to
48-
the method with which it was trained.
49-
Default: *True* when pretrained is True otherwise *False*
50-
use_attnpool (bool, optional): Whether or not to use the final AttentionPool2d
51-
layer in the forward function. If set to True, model inputs are required
52-
to have a shape of: [B, 3, 288, 288] or [3, 288, 288].
53-
Default: False
51+
Default: ``False``
52+
transform_input (bool, optional): If ``True``, preprocesses the input according
53+
to the method with which it was trained.
54+
Default: ``True`` when ``pretrained`` is ``True``, otherwise ``False``
55+
use_attnpool (bool, optional): Whether or not to use the final
56+
``AttentionPool2d`` layer in the forward function. If set to ``True``,
57+
model inputs are required to have a shape of: [B, 3, 288, 288] or
58+
[3, 288, 288].
59+
Default: ``False``
5460
5561
Returns:
56-
**CLIP_ResNet50x4Image** (CLIP_ResNet50x4Image): A CLIP ResNet 50x4 model's
62+
**model** (CLIP_ResNet50x4Image): An instance of a CLIP ResNet 50x4 model's
5763
image portion.
5864
"""
5965
if pretrained:
@@ -98,20 +104,20 @@ def __init__(
98104
"""
99105
Args:
100106
101-
replace_relus_with_redirectedrelu (bool, optional): If True, return
107+
replace_relus_with_redirectedrelu (bool, optional): If ``True``, return
102108
model with Redirected ReLU in place of ReLU layers.
103109
Default: False
104-
use_linear_modules_only (bool, optional): If True, return model with
110+
use_linear_modules_only (bool, optional): If ``True``, return model with
105111
all nonlinear layers replaced with linear equivalents.
106-
Default: False
107-
transform_input (bool, optional): If True, preprocesses the input according
108-
to the method with which it was trained on.
109-
Default: False
112+
Default: ``False``
113+
transform_input (bool, optional): If ``True``, preprocesses the input
114+
according to the method with which it was trained.
115+
Default: ``False``
110116
use_attnpool (bool, optional): Whether or not to use the final
111-
AttentionPool2d layer in the forward function. If set to True, model
112-
inputs are required to have a shape of: [B, 3, 288, 288] or
117+
``AttentionPool2d`` layer in the forward function. If set to ``True``,
118+
model inputs are required to have a shape of: [B, 3, 288, 288] or
113119
[3, 288, 288].
114-
Default: True
120+
Default: ``True``
115121
"""
116122
super().__init__()
117123
if use_linear_modules_only:
@@ -161,21 +167,21 @@ def _build_layer(
161167
162168
inplanes (int, optional): The number of input channels / features to use
163169
for the first layer.
164-
Default: 80
170+
Default: ``80``
165171
planes (int, optional): The number of output channels / features to use
166172
for the first layer. This variable is then multiplied by 4 to get the
167173
number of input channels / features to use for the subsequent layers.
168-
Default: 80
174+
Default: ``80``
169175
blocks (int, optional): The number of Bottleneck layers to create.
170-
Default: 4
176+
Default: ``4``
171177
stride (int, optional): The stride value to use for the Bottleneck layers.
172-
Default: 1
178+
Default: ``1``
173179
activ (type of nn.Module, optional): The nn.Module class type to use for
174180
activation layers.
175-
Default: nn.ReLU
181+
Default: ``nn.ReLU``
176182
177183
Returns:
178-
residual_layer (nn.Sequential): A full residual layer.
184+
residual_layer (nn.Sequential): A full residual layer instance.
179185
"""
180186
layers = [Bottleneck(inplanes, planes, stride, activ=activ)]
181187
for _ in range(blocks - 1):
@@ -246,15 +252,15 @@ def __init__(
246252
247253
inplanes (int, optional): The number of input channels / features to use
248254
for the first layer.
249-
Default: 80
255+
Default: ``80``
250256
planes (int, optional): The number of output channels / features to use
251257
for the subsequent layers.
252-
Default: 80
258+
Default: ``80``
253259
stride (int, optional): The stride value to use for the Bottleneck layers.
254-
Default: 1
260+
Default: ``1``
255261
activ (type of nn.Module, optional): The nn.Module class type to use for
256262
activation layers.
257-
Default: nn.ReLU
263+
Default: ``nn.ReLU``
258264
"""
259265
super().__init__()
260266
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
@@ -317,14 +323,15 @@ def __init__(
317323
318324
spacial_size (int, optional): The desired size to use for the positional
319325
embedding.
320-
Default: 9
326+
Default: ``9``
321327
in_features (int, optional): The desired input size for the nn.Linear
322328
layers.
323-
Default: 2560
329+
Default: ``2560``
324330
out_features (int, optional): The desired output size for the nn.Linear
325331
layers.
332+
Default: ``640``
326333
num_heads (int, optional): The number of heads to use.
327-
Default: 40
334+
Default: ``40``
328335
"""
329336
super().__init__()
330337
self.positional_embedding = nn.Parameter(

captum/optim/models/_image/clip_resnet50x4_text.py

+28-21
Original file line numberDiff line numberDiff line change
@@ -23,33 +23,40 @@ def clip_resnet50x4_text(
2323
This model can be combined with the CLIP ResNet 50x4 Image model to create the full
2424
CLIP ResNet 50x4 model.
2525
26+
Example::
27+
28+
>>> model = opt.models.clip_resnet50x4_text(pretrained=True)
29+
>>> clip_tokenizer = opt.transforms.CLIPTokenizer(pretrained_merges=True)
30+
>>> tokenized_input = clip_tokenizer("Some example text.")
31+
>>> output = model(tokenized_input)
32+
2633
See here for more details:
2734
https://github.com/openai/CLIP
2835
https://github.com/mlfoundations/open_clip
2936
3037
Args:
3138
32-
pretrained (bool, optional): If True, returns a pre-trained model.
33-
Default: False
34-
progress (bool, optional): If True, displays a progress bar of the download to
35-
stderr
36-
Default: True
39+
pretrained (bool, optional): If ``True``, returns a pre-trained model.
40+
Default: ``False``
41+
progress (bool, optional): If ``True``, displays a progress bar of the download
42+
to stderr.
43+
Default: ``True``
3744
model_path (str, optional): Optional path for the model file.
38-
Default: None
45+
Default: ``None``
3946
width (int, optional): The desired width size to use for the model.
40-
Default: 640
47+
Default: ``640``
4148
num_heads (int, optional): The number of heads to use for the model.
42-
Default: 10
49+
Default: ``10``
4350
num_residual_layers (int, optional): The number of residual layers to use for
4451
each residual attention block in the model.
45-
Default: 12
52+
Default: ``12``
4653
content_length (int, optional): The expected size of text inputs to the model.
47-
Default: 77
54+
Default: ``77``
4855
vocab_size (int, optional): The size of the vocab used to train the model.
49-
Default: 49408
56+
Default: ``49408``
5057
5158
Returns:
52-
**CLIP_ResNet50x4Text** (CLIP_ResNet50x4Text): A CLIP ResNet 50x4 model's text
59+
**model** (CLIP_ResNet50x4Text): An instance of a CLIP ResNet 50x4 model's text
5360
portion.
5461
"""
5562
if pretrained:
@@ -85,17 +92,17 @@ def __init__(
8592
Args:
8693
8794
width (int, optional): The desired width size to use for the model.
88-
Default: 640
95+
Default: ``640``
8996
num_heads (int, optional): The number of heads to use for the model.
90-
Default: 10
97+
Default: ``10``
9198
num_residual_layers (int, optional): The number of residual layers to use
9299
for each residual attention block.
93-
Default: 12
100+
Default: ``12``
94101
content_length (int, optional): The expected size of text inputs to the
95102
model.
96-
Default: 77
103+
Default: ``77``
97104
vocab_size (int, optional): The size of the vocab used to train the model.
98-
Default: 49408
105+
Default: ``49408``
99106
"""
100107
super().__init__()
101108
self.transformer = nn.Sequential(
@@ -154,11 +161,11 @@ def __init__(
154161
Args:
155162
156163
width (int, optional): The desired width size to use.
157-
Default: 640
164+
Default: ``640``
158165
num_heads (int, optional): The number of heads to use.
159-
Default: 10
160-
content_length (int, optional): The desired content_length to use.
161-
Default: 77
166+
Default: ``10``
167+
content_length (int, optional): The desired ``content_length`` to use.
168+
Default: ``77``
162169
"""
163170
super().__init__()
164171
self.attn = nn.MultiheadAttention(width, num_heads)

0 commit comments

Comments
 (0)