diff --git a/docs/source/nn.functional.rst b/docs/source/nn.functional.rst
index 00cfb2fd8f4..2f4ab8ad1bf 100644
--- a/docs/source/nn.functional.rst
+++ b/docs/source/nn.functional.rst
@@ -95,6 +95,9 @@ Dropout functions
     :nosignatures:
 
     dropout
+    dropout1d
+    dropout2d
+    dropout3d
 
 Sparse functions
 ----------------------------------
diff --git a/docs/source/nn.rst b/docs/source/nn.rst
index d304fefb0d4..960b7734068 100644
--- a/docs/source/nn.rst
+++ b/docs/source/nn.rst
@@ -227,6 +227,9 @@ Dropout Layers
     :template: classtemplate.rst
 
     nn.Dropout
+    nn.Dropout1d
+    nn.Dropout2d
+    nn.Dropout3d
 
 Sparse Layers
 ----------------------------------
diff --git a/oneflow/core/functional/functional_api.yaml b/oneflow/core/functional/functional_api.yaml
index e1201af5489..a333df9c216 100755
--- a/oneflow/core/functional/functional_api.yaml
+++ b/oneflow/core/functional/functional_api.yaml
@@ -808,7 +808,7 @@
   signature: [
     "Tensor (Tensor input, *, DataType dtype=kFloat, Generator generator=None) => Bernoulli",
-    "Tensor (Tensor input, Double p, *, DataType dtype=kFloat, Generator generator=None) => Bernoulli",
+    "Tensor (Tensor input, Double p, *, DataType dtype=kFloat, Generator generator=None) => BernoulliProb",
   ]
   bind_python: True
 
@@ -1793,6 +1793,18 @@
   signature: "Tensor (Tensor dy, Tensor mask, Float scale) => DropoutGrad"
   bind_python: False
 
+- name: "dropout1d"
+  signature: "Tensor (Tensor input, Float p=0.5, Bool training=True) => Dropout1d"
+  bind_python: True
+
+- name: "dropout2d"
+  signature: "Tensor (Tensor input, Float p=0.5, Bool training=True) => Dropout2d"
+  bind_python: True
+
+- name: "dropout3d"
+  signature: "Tensor (Tensor input, Float p=0.5, Bool training=True) => Dropout3d"
+  bind_python: True
+
 - name: "constant_pad"
   signature: 'Tensor (Tensor x, Int64List pad, Scalar value=0) => ConstantPad'
   bind_python: False
diff --git a/oneflow/core/functional/impl/nn_functor.cpp b/oneflow/core/functional/impl/nn_functor.cpp
index 5123250553d..a4779723172 100644
--- a/oneflow/core/functional/impl/nn_functor.cpp
+++ b/oneflow/core/functional/impl/nn_functor.cpp
@@ -2457,6 +2457,103 @@ class DropoutFunctor {
   std::shared_ptr<OpExpr> add_op_;
 };
 
+namespace {
+Maybe<Tensor> MakeFeatureNoise(const std::shared_ptr<one::Tensor>& x) {
+  const int64_t ndim = x->ndim();
+  CHECK_GE_OR_RETURN(ndim, 2) << Error::RuntimeError()
+                              << "Feature dropout requires at least 2 dimensions in the input";
+  std::vector<int64_t> sizes;
+  sizes.reserve(ndim);
+  sizes.push_back(x->shape()->At(0));
+  sizes.push_back(x->shape()->At(1));
+  for (int i = 2; i < ndim; i++) { sizes.push_back(1); }
+  return JUST(Empty(Shape(sizes), x->dtype(), JUST(x->device()), false));
+}
+
+Maybe<Tensor> DropoutImpl(const std::shared_ptr<one::Tensor>& input, const float& p,
+                          const bool& train) {
+  CHECK_EQ_OR_RETURN(p >= 0 && p <= 1.0, true)
+      << "dropout probability has to be between 0 and 1, but got " << p;
+  if (p == 0 || !train || input->shape()->elem_cnt() == 0) { return input; }
+  if (p == 1) {
+    std::shared_ptr<one::Tensor> other =
+        JUST(Constant(*input->shape(), Scalar(0.0), input->dtype(), JUST(input->device())));
+    return InplaceMul(input, other);
+  }
+  std::shared_ptr<one::Tensor> noise = JUST(MakeFeatureNoise(input));
+  noise = JUST(BernoulliProb(noise, 1.0 - p, noise->dtype(), JUST(one::DefaultAutoGenerator())));
+  noise = JUST(InplaceScalarDiv(noise, Scalar(1.0 - p)));
+  noise = JUST(InplaceMul(input, noise));
+  return noise;
+}
+}  // namespace
+
+class Dropout1dFunctor {
+ public:
+  Maybe<Tensor> operator()(const std::shared_ptr<one::Tensor>& input, const float& p,
+                           const bool& training) const {
+    CHECK_EQ_OR_RETURN(p >= 0 && p <= 1.0, true)
+        << "dropout probability has to be between 0 and 1, but got " << p;
+    const int input_dim = input->ndim();
+    CHECK_EQ_OR_RETURN(input_dim == 2 || input_dim == 3, true)
+        << "dropout1d: Expected 2D or 3D input, but received a " << input_dim
+        << "D input. Note that dropout1d exists to provide channel-wise dropout on inputs with 1 "
+           "spatial dimension, a channel dimension, and an optional batch dimension "
+           "(i.e. 2D or 3D inputs).";
+    const bool is_batched = (input_dim == 3);
+    std::shared_ptr<one::Tensor> result = input;
+    if (!is_batched) { result = JUST(Unsqueeze(input, 0)); }
+    result = JUST(DropoutImpl(result, p, training));
+    if (!is_batched) { result = JUST(Squeeze(result, std::vector<int32_t>{0})); }
+    return result;
+  }
+};
+
+class Dropout2dFunctor {
+ public:
+  Maybe<Tensor> operator()(const std::shared_ptr<one::Tensor>& input, const float& p,
+                           const bool& training) const {
+    CHECK_EQ_OR_RETURN(p >= 0 && p <= 1.0, true)
+        << "dropout probability has to be between 0 and 1, but got " << p;
+    const int input_dim = input->ndim();
+    if (input_dim != 3 && input_dim != 4) {
+      LOG(WARNING) << "dropout2d: Received a " << input_dim
+                   << "-D input to dropout2d, which is deprecated and will result in an error in "
+                      "a future release. To retain the behavior and silence this warning, please "
+                      "use dropout instead. Note that dropout2d exists to provide channel-wise "
+                      "dropout on inputs with 2 spatial dimensions, a channel dimension, and an "
+                      "optional batch dimension (i.e. 3D or 4D inputs).";
+    }
+    if (input_dim == 3) {
+      LOG(WARNING) << "dropout2d: Received a 3D input to dropout2d and assuming that channel-wise "
+                      "1D dropout behavior is desired - input is interpreted as shape (N, C, L), "
+                      "where C is the channel dim. This behavior will change in a future release "
+                      "to interpret the input as one without a batch dimension, i.e. shape "
+                      "(C, H, W). To maintain the 1D channel-wise dropout behavior, please switch "
+                      "to using dropout1d instead.";
+    }
+    return JUST(DropoutImpl(input, p, training));
+  }
+};
+
+class Dropout3dFunctor {
+ public:
+  Maybe<Tensor> operator()(const std::shared_ptr<one::Tensor>& input, const float& p,
+                           const bool& training) const {
+    CHECK_EQ_OR_RETURN(p >= 0 && p <= 1.0, true)
+        << "dropout probability has to be between 0 and 1, but got " << p;
+    const int input_dim = input->ndim();
+    if (input_dim != 4 && input_dim != 5) {
+      LOG(WARNING) << "dropout3d: Received a " << input_dim
+                   << "-D input to dropout3d, which is deprecated and will result in an error in "
+                      "a future release. To retain the behavior and silence this warning, please "
+                      "use dropout instead. Note that dropout3d exists to provide channel-wise "
+                      "dropout on inputs with 3 spatial dimensions, a channel dimension, and an "
+                      "optional batch dimension (i.e. 4D or 5D inputs).";
+    }
+    const bool is_batched = (input_dim == 5);
+    std::shared_ptr<one::Tensor> result = input;
+    if (!is_batched) { result = JUST(Unsqueeze(input, 0)); }
+    result = JUST(DropoutImpl(result, p, training));
+    if (!is_batched) { result = JUST(Squeeze(result, std::vector<int32_t>{0})); }
+    return result;
+  }
+};
+
 class DropoutGradFunctor {
  public:
   DropoutGradFunctor() {
@@ -3838,6 +3935,9 @@ ONEFLOW_FUNCTION_LIBRARY(m) {
   m.add_functor<impl::PadFunctor>("Pad");
   m.add_functor<impl::DropoutFunctor>("Dropout");
   m.add_functor<impl::DropoutGradFunctor>("DropoutGrad");
+  m.add_functor<impl::Dropout1dFunctor>("Dropout1d");
+  m.add_functor<impl::Dropout2dFunctor>("Dropout2d");
+  m.add_functor<impl::Dropout3dFunctor>("Dropout3d");
   m.add_functor<impl::PixelShuffleFunctor>("PixelShuffle");
   m.add_functor<impl::AvgPool1DFunctor>("AvgPool1D");
   m.add_functor<impl::AvgPool2DFunctor>("AvgPool2D");
diff --git a/oneflow/core/functional/impl/random_functor.cpp b/oneflow/core/functional/impl/random_functor.cpp
index b31be5dff57..a7198c1c891 100644
--- a/oneflow/core/functional/impl/random_functor.cpp
+++ b/oneflow/core/functional/impl/random_functor.cpp
@@ -448,7 +448,7 @@ using namespace impl;
 
 ONEFLOW_FUNCTION_LIBRARY(m) {
   m.add_functor<impl::BernoulliFunctor>("Bernoulli");
-  m.add_functor<impl::BernoulliProbFunctor>("Bernoulli");
+  m.add_functor<impl::BernoulliProbFunctor>("BernoulliProb");
   m.add_functor<impl::RandPermFunctor>("RandPerm");
   m.add_functor<impl::GlobalRandPermFunctor>("GlobalRandPerm");
   m.add_functor<impl::RandFunctor>("Rand");
diff --git a/python/oneflow/framework/docstr/dropout.py b/python/oneflow/framework/docstr/dropout.py
index b339c3c9563..13bba473564 100644
--- a/python/oneflow/framework/docstr/dropout.py
+++ b/python/oneflow/framework/docstr/dropout.py
@@ -97,3 +97,329 @@
     """,
 )
+
+add_docstr(
+    oneflow._C.dropout1d,
+    r"""
+    dropout1d(x: Tensor, p: float = 0.5, training: bool = True) -> Tensor
+
+    The documentation is referenced from:
+    https://pytorch.org/docs/1.12/generated/torch.nn.functional.dropout1d.html.
+
+    Randomly zero out entire channels (a channel is a 1D feature map,
+    e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
+    batched input is a 1D tensor :math:`\text{input}[i, j]`) of the input tensor.
+    Each channel will be zeroed out independently on every forward call with
+    probability :attr:`p` using samples from a Bernoulli distribution.
+
+    See :class:`~oneflow.nn.Dropout1d` for details.
+
+    Args:
+        p: probability of a channel to be zeroed. Default: 0.5
+        training: apply dropout if ``True``. Default: ``True``
+    """,
+)
+
+add_docstr(
+    oneflow._C.dropout2d,
+    r"""
+    dropout2d(x: Tensor, p: float = 0.5, training: bool = True) -> Tensor
+
+    The documentation is referenced from:
+    https://pytorch.org/docs/1.10/generated/torch.nn.functional.dropout2d.html.
+
+    Randomly zero out entire channels (a channel is a 2D feature map,
+    e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
+    batched input is a 2D tensor :math:`\text{input}[i, j]`) of the input tensor.
+    Each channel will be zeroed out independently on every forward call with
+    probability :attr:`p` using samples from a Bernoulli distribution.
+
+    See :class:`~oneflow.nn.Dropout2d` for details.
+
+    Args:
+        p: probability of a channel to be zeroed. Default: 0.5
+        training: apply dropout if ``True``. Default: ``True``
+    """,
+)
+
+add_docstr(
+    oneflow._C.dropout3d,
+    r"""
+    dropout3d(x: Tensor, p: float = 0.5, training: bool = True) -> Tensor
+
+    The documentation is referenced from:
+    https://pytorch.org/docs/1.10/generated/torch.nn.functional.dropout3d.html.
+
+    Randomly zero out entire channels (a channel is a 3D feature map,
+    e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
+    batched input is a 3D tensor :math:`\text{input}[i, j]`) of the input tensor.
+    Each channel will be zeroed out independently on every forward call with
+    probability :attr:`p` using samples from a Bernoulli distribution.
+
+    See :class:`~oneflow.nn.Dropout3d` for details.
+
+    Args:
+        p: probability of a channel to be zeroed. Default: 0.5
+        training: apply dropout if ``True``. Default: ``True``
+    """,
+)
+
+add_docstr(
+    oneflow.nn.Dropout,
+    """
+    During training, randomly zeroes some of the elements of the input
+    tensor with probability :attr:`p` using samples from a Bernoulli
+    distribution. Each channel will be zeroed out independently on every forward
+    call.
+
+    The documentation is referenced from: https://pytorch.org/docs/1.10/generated/torch.nn.Dropout.html.
+
+    This has proven to be an effective technique for regularization and
+    preventing the co-adaptation of neurons as described in the paper
+    "Improving neural networks by preventing co-adaptation of feature
+    detectors".
+
+    Furthermore, the outputs are scaled by a factor of :math:`\\frac{1}{1-p}` during
+    training. This means that during evaluation the module simply computes an
+    identity function.
+
+    Additionally, an extra Tensor ``addend`` with the same shape as the input can be passed.
+    The ``addend`` Tensor is added to the result after dropout, which is useful in residual
+    connection structures.
+
+    Args:
+        p: probability of an element to be zeroed. Default: 0.5
+        inplace: If set to ``True``, will do this operation in-place. Default: ``False``
+        generator: A pseudorandom number generator for sampling
+
+    Shape:
+        - Input: :math:`(*)`. Input can be of any shape
+        - Output: :math:`(*)`. Output is of the same shape as input
+
+    For example:
+
+    example 1:
+
+    .. code-block:: python
+
+        >>> import numpy as np
+        >>> import oneflow as flow
+
+        >>> m = flow.nn.Dropout(p=0)
+        >>> arr = np.array(
+        ...    [
+        ...        [-0.7797, 0.2264, 0.2458, 0.4163],
+        ...        [0.4299, 0.3626, -0.4892, 0.4141],
+        ...        [-1.4115, 1.2183, -0.5503, 0.6520],
+        ...    ]
+        ... )
+        >>> x = flow.Tensor(arr)
+        >>> y = m(x)
+        >>> y #doctest: +ELLIPSIS
+        tensor([[-0.7797,  0.2264,  0.2458,  0.4163],
+                [ 0.4299,  0.3626, -0.4892,  0.4141],
+                [-1.4115,  1.2183, -0.5503,  0.6520]], dtype=oneflow.float32)
+
+    example 2:
+
+    .. code-block:: python
+
+        >>> import numpy as np
+        >>> import oneflow as flow
+
+        >>> m = flow.nn.Dropout(p=0)
+        >>> arr = np.array(
+        ...    [
+        ...        [-0.7797, 0.2264, 0.2458, 0.4163],
+        ...        [0.4299, 0.3626, -0.4892, 0.4141],
+        ...        [-1.4115, 1.2183, -0.5503, 0.6520],
+        ...    ]
+        ... )
+        >>> x = flow.Tensor(arr)
+        >>> addend = flow.ones((3, 4), dtype=flow.float32)
+        >>> y = m(x, addend=addend)
+        >>> y #doctest: +ELLIPSIS
+        tensor([[ 0.2203,  1.2264,  1.2458,  1.4163],
+                [ 1.4299,  1.3626,  0.5108,  1.4141],
+                [-0.4115,  2.2183,  0.4497,  1.6520]], dtype=oneflow.float32)
+
+    .. _Improving neural networks by preventing co-adaptation of feature
+        detectors: https://arxiv.org/abs/1207.0580
+    """,
+)
+
+add_docstr(
+    oneflow.nn.Dropout1d,
+    r"""
+    Randomly zero out entire channels (a channel is a 1D feature map,
+    e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
+    batched input is a 1D tensor :math:`\text{input}[i, j]`).
+    Each channel will be zeroed out independently on every forward call with
+    probability :attr:`p` using samples from a Bernoulli distribution.
+
+    The documentation is referenced from: https://pytorch.org/docs/1.12/generated/torch.nn.Dropout1d.html.
+
+    Usually the input comes from :class:`nn.Conv1d` modules.
+
+    As described in the paper
+    `Efficient Object Localization Using Convolutional Networks`_ ,
+    if adjacent pixels within feature maps are strongly correlated
+    (as is normally the case in early convolution layers) then i.i.d. dropout
+    will not regularize the activations and will otherwise just result
+    in an effective learning rate decrease.
+
+    In this case, :class:`oneflow.nn.Dropout1d` will help promote independence between
+    feature maps and should be used instead.
+
+    Args:
+        p (float, optional): probability of an element to be zeroed.
+        inplace (bool, optional): If set to ``True``, will do this operation
+            in-place
+
+    Shape:
+        - Input: :math:`(N, C, L)` or :math:`(C, L)`.
+        - Output: :math:`(N, C, L)` or :math:`(C, L)` (same shape as input).
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import numpy as np
+        >>> import oneflow as flow
+
+        >>> m = flow.nn.Dropout1d(p=0)
+        >>> arr = np.array(
+        ...    [
+        ...        [-0.7797, 0.2264, 0.2458, 0.4163],
+        ...        [0.4299, 0.3626, -0.4892, 0.4141],
+        ...        [-1.4115, 1.2183, -0.5503, 0.6520],
+        ...    ]
+        ... )
+        >>> x = flow.Tensor(arr)
+        >>> y = m(x)
+        >>> y #doctest: +ELLIPSIS
+        tensor([[-0.7797,  0.2264,  0.2458,  0.4163],
+                [ 0.4299,  0.3626, -0.4892,  0.4141],
+                [-1.4115,  1.2183, -0.5503,  0.6520]], dtype=oneflow.float32)
+
+    .. _Efficient Object Localization Using Convolutional Networks:
+        https://arxiv.org/abs/1411.4280
+    """,
+)
+
+add_docstr(
+    oneflow.nn.Dropout2d,
+    r"""
+    Randomly zero out entire channels (a channel is a 2D feature map,
+    e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
+    batched input is a 2D tensor :math:`\text{input}[i, j]`).
+    Each channel will be zeroed out independently on every forward call with
+    probability :attr:`p` using samples from a Bernoulli distribution.
+
+    The documentation is referenced from: https://pytorch.org/docs/1.10/generated/torch.nn.Dropout2d.html.
+
+    Usually the input comes from :class:`nn.Conv2d` modules.
+
+    As described in the paper
+    `Efficient Object Localization Using Convolutional Networks`_ ,
+    if adjacent pixels within feature maps are strongly correlated
+    (as is normally the case in early convolution layers) then i.i.d. dropout
+    will not regularize the activations and will otherwise just result
+    in an effective learning rate decrease.
+
+    In this case, :class:`oneflow.nn.Dropout2d` will help promote independence between
+    feature maps and should be used instead.
+
+    Args:
+        p (float, optional): probability of an element to be zeroed.
+        inplace (bool, optional): If set to ``True``, will do this operation
+            in-place
+
+    Shape:
+        - Input: :math:`(N, C, H, W)` or :math:`(C, H, W)`.
+        - Output: :math:`(N, C, H, W)` or :math:`(C, H, W)` (same shape as input).
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import numpy as np
+        >>> import oneflow as flow
+
+        >>> m = flow.nn.Dropout2d(p=0)
+        >>> arr = np.array(
+        ...    [
+        ...        [-0.7797, 0.2264, 0.2458, 0.4163],
+        ...        [0.4299, 0.3626, -0.4892, 0.4141],
+        ...        [-1.4115, 1.2183, -0.5503, 0.6520],
+        ...    ]
+        ... )
+        >>> x = flow.Tensor(arr)
+        >>> y = m(x)
+        >>> y #doctest: +ELLIPSIS
+        tensor([[-0.7797,  0.2264,  0.2458,  0.4163],
+                [ 0.4299,  0.3626, -0.4892,  0.4141],
+                [-1.4115,  1.2183, -0.5503,  0.6520]], dtype=oneflow.float32)
+
+    .. _Efficient Object Localization Using Convolutional Networks:
+        https://arxiv.org/abs/1411.4280
+    """,
+)
+
+add_docstr(
+    oneflow.nn.Dropout3d,
+    r"""
+    Randomly zero out entire channels (a channel is a 3D feature map,
+    e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
+    batched input is a 3D tensor :math:`\text{input}[i, j]`).
+    Each channel will be zeroed out independently on every forward call with
+    probability :attr:`p` using samples from a Bernoulli distribution.
+
+    The documentation is referenced from: https://pytorch.org/docs/1.10/generated/torch.nn.Dropout3d.html.
+
+    Usually the input comes from :class:`nn.Conv3d` modules.
+
+    As described in the paper
+    `Efficient Object Localization Using Convolutional Networks`_ ,
+    if adjacent pixels within feature maps are strongly correlated
+    (as is normally the case in early convolution layers) then i.i.d. dropout
+    will not regularize the activations and will otherwise just result
+    in an effective learning rate decrease.
+
+    In this case, :class:`oneflow.nn.Dropout3d` will help promote independence between
+    feature maps and should be used instead.
+
+    Args:
+        p (float, optional): probability of an element to be zeroed.
+        inplace (bool, optional): If set to ``True``, will do this operation
+            in-place
+
+    Shape:
+        - Input: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`.
+        - Output: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)` (same shape as input).
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import numpy as np
+        >>> import oneflow as flow
+
+        >>> m = flow.nn.Dropout3d(p=0)
+        >>> arr = np.array(
+        ...    [
+        ...        [-0.7797, 0.2264, 0.2458, 0.4163],
+        ...        [0.4299, 0.3626, -0.4892, 0.4141],
+        ...        [-1.4115, 1.2183, -0.5503, 0.6520],
+        ...    ]
+        ... )
+        >>> x = flow.Tensor(arr)
+        >>> y = m(x)
+        >>> y #doctest: +ELLIPSIS
+        tensor([[-0.7797,  0.2264,  0.2458,  0.4163],
+                [ 0.4299,  0.3626, -0.4892,  0.4141],
+                [-1.4115,  1.2183, -0.5503,  0.6520]], dtype=oneflow.float32)
+
+    .. _Efficient Object Localization Using Convolutional Networks:
+        https://arxiv.org/abs/1411.4280
+    """,
+)
diff --git a/python/oneflow/nn/__init__.py b/python/oneflow/nn/__init__.py
index 05d9b9ac830..a160cbdc6b9 100644
--- a/python/oneflow/nn/__init__.py
+++ b/python/oneflow/nn/__init__.py
@@ -94,7 +94,7 @@
     RawReader,
 )
 
-from oneflow.nn.modules.dropout import Dropout
+from oneflow.nn.modules.dropout import Dropout, Dropout1d, Dropout2d, Dropout3d
 from oneflow.nn.modules.flatten import Flatten
 from oneflow.nn.modules.instancenorm import (
     InstanceNorm1d,
diff --git a/python/oneflow/nn/functional/__init__.py b/python/oneflow/nn/functional/__init__.py
index 56e3ed152c8..3587919d5c6 100644
--- a/python/oneflow/nn/functional/__init__.py
+++ b/python/oneflow/nn/functional/__init__.py
@@ -57,7 +57,7 @@
 from oneflow._C import silu
 from oneflow._C import mish
 from oneflow.nn.modules.normalization import layer_norm
-from oneflow._C import dropout
+from oneflow._C import dropout, dropout1d, dropout2d, dropout3d
 from oneflow._C import smooth_l1_loss
 from .functional_pad import pad
 from oneflow._C import triplet_margin_loss
diff --git a/python/oneflow/nn/modules/dropout.py b/python/oneflow/nn/modules/dropout.py
index ed4883af25f..30685419525 100644
--- a/python/oneflow/nn/modules/dropout.py
+++ b/python/oneflow/nn/modules/dropout.py
@@ -40,80 +40,6 @@ def extra_repr(self) -> str:
 
 
 class Dropout(_DropoutNd):
-    """During training, randomly zeroes some of the elements of the input
-    tensor with probability :attr:`p` using samples from a Bernoulli
-    distribution. Each channel will be zeroed out independently on every forward
-    call.
-
-    This has proven to be an effective technique for regularization and
-    preventing the co-adaptation of neurons as described in the paper
-    "Improving neural networks by preventing co-adaptation of feature
-    detectors".
-
-    Furthermore, the outputs are scaled by a factor of :math:`\\frac{1}{1-p}` during
-    training. This means that during evaluation the module simply computes an
-    identity function.
-
-    Additionally, we can pass an extra Tensor `addend` which shape is consistent with input Tensor.
-    The `addend` Tensor will be add in result after dropout, it is very useful in model's residual connection structure.
-
-    Args:
-        p: probability of an element to be zeroed. Default: 0.5
-        inplace: If set to ``True``, will do this operation in-place. Default: ``False``
-        generator: A pseudorandom number generator for sampling
-
-    Shape:
-        - Input: :math:`(*)`. Input can be of any shape
-        - Output: :math:`(*)`. Output is of the same shape as input
-
-    For example:
-
-    example 1:
-
-    .. code-block:: python
-
-        >>> import numpy as np
-        >>> import oneflow as flow
-
-        >>> m = flow.nn.Dropout(p=0)
-        >>> arr = np.array(
-        ...    [
-        ...        [-0.7797, 0.2264, 0.2458, 0.4163],
-        ...        [0.4299, 0.3626, -0.4892, 0.4141],
-        ...        [-1.4115, 1.2183, -0.5503, 0.6520],
-        ...    ]
-        ... )
-        >>> x = flow.Tensor(arr)
-        >>> y = m(x)
-        >>> y #doctest: +ELLIPSIS
-        tensor([[-0.7797,  0.2264,  0.2458,  0.4163],
-                [ 0.4299,  0.3626, -0.4892,  0.4141],
-                [-1.4115,  1.2183, -0.5503,  0.6520]], dtype=oneflow.float32)
-
-    example 2:
-
-    .. code-block:: python
-
-        >>> import numpy as np
-        >>> import oneflow as flow
-
-        >>> m = flow.nn.Dropout(p=0)
-        >>> arr = np.array(
-        ...    [
-        ...        [-0.7797, 0.2264, 0.2458, 0.4163],
-        ...        [0.4299, 0.3626, -0.4892, 0.4141],
-        ...        [-1.4115, 1.2183, -0.5503, 0.6520],
-        ...    ]
-        ... )
-        >>> x = flow.Tensor(arr)
-        >>> addend = flow.ones((3, 4), dtype=flow.float32)
-        >>> y = m(x, addend=addend)
-        >>> y #doctest: +ELLIPSIS
-        tensor([[ 0.2203,  1.2264,  1.2458,  1.4163],
-                [ 1.4299,  1.3626,  0.5108,  1.4141],
-                [-0.4115,  2.2183,  0.4497,  1.6520]], dtype=oneflow.float32)
-    """
-
     def __init__(self, p: float = 0.5, inplace: bool = False, generator=None):
         _DropoutNd.__init__(self, p, inplace)
         self.p = p
@@ -132,6 +58,21 @@ def forward(self, x, addend=None):
         )
 
 
+class Dropout1d(Dropout):
+    def forward(self, x, addend=None):
+        return flow._C.dropout1d(x, self.p, self.training)
+
+
+class Dropout2d(Dropout):
+    def forward(self, x, addend=None):
+        return flow._C.dropout2d(x, self.p, self.training)
+
+
+class Dropout3d(Dropout):
+    def forward(self, x, addend=None):
+        return flow._C.dropout3d(x, self.p, self.training)
+
+
 if __name__ == "__main__":
     import doctest
 
diff --git a/python/oneflow/test/modules/test_dropout.py b/python/oneflow/test/modules/test_dropout.py
index 8badc4b42b4..fbe96f525b1 100644
--- a/python/oneflow/test/modules/test_dropout.py
+++ b/python/oneflow/test/modules/test_dropout.py
@@ -239,7 +239,7 @@ class TestModule(flow.unittest.TestCase):
     def test_dropout_numpy_case(test_case):
         arg_dict = OrderedDict()
         arg_dict["test_fun"] = [do_test_dropout_numpy_p0, do_test_dropout_numpy_p1]
-        arg_dict["shape"] = [[4, 127, 256], [2, 1024, 1024]]
+        arg_dict["shape"] = [[4], [4, 3], [4, 127, 256], [2, 1024, 1024]]
         arg_dict["device"] = ["cuda"]
         if os.getenv("ONEFLOW_TEST_CPU_ONLY"):
             arg_dict["device"] = ["cpu"]
@@ -298,21 +298,81 @@ def test_gpu_fixed_dropout(test_case):
         for arg in GenArgList(arg_dict):
             arg[0](test_case)
 
-    @autotest()
+    @autotest(n=5)
     def autotest_dropout_p0(test_case):
         device = random_device()
         x = random_tensor(ndim=random(), dim0=random(1, 8)).to(device)
         m = torch.nn.Dropout(p=0, inplace=random_bool())
         return m(x)
 
-    @autotest()
+    @autotest(n=5)
+    def autotest_dropout1d_p0(test_case):
+        device = random_device()
+        x = random_tensor(ndim=3, dim0=random(1, 8)).to(device)
+        m = torch.nn.Dropout1d(p=0, inplace=random_bool())
+        return m(x)
+
+    @autotest(n=5)
+    def autotest_dropout2d_p0(test_case):
+        device = random_device()
+        x = random_tensor(ndim=4, dim0=random(1, 8)).to(device)
+        m = torch.nn.Dropout2d(p=0, inplace=random_bool())
+        return m(x)
+
+    @autotest(n=5)
+    def autotest_dropout3d_p0(test_case):
+        device = random_device()
+        x = random_tensor(ndim=5, dim0=random(1, 8)).to(device)
+        m = torch.nn.Dropout3d(p=0, inplace=random_bool())
+        return m(x)
+
+    @autotest(n=5)
     def autotest_dropout_p1(test_case):
         device = random_device()
         x = random_tensor(ndim=random(), dim0=random(1, 8)).to(device)
         m = torch.nn.Dropout(p=1.0, inplace=random_bool())
         return m(x)
 
-    @autotest()
+    @autotest(n=5)
+    def autotest_dropout1d_p1(test_case):
+        device = random_device()
+        x = random_tensor(ndim=3, dim0=random(1, 8)).to(device)
+        m = torch.nn.Dropout1d(p=1.0, inplace=random_bool())
+        return m(x)
+
+    @autotest(n=5)
+    def autotest_dropout2d_p1(test_case):
+        device = random_device()
+        x = random_tensor(ndim=4, dim0=random(1, 8)).to(device)
+        m = torch.nn.Dropout2d(p=1.0, inplace=random_bool())
+        return m(x)
+
+    @autotest(n=5)
+    def autotest_dropout3d_p1(test_case):
+        device = random_device()
+        x = random_tensor(ndim=5, dim0=random(1, 8)).to(device)
+        m = torch.nn.Dropout3d(p=1.0, inplace=random_bool())
+        return m(x)
+
+    @autotest(n=5)
+    def autotest_functional_dropout1d_p1(test_case):
+        device = random_device()
+        x = random_tensor(ndim=3, dim0=random(1, 8)).to(device)
+        return torch.nn.functional.dropout1d(x, p=1.0)
+
+    @autotest(n=5)
+    def autotest_functional_dropout2d_p1(test_case):
+        device = random_device()
+        x = random_tensor(ndim=4, dim0=random(1, 8)).to(device)
+        return torch.nn.functional.dropout2d(x, p=1.0)
+
+    @autotest(n=5)
+    def autotest_functional_dropout3d_p1(test_case):
+        device = random_device()
+        x = random_tensor(ndim=5, dim0=random(1, 8)).to(device)
+        return torch.nn.functional.dropout3d(x, p=1.0)
+
+    @autotest(n=5)
     def autotest_dropout_eval(test_case):
         device = random_device()
         x = random_tensor(ndim=random(), dim0=random(1, 8)).to(device)
@@ -320,7 +380,31 @@ def autotest_dropout_eval(test_case):
         m.eval()
         return m(x)
 
-    @autotest()
+    @autotest(n=5)
+    def autotest_dropout1d_eval(test_case):
+        device = random_device()
+        x = random_tensor(ndim=3, dim0=random(1, 8)).to(device)
+        m = torch.nn.Dropout1d(p=1.0, inplace=random_bool())
+        m.eval()
+        return m(x)
+
+    @autotest(n=5)
+    def autotest_dropout2d_eval(test_case):
+        device = random_device()
+        x = random_tensor(ndim=4, dim0=random(1, 8)).to(device)
+        m = torch.nn.Dropout2d(p=1.0, inplace=random_bool())
+        m.eval()
+        return m(x)
+
+    @autotest(n=5)
+    def autotest_dropout3d_eval(test_case):
+        device = random_device()
+        x = random_tensor(ndim=5, dim0=random(1, 8)).to(device)
+        m = torch.nn.Dropout3d(p=1.0, inplace=random_bool())
+        m.eval()
+        return m(x)
+
+    @autotest(n=5)
     def autotest_0dim_dropout_eval(test_case):
         device = random_device()
         x = random_tensor(ndim=0).to(device)
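Note (reviewer sketch, not part of the patch): the three functors above all reduce to the same channel-wise rule implemented by MakeFeatureNoise + DropoutImpl — draw one Bernoulli sample per (batch, channel) pair with keep probability 1 - p, zero the whole channel, and rescale the survivors by 1 / (1 - p) so the expected value is unchanged. A minimal NumPy illustration of that semantics, assuming only NumPy (the function name channelwise_dropout is made up for this example):

    import numpy as np

    def channelwise_dropout(x, p=0.5, training=True):
        # x has shape (N, C, *spatial); every (n, c) channel is kept or
        # dropped as a unit, mirroring the (N, C, 1, ...) noise tensor
        # built by MakeFeatureNoise in the patch.
        if not training or p == 0:
            return x
        if p == 1:
            return np.zeros_like(x)
        noise_shape = x.shape[:2] + (1,) * (x.ndim - 2)
        keep = np.random.binomial(1, 1.0 - p, size=noise_shape).astype(x.dtype)
        # Rescale survivors by 1/(1-p), matching the InplaceScalarDiv step
        # in DropoutImpl, so E[output] == input.
        return x * keep / (1.0 - p)

    # Example: a (2, 3, 4) input with p=0.5; each of the 6 channels is
    # either all zeros or scaled by 2.
    out = channelwise_dropout(np.ones((2, 3, 4)), p=0.5)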