diff --git a/docs/source/nn.functional.rst b/docs/source/nn.functional.rst
index 00cfb2fd8f4..2f4ab8ad1bf 100644
--- a/docs/source/nn.functional.rst
+++ b/docs/source/nn.functional.rst
@@ -95,6 +95,9 @@ Dropout functions
     :nosignatures:
 
     dropout
+    dropout1d
+    dropout2d
+    dropout3d
 
 Sparse functions
 ----------------------------------
diff --git a/docs/source/nn.rst b/docs/source/nn.rst
index d304fefb0d4..960b7734068 100644
--- a/docs/source/nn.rst
+++ b/docs/source/nn.rst
@@ -227,6 +227,9 @@ Dropout Layers
     :template: classtemplate.rst
 
     nn.Dropout
+    nn.Dropout1d
+    nn.Dropout2d
+    nn.Dropout3d
 
 Sparse Layers
 ----------------------------------
diff --git a/oneflow/core/functional/functional_api.yaml b/oneflow/core/functional/functional_api.yaml
index e1201af5489..a333df9c216 100755
--- a/oneflow/core/functional/functional_api.yaml
+++ b/oneflow/core/functional/functional_api.yaml
@@ -808,7 +808,7 @@
   signature: [
     "Tensor (Tensor input, *, DataType dtype=kFloat, Generator generator=None) => Bernoulli",
-    "Tensor (Tensor input, Double p, *, DataType dtype=kFloat, Generator generator=None) => Bernoulli",
+    "Tensor (Tensor input, Double p, *, DataType dtype=kFloat, Generator generator=None) => BernoulliProb",
   ]
   bind_python: True
 
@@ -1793,6 +1793,18 @@
   signature: "Tensor (Tensor dy, Tensor mask, Float scale) => DropoutGrad"
   bind_python: False
 
+- name: "dropout1d"
+  signature: "Tensor (Tensor input, Float p=0.5, Bool training=True) => Dropout1d"
+  bind_python: True
+
+- name: "dropout2d"
+  signature: "Tensor (Tensor input, Float p=0.5, Bool training=True) => Dropout2d"
+  bind_python: True
+
+- name: "dropout3d"
+  signature: "Tensor (Tensor input, Float p=0.5, Bool training=True) => Dropout3d"
+  bind_python: True
+
 - name: "constant_pad"
   signature: 'Tensor (Tensor x, Int64List pad, Scalar value=0) => ConstantPad'
   bind_python: False
diff --git a/oneflow/core/functional/impl/nn_functor.cpp b/oneflow/core/functional/impl/nn_functor.cpp
index 5123250553d..a4779723172 100644
--- a/oneflow/core/functional/impl/nn_functor.cpp
+++ b/oneflow/core/functional/impl/nn_functor.cpp
@@ -2457,6 +2457,103 @@ class DropoutFunctor {
   std::shared_ptr<OpExpr> add_op_;
 };
 
+namespace {
+Maybe<Tensor> MakeFeatureNoise(const std::shared_ptr<one::Tensor>& x) {
+  const int64_t ndim = x->ndim();
+  CHECK_GE_OR_RETURN(ndim, 2) << Error::RuntimeError()
+                              << "Feature dropout requires at least 2 dimensions in the input";
+  std::vector<int64_t> sizes;
+  sizes.reserve(ndim);
+  sizes.push_back(x->shape()->At(0));
+  sizes.push_back(x->shape()->At(1));
+  for (int i = 2; i < ndim; i++) { sizes.push_back(1); }
+  return JUST(Empty(Shape(sizes), x->dtype(), JUST(x->device()), false));
+}
+
+Maybe<Tensor> DropoutImpl(const std::shared_ptr<one::Tensor>& input, const float& p,
+                          const bool& train) {
+  CHECK_EQ_OR_RETURN(p >= 0 && p <= 1.0, true)
+      << "dropout probability has to be between 0 and 1, but got " << p;
+  if (p == 0 || !train || input->shape()->elem_cnt() == 0) { return input; }
+  if (p == 1) {
+    std::shared_ptr<one::Tensor> other =
+        JUST(Constant(*input->shape(), Scalar(0.0), input->dtype(), JUST(input->device())));
+    return InplaceMul(input, other);
+  }
+  std::shared_ptr<one::Tensor> noise = JUST(MakeFeatureNoise(input));
+  noise = JUST(BernoulliProb(noise, 1.0 - p, noise->dtype(), JUST(one::DefaultAutoGenerator())));
+  noise = JUST(InplaceScalarDiv(noise, Scalar(1.0 - p)));
+  noise = JUST(InplaceMul(input, noise));
+  return noise;
+}
+}  // namespace
+
+class Dropout1dFunctor {
+ public:
+  Maybe<Tensor> operator()(const std::shared_ptr<one::Tensor>& input, const float& p,
+                           const bool& training) const {
+    CHECK_EQ_OR_RETURN(p >= 0 && p <= 1.0, true)
+        << "dropout probability has to be between 0 and 1, but got " << p;
+    const int input_dim = input->ndim();
+    CHECK_EQ_OR_RETURN(input_dim == 2 || input_dim == 3, true)
+        << "dropout1d: Expected 2D or 3D input, but received a " << input_dim
+        << "D input. Note that dropout1d exists to provide channel-wise dropout on inputs with 1 "
+           "spatial dimension, a channel dimension, and an optional batch dimension "
+           "(i.e. 2D or 3D inputs).";
+    const bool is_batched = (input_dim == 3);
+    std::shared_ptr<one::Tensor> result = input;
+    if (!is_batched) { result = JUST(Unsqueeze(input, 0)); }
+    result = JUST(DropoutImpl(result, p, training));
+    if (!is_batched) { result = JUST(Squeeze(result, std::vector<int32_t>{0})); }
+    return result;
+  }
+};
+
+class Dropout2dFunctor {
+ public:
+  Maybe<Tensor> operator()(const std::shared_ptr<one::Tensor>& input, const float& p,
+                           const bool& training) const {
+    CHECK_EQ_OR_RETURN(p >= 0 && p <= 1.0, true)
+        << "dropout probability has to be between 0 and 1, but got " << p;
+    const int input_dim = input->ndim();
+    if (input_dim != 3 && input_dim != 4) {
+      LOG(WARNING) << "dropout2d: Received a " << input_dim
+                   << "-D input to dropout2d, which is deprecated and will result in an error in "
+                      "a future release. To retain the behavior and silence this warning, please "
+                      "use dropout instead. Note that dropout2d exists to provide channel-wise "
+                      "dropout on inputs with 2 spatial dimensions, a channel dimension, and an "
+                      "optional batch dimension (i.e. 3D or 4D inputs).";
+    }
+    if (input_dim == 3) {
+      LOG(WARNING) << "dropout2d: Received a 3D input to dropout2d and assuming that channel-wise "
+                      "1D dropout behavior is desired - input is interpreted as shape (N, C, L), "
+                      "where C is the channel dim. This behavior will change in a future release "
+                      "to interpret the input as one without a batch dimension, i.e. shape "
+                      "(C, H, W). To maintain the 1D channel-wise dropout behavior, please switch "
+                      "to using dropout1d instead.";
+    }
+    return JUST(DropoutImpl(input, p, training));
+  }
+};
+
+class Dropout3dFunctor {
+ public:
+  Maybe<Tensor> operator()(const std::shared_ptr<one::Tensor>& input, const float& p,
+                           const bool& training) const {
+    CHECK_EQ_OR_RETURN(p >= 0 && p <= 1.0, true)
+        << "dropout probability has to be between 0 and 1, but got " << p;
+    const int input_dim = input->ndim();
+    if (input_dim != 4 && input_dim != 5) {
+      LOG(WARNING) << "dropout3d: Received a " << input_dim
+                   << "-D input to dropout3d, which is deprecated and will result in an error in "
+                      "a future release. To retain the behavior and silence this warning, please "
+                      "use dropout instead. Note that dropout3d exists to provide channel-wise "
+                      "dropout on inputs with 3 spatial dimensions, a channel dimension, and an "
+                      "optional batch dimension (i.e. 4D or 5D inputs).";
+    }
+    const bool is_batched = (input_dim == 5);
+    std::shared_ptr<one::Tensor> result = input;
+    if (!is_batched) { result = JUST(Unsqueeze(input, 0)); }
+    result = JUST(DropoutImpl(result, p, training));
+    if (!is_batched) { result = JUST(Squeeze(result, std::vector<int32_t>{0})); }
+    return result;
+  }
+};
+
 class DropoutGradFunctor {
  public:
   DropoutGradFunctor() {
@@ -3838,6 +3935,9 @@ ONEFLOW_FUNCTION_LIBRARY(m) {
   m.add_functor<impl::PadFunctor>("Pad");
   m.add_functor<impl::DropoutFunctor>("Dropout");
   m.add_functor<impl::DropoutGradFunctor>("DropoutGrad");
+  m.add_functor<impl::Dropout1dFunctor>("Dropout1d");
+  m.add_functor<impl::Dropout2dFunctor>("Dropout2d");
+  m.add_functor<impl::Dropout3dFunctor>("Dropout3d");
   m.add_functor<impl::PixelShuffleFunctor>("PixelShuffle");
   m.add_functor<impl::AvgPool1DFunctor>("AvgPool1D");
   m.add_functor<impl::AvgPool2DFunctor>("AvgPool2D");
diff --git a/oneflow/core/functional/impl/random_functor.cpp b/oneflow/core/functional/impl/random_functor.cpp
index b31be5dff57..a7198c1c891 100644
--- a/oneflow/core/functional/impl/random_functor.cpp
+++ b/oneflow/core/functional/impl/random_functor.cpp
@@ -448,7 +448,7 @@ using namespace impl;
 
 ONEFLOW_FUNCTION_LIBRARY(m) {
   m.add_functor<impl::BernoulliFunctor>("Bernoulli");
-  m.add_functor<impl::BernoulliProbFunctor>("Bernoulli");
+  m.add_functor<impl::BernoulliProbFunctor>("BernoulliProb");
   m.add_functor<impl::RandPermFunctor>("RandPerm");
   m.add_functor<impl::GlobalRandPermFunctor>("GlobalRandPerm");
   m.add_functor<impl::RandFunctor>("Rand");
diff --git a/python/oneflow/framework/docstr/dropout.py b/python/oneflow/framework/docstr/dropout.py
index b339c3c9563..13bba473564 100644
--- a/python/oneflow/framework/docstr/dropout.py
+++ b/python/oneflow/framework/docstr/dropout.py
@@ -97,3 +97,329 @@
     """,
 )
+
+add_docstr(
+    oneflow._C.dropout1d,
+    r"""
+    dropout1d(x: Tensor, p: float = 0.5, training: bool = True) -> Tensor
+
+    The documentation is referenced from:
+    https://pytorch.org/docs/1.12/generated/torch.nn.functional.dropout1d.html.
+
+    Randomly zero out entire channels (a channel is a 1D feature map,
+    e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
+    batched input is a 1D tensor :math:`\text{input}[i, j]`) of the input tensor.
+    Each channel will be zeroed out independently on every forward call with
+    probability :attr:`p` using samples from a Bernoulli distribution.
+
+    See :class:`~oneflow.nn.Dropout1d` for details.
+
+    Args:
+        p: probability of a channel to be zeroed. Default: 0.5
+        training: apply dropout if ``True``. Default: ``True``
+    """,
+)
+
+add_docstr(
+    oneflow._C.dropout2d,
+    r"""
+    dropout2d(x: Tensor, p: float = 0.5, training: bool = True) -> Tensor
+
+    The documentation is referenced from:
+    https://pytorch.org/docs/1.10/generated/torch.nn.functional.dropout2d.html.
+
+    Randomly zero out entire channels (a channel is a 2D feature map,
+    e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
+    batched input is a 2D tensor :math:`\text{input}[i, j]`) of the input tensor.
+    Each channel will be zeroed out independently on every forward call with
+    probability :attr:`p` using samples from a Bernoulli distribution.
+
+    See :class:`~oneflow.nn.Dropout2d` for details.
+
+    Args:
+        p: probability of a channel to be zeroed. Default: 0.5
+        training: apply dropout if ``True``. Default: ``True``
+    """,
+)
+
+add_docstr(
+    oneflow._C.dropout3d,
+    r"""
+    dropout3d(x: Tensor, p: float = 0.5, training: bool = True) -> Tensor
+
+    The documentation is referenced from:
+    https://pytorch.org/docs/1.10/generated/torch.nn.functional.dropout3d.html.
+
+    Randomly zero out entire channels (a channel is a 3D feature map,
+    e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
+    batched input is a 3D tensor :math:`\text{input}[i, j]`) of the input tensor.
+    Each channel will be zeroed out independently on every forward call with
+    probability :attr:`p` using samples from a Bernoulli distribution.
+
+    See :class:`~oneflow.nn.Dropout3d` for details.
+
+    Args:
+        p: probability of a channel to be zeroed. Default: 0.5
+        training: apply dropout if ``True``. Default: ``True``
+    """,
+)
+
+add_docstr(
+    oneflow.nn.Dropout,
+    """
+    During training, randomly zeroes some of the elements of the input
+    tensor with probability :attr:`p` using samples from a Bernoulli
+    distribution. Each channel will be zeroed out independently on every forward
+    call.
+
+    The documentation is referenced from: https://pytorch.org/docs/1.10/generated/torch.nn.Dropout.html.
+
+    This has proven to be an effective technique for regularization and
+    preventing the co-adaptation of neurons as described in the paper
+    "Improving neural networks by preventing co-adaptation of feature
+    detectors".
+
+    Furthermore, the outputs are scaled by a factor of :math:`\\frac{1}{1-p}` during
+    training. This means that during evaluation the module simply computes an
+    identity function.
+
+    Additionally, an extra Tensor ``addend`` with the same shape as the input can be passed.
+    The ``addend`` Tensor is added to the result after dropout, which is useful in residual
+    connection structures.
+
+    Args:
+        p: probability of an element to be zeroed. Default: 0.5
+        inplace: If set to ``True``, will do this operation in-place. Default: ``False``
+        generator: A pseudorandom number generator for sampling
+
+    Shape:
+        - Input: :math:`(*)`. Input can be of any shape
+        - Output: :math:`(*)`. Output is of the same shape as input
+
+    For example:
+
+    example 1:
+
+    .. code-block:: python
+
+        >>> import numpy as np
+        >>> import oneflow as flow
+
+        >>> m = flow.nn.Dropout(p=0)
+        >>> arr = np.array(
+        ...    [
+        ...        [-0.7797, 0.2264, 0.2458, 0.4163],
+        ...        [0.4299, 0.3626, -0.4892, 0.4141],
+        ...        [-1.4115, 1.2183, -0.5503, 0.6520],
+        ...    ]
+        ... )
+        >>> x = flow.Tensor(arr)
+        >>> y = m(x)
+        >>> y #doctest: +ELLIPSIS
+        tensor([[-0.7797,  0.2264,  0.2458,  0.4163],
+                [ 0.4299,  0.3626, -0.4892,  0.4141],
+                [-1.4115,  1.2183, -0.5503,  0.6520]], dtype=oneflow.float32)
+
+    example 2:
+
+    .. code-block:: python
+
+        >>> import numpy as np
+        >>> import oneflow as flow
+
+        >>> m = flow.nn.Dropout(p=0)
+        >>> arr = np.array(
+        ...    [
+        ...        [-0.7797, 0.2264, 0.2458, 0.4163],
+        ...        [0.4299, 0.3626, -0.4892, 0.4141],
+        ...        [-1.4115, 1.2183, -0.5503, 0.6520],
+        ...    ]
+        ... )
+        >>> x = flow.Tensor(arr)
+        >>> addend = flow.ones((3, 4), dtype=flow.float32)
+        >>> y = m(x, addend=addend)
+        >>> y #doctest: +ELLIPSIS
+        tensor([[ 0.2203,  1.2264,  1.2458,  1.4163],
+                [ 1.4299,  1.3626,  0.5108,  1.4141],
+                [-0.4115,  2.2183,  0.4497,  1.6520]], dtype=oneflow.float32)
+
+    .. _Improving neural networks by preventing co-adaptation of feature
+        detectors: https://arxiv.org/abs/1207.0580
+    """,
+)
+
+add_docstr(
+    oneflow.nn.Dropout1d,
+    r"""
+    Randomly zero out entire channels (a channel is a 1D feature map,
+    e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
+    batched input is a 1D tensor :math:`\text{input}[i, j]`).
+    Each channel will be zeroed out independently on every forward call with
+    probability :attr:`p` using samples from a Bernoulli distribution.
+
+    The documentation is referenced from: https://pytorch.org/docs/1.12/generated/torch.nn.Dropout1d.html.
+
+    Usually the input comes from :class:`nn.Conv1d` modules.
+
+    As described in the paper
+    `Efficient Object Localization Using Convolutional Networks`_ ,
+    if adjacent pixels within feature maps are strongly correlated
+    (as is normally the case in early convolution layers) then i.i.d. dropout
+    will not regularize the activations and will otherwise just result
+    in an effective learning rate decrease.
+
+    In this case, :class:`oneflow.nn.Dropout1d` will help promote independence between
+    feature maps and should be used instead.
+
+    Args:
+        p (float, optional): probability of an element to be zeroed.
+        inplace (bool, optional): If set to ``True``, will do this operation
+            in-place
+
+    Shape:
+        - Input: :math:`(N, C, L)` or :math:`(C, L)`.
+        - Output: :math:`(N, C, L)` or :math:`(C, L)` (same shape as input).
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import numpy as np
+        >>> import oneflow as flow
+
+        >>> m = flow.nn.Dropout1d(p=0)
+        >>> arr = np.array(
+        ...    [
+        ...        [-0.7797, 0.2264, 0.2458, 0.4163],
+        ...        [0.4299, 0.3626, -0.4892, 0.4141],
+        ...        [-1.4115, 1.2183, -0.5503, 0.6520],
+        ...    ]
+        ... )
+        >>> x = flow.Tensor(arr)
+        >>> y = m(x)
+        >>> y #doctest: +ELLIPSIS
+        tensor([[-0.7797,  0.2264,  0.2458,  0.4163],
+                [ 0.4299,  0.3626, -0.4892,  0.4141],
+                [-1.4115,  1.2183, -0.5503,  0.6520]], dtype=oneflow.float32)
+
+    .. _Efficient Object Localization Using Convolutional Networks:
+        https://arxiv.org/abs/1411.4280
+    """,
+)
+
+add_docstr(
+    oneflow.nn.Dropout2d,
+    r"""
+    Randomly zero out entire channels (a channel is a 2D feature map,
+    e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
+    batched input is a 2D tensor :math:`\text{input}[i, j]`).
+    Each channel will be zeroed out independently on every forward call with
+    probability :attr:`p` using samples from a Bernoulli distribution.
+
+    The documentation is referenced from: https://pytorch.org/docs/1.10/generated/torch.nn.Dropout2d.html.
+
+    Usually the input comes from :class:`nn.Conv2d` modules.
+
+    As described in the paper
+    `Efficient Object Localization Using Convolutional Networks`_ ,
+    if adjacent pixels within feature maps are strongly correlated
+    (as is normally the case in early convolution layers) then i.i.d. dropout
+    will not regularize the activations and will otherwise just result
+    in an effective learning rate decrease.
+
+    In this case, :class:`oneflow.nn.Dropout2d` will help promote independence between
+    feature maps and should be used instead.
+
+    Args:
+        p (float, optional): probability of an element to be zeroed.
+        inplace (bool, optional): If set to ``True``, will do this operation
+            in-place
+
+    Shape:
+        - Input: :math:`(N, C, H, W)` or :math:`(C, H, W)`.
+        - Output: :math:`(N, C, H, W)` or :math:`(C, H, W)` (same shape as input).
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import numpy as np
+        >>> import oneflow as flow
+
+        >>> m = flow.nn.Dropout2d(p=0)
+        >>> arr = np.array(
+        ...    [
+        ...        [-0.7797, 0.2264, 0.2458, 0.4163],
+        ...        [0.4299, 0.3626, -0.4892, 0.4141],
+        ...        [-1.4115, 1.2183, -0.5503, 0.6520],
+        ...    ]
+        ... )
+        >>> x = flow.Tensor(arr)
+        >>> y = m(x)
+        >>> y #doctest: +ELLIPSIS
+        tensor([[-0.7797,  0.2264,  0.2458,  0.4163],
+                [ 0.4299,  0.3626, -0.4892,  0.4141],
+                [-1.4115,  1.2183, -0.5503,  0.6520]], dtype=oneflow.float32)
+
+    .. _Efficient Object Localization Using Convolutional Networks:
+        https://arxiv.org/abs/1411.4280
+    """,
+)
+
+add_docstr(
+    oneflow.nn.Dropout3d,
+    r"""
+    Randomly zero out entire channels (a channel is a 3D feature map,
+    e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
+    batched input is a 3D tensor :math:`\text{input}[i, j]`).
+    Each channel will be zeroed out independently on every forward call with
+    probability :attr:`p` using samples from a Bernoulli distribution.
+
+    The documentation is referenced from: https://pytorch.org/docs/1.10/generated/torch.nn.Dropout3d.html.
+
+    Usually the input comes from :class:`nn.Conv3d` modules.
+
+    As described in the paper
+    `Efficient Object Localization Using Convolutional Networks`_ ,
+    if adjacent pixels within feature maps are strongly correlated
+    (as is normally the case in early convolution layers) then i.i.d. dropout
+    will not regularize the activations and will otherwise just result
+    in an effective learning rate decrease.
+
+    In this case, :class:`oneflow.nn.Dropout3d` will help promote independence between
+    feature maps and should be used instead.
+
+    Args:
+        p (float, optional): probability of an element to be zeroed.
+        inplace (bool, optional): If set to ``True``, will do this operation
+            in-place
+
+    Shape:
+        - Input: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`.
+        - Output: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)` (same shape as input).
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import numpy as np
+        >>> import oneflow as flow
+
+        >>> m = flow.nn.Dropout3d(p=0)
+        >>> arr = np.array(
+        ...    [
+        ...        [-0.7797, 0.2264, 0.2458, 0.4163],
+        ...        [0.4299, 0.3626, -0.4892, 0.4141],
+        ...        [-1.4115, 1.2183, -0.5503, 0.6520],
+        ...    ]
+        ... )
+        >>> x = flow.Tensor(arr)
+        >>> y = m(x)
+        >>> y #doctest: +ELLIPSIS
+        tensor([[-0.7797,  0.2264,  0.2458,  0.4163],
+                [ 0.4299,  0.3626, -0.4892,  0.4141],
+                [-1.4115,  1.2183, -0.5503,  0.6520]], dtype=oneflow.float32)
+
+    .. _Efficient Object Localization Using Convolutional Networks:
+        https://arxiv.org/abs/1411.4280
+    """,
+)
diff --git a/python/oneflow/nn/__init__.py b/python/oneflow/nn/__init__.py
index 05d9b9ac830..a160cbdc6b9 100644
--- a/python/oneflow/nn/__init__.py
+++ b/python/oneflow/nn/__init__.py
@@ -94,7 +94,7 @@
     RawReader,
 )
 
-from oneflow.nn.modules.dropout import Dropout
+from oneflow.nn.modules.dropout import Dropout, Dropout1d, Dropout2d, Dropout3d
 from oneflow.nn.modules.flatten import Flatten
 from oneflow.nn.modules.instancenorm import (
     InstanceNorm1d,
diff --git a/python/oneflow/nn/functional/__init__.py b/python/oneflow/nn/functional/__init__.py
index 56e3ed152c8..3587919d5c6 100644
--- a/python/oneflow/nn/functional/__init__.py
+++ b/python/oneflow/nn/functional/__init__.py
@@ -57,7 +57,7 @@
 from oneflow._C import silu
 from oneflow._C import mish
 from oneflow.nn.modules.normalization import layer_norm
-from oneflow._C import dropout
+from oneflow._C import dropout, dropout1d, dropout2d, dropout3d
 from oneflow._C import smooth_l1_loss
 from .functional_pad import pad
 from oneflow._C import triplet_margin_loss
diff --git a/python/oneflow/nn/modules/dropout.py b/python/oneflow/nn/modules/dropout.py
index ed4883af25f..30685419525 100644
--- a/python/oneflow/nn/modules/dropout.py
+++ b/python/oneflow/nn/modules/dropout.py
@@ -40,80 +40,6 @@ def extra_repr(self) -> str:
 
 
 class Dropout(_DropoutNd):
-    """During training, randomly zeroes some of the elements of the input
-    tensor with probability :attr:`p` using samples from a Bernoulli
-    distribution. Each channel will be zeroed out independently on every forward
-    call.
-
-    This has proven to be an effective technique for regularization and
-    preventing the co-adaptation of neurons as described in the paper
-    "Improving neural networks by preventing co-adaptation of feature
-    detectors".
-
-    Furthermore, the outputs are scaled by a factor of :math:`\\frac{1}{1-p}` during
-    training. This means that during evaluation the module simply computes an
-    identity function.
-
-    Additionally, we can pass an extra Tensor `addend` which shape is consistent with input Tensor.
-    The `addend` Tensor will be add in result after dropout, it is very useful in model's residual connection structure.
-
-    Args:
-        p: probability of an element to be zeroed. Default: 0.5
-        inplace: If set to ``True``, will do this operation in-place. Default: ``False``
-        generator: A pseudorandom number generator for sampling
-
-    Shape:
-        - Input: :math:`(*)`. Input can be of any shape
-        - Output: :math:`(*)`. Output is of the same shape as input
-
-    For example:
-
-    example 1:
-
-    .. code-block:: python
-
-        >>> import numpy as np
-        >>> import oneflow as flow
-
-        >>> m = flow.nn.Dropout(p=0)
-        >>> arr = np.array(
-        ...    [
-        ...        [-0.7797, 0.2264, 0.2458, 0.4163],
-        ...        [0.4299, 0.3626, -0.4892, 0.4141],
-        ...        [-1.4115, 1.2183, -0.5503, 0.6520],
-        ...    ]
-        ... )
-        >>> x = flow.Tensor(arr)
-        >>> y = m(x)
-        >>> y #doctest: +ELLIPSIS
-        tensor([[-0.7797,  0.2264,  0.2458,  0.4163],
-                [ 0.4299,  0.3626, -0.4892,  0.4141],
-                [-1.4115,  1.2183, -0.5503,  0.6520]], dtype=oneflow.float32)
-
-    example 2:
-
-    .. code-block:: python
-
-        >>> import numpy as np
-        >>> import oneflow as flow
-
-        >>> m = flow.nn.Dropout(p=0)
-        >>> arr = np.array(
-        ...    [
-        ...        [-0.7797, 0.2264, 0.2458, 0.4163],
-        ...        [0.4299, 0.3626, -0.4892, 0.4141],
-        ...        [-1.4115, 1.2183, -0.5503, 0.6520],
-        ...    ]
-        ... )
-        >>> x = flow.Tensor(arr)
-        >>> addend = flow.ones((3, 4), dtype=flow.float32)
-        >>> y = m(x, addend=addend)
-        >>> y #doctest: +ELLIPSIS
-        tensor([[ 0.2203,  1.2264,  1.2458,  1.4163],
-                [ 1.4299,  1.3626,  0.5108,  1.4141],
-                [-0.4115,  2.2183,  0.4497,  1.6520]], dtype=oneflow.float32)
-    """
-
     def __init__(self, p: float = 0.5, inplace: bool = False, generator=None):
         _DropoutNd.__init__(self, p, inplace)
         self.p = p
@@ -132,6 +58,21 @@ def forward(self, x, addend=None):
         )
 
 
+class Dropout1d(Dropout):
+    def forward(self, x, addend=None):
+        return flow._C.dropout1d(x, self.p, self.training)
+
+
+class Dropout2d(Dropout):
+    def forward(self, x, addend=None):
+        return flow._C.dropout2d(x, self.p, self.training)
+
+
+class Dropout3d(Dropout):
+    def forward(self, x, addend=None):
+        return flow._C.dropout3d(x, self.p, self.training)
+
+
 if __name__ == "__main__":
     import doctest
 
diff --git a/python/oneflow/test/modules/test_dropout.py b/python/oneflow/test/modules/test_dropout.py
index 8badc4b42b4..fbe96f525b1 100644
--- a/python/oneflow/test/modules/test_dropout.py
+++ b/python/oneflow/test/modules/test_dropout.py
@@ -239,7 +239,7 @@ class TestModule(flow.unittest.TestCase):
     def test_dropout_numpy_case(test_case):
         arg_dict = OrderedDict()
         arg_dict["test_fun"] = [do_test_dropout_numpy_p0, do_test_dropout_numpy_p1]
-        arg_dict["shape"] = [[4, 127, 256], [2, 1024, 1024]]
+        arg_dict["shape"] = [[4], [4, 3], [4, 127, 256], [2, 1024, 1024]]
         arg_dict["device"] = ["cuda"]
         if os.getenv("ONEFLOW_TEST_CPU_ONLY"):
             arg_dict["device"] = ["cpu"]
@@ -298,21 +298,81 @@ def test_gpu_fixed_dropout(test_case):
         for arg in GenArgList(arg_dict):
             arg[0](test_case)
 
-    @autotest()
+    @autotest(n=5)
     def autotest_dropout_p0(test_case):
         device = random_device()
         x = random_tensor(ndim=random(), dim0=random(1, 8)).to(device)
         m = torch.nn.Dropout(p=0, inplace=random_bool())
         return m(x)
 
-    @autotest()
+    @autotest(n=5)
+    def autotest_dropout1d_p0(test_case):
+        device = random_device()
+        x = random_tensor(ndim=3, dim0=random(1, 8)).to(device)
+        m = torch.nn.Dropout1d(p=0, inplace=random_bool())
+        return m(x)
+
+    @autotest(n=5)
+    def autotest_dropout2d_p0(test_case):
+        device = random_device()
+        x = random_tensor(ndim=4, dim0=random(1, 8)).to(device)
+        m = torch.nn.Dropout2d(p=0, inplace=random_bool())
+        return m(x)
+
+    @autotest(n=5)
+    def autotest_dropout3d_p0(test_case):
+        device = random_device()
+        x = random_tensor(ndim=5, dim0=random(1, 8)).to(device)
+        m = torch.nn.Dropout3d(p=0, inplace=random_bool())
+        return m(x)
+
+    @autotest(n=5)
     def autotest_dropout_p1(test_case):
         device = random_device()
         x = random_tensor(ndim=random(), dim0=random(1, 8)).to(device)
         m = torch.nn.Dropout(p=1.0, inplace=random_bool())
         return m(x)
 
-    @autotest()
+    @autotest(n=5)
+    def autotest_dropout1d_p1(test_case):
+        device = random_device()
+        x = random_tensor(ndim=3, dim0=random(1, 8)).to(device)
+        m = torch.nn.Dropout1d(p=1.0, inplace=random_bool())
+        return m(x)
+
+    @autotest(n=5)
+    def autotest_dropout2d_p1(test_case):
+        device = random_device()
+        x = random_tensor(ndim=4, dim0=random(1, 8)).to(device)
+        m = torch.nn.Dropout2d(p=1.0, inplace=random_bool())
+        return m(x)
+
+    @autotest(n=5)
+    def autotest_dropout3d_p1(test_case):
+        device = random_device()
+        x = random_tensor(ndim=5, dim0=random(1, 8)).to(device)
+        m = torch.nn.Dropout3d(p=1.0, inplace=random_bool())
+        return m(x)
+
+    @autotest(n=5)
+    def autotest_functional_dropout1d_p1(test_case):
+        device = random_device()
+        x = random_tensor(ndim=3, dim0=random(1, 8)).to(device)
+        return torch.nn.functional.dropout1d(x, p=1.0)
+
+    @autotest(n=5)
+    def autotest_functional_dropout2d_p1(test_case):
+        device = random_device()
+        x = random_tensor(ndim=4, dim0=random(1, 8)).to(device)
+        return torch.nn.functional.dropout2d(x, p=1.0)
+
+    @autotest(n=5)
+    def autotest_functional_dropout3d_p1(test_case):
+        device = random_device()
+        x = random_tensor(ndim=5, dim0=random(1, 8)).to(device)
+        return torch.nn.functional.dropout3d(x, p=1.0)
+
+    @autotest(n=5)
     def autotest_dropout_eval(test_case):
         device = random_device()
         x = random_tensor(ndim=random(), dim0=random(1, 8)).to(device)
@@ -320,7 +380,31 @@ def autotest_dropout_eval(test_case):
         m.eval()
         return m(x)
 
-    @autotest()
+    @autotest(n=5)
+    def autotest_dropout1d_eval(test_case):
+        device = random_device()
+        x = random_tensor(ndim=3, dim0=random(1, 8)).to(device)
+        m = torch.nn.Dropout1d(p=1.0, inplace=random_bool())
+        m.eval()
+        return m(x)
+
+    @autotest(n=5)
+    def autotest_dropout2d_eval(test_case):
+        device = random_device()
+        x = random_tensor(ndim=4, dim0=random(1, 8)).to(device)
+        m = torch.nn.Dropout2d(p=1.0, inplace=random_bool())
+        m.eval()
+        return m(x)
+
+    @autotest(n=5)
+    def autotest_dropout3d_eval(test_case):
+        device = random_device()
+        x = random_tensor(ndim=5, dim0=random(1, 8)).to(device)
+        m = torch.nn.Dropout3d(p=1.0, inplace=random_bool())
+        m.eval()
+        return m(x)
+
+    @autotest(n=5)
     def autotest_0dim_dropout_eval(test_case):
         device = random_device()
         x = random_tensor(ndim=0).to(device)
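Note (reviewer sketch, not part of the patch): the three functors above all reduce to the same channel-wise rule implemented by MakeFeatureNoise + DropoutImpl — draw one Bernoulli sample per (batch, channel) pair with keep probability 1 - p, zero the whole channel, and rescale the survivors by 1 / (1 - p) so the expected value is unchanged. A minimal NumPy illustration of that semantics, assuming only NumPy (the function name channelwise_dropout is made up for this example):

    import numpy as np

    def channelwise_dropout(x, p=0.5, training=True):
        # x has shape (N, C, *spatial); every (n, c) channel is kept or
        # dropped as a unit, mirroring the (N, C, 1, ...) noise tensor
        # built by MakeFeatureNoise in the patch.
        if not training or p == 0:
            return x
        if p == 1:
            return np.zeros_like(x)
        noise_shape = x.shape[:2] + (1,) * (x.ndim - 2)
        keep = np.random.binomial(1, 1.0 - p, size=noise_shape).astype(x.dtype)
        # Rescale survivors by 1/(1-p), matching the InplaceScalarDiv step
        # in DropoutImpl, so E[output] == input.
        return x * keep / (1.0 - p)

    # Example: a (2, 3, 4) input with p=0.5; each of the 6 channels is
    # either all zeros or scaled by 2.
    out = channelwise_dropout(np.ones((2, 3, 4)), p=0.5)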