
Commit 414566a
Commit message: update
1 parent 9d5528d

660 files changed: 339330 additions (+), 206 deletions (-)


requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 git+https://github.com/lonePatient/TorchBlocks.git
-transformers>=4.5.0
+transformers>=4.12.5
 bert4keras
 rjieba
 jieba

setup.py

Lines changed: 2 additions & 2 deletions
@@ -4,12 +4,12 @@
     name="roformer",
     package_dir={"": "src"},
     packages=find_packages("src"),
-    version="0.2.2",
+    version="0.3.0",
     license="Apache 2.0",
     description="roformer_pytorch",
     author="Jun Yu",
     author_email="[email protected]",
     url="https://github.com/JunnYu/RoFormer_pytorch",
     keywords=["roformer", "pytorch", "tf2.0"],
-    install_requires=["transformers==4.9.1", "jieba"],
+    install_requires=["transformers>=4.12.5", "rjieba"],
 )
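Note (not part of the diff): the pins above move from transformers==4.9.1 with jieba to transformers>=4.12.5 with rjieba. A minimal sketch for checking an existing environment against the new pins, assuming the packaging helper and both dependencies are installed:

# Sketch only: verify an installed environment satisfies the new pins.
from packaging.version import Version  # assumes `packaging` is available

import rjieba        # replaces the old jieba dependency
import transformers

assert Version(transformers.__version__) >= Version("4.12.5"), transformers.__version__
print("transformers", transformers.__version__, "and rjieba import fine")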

src/roformer/__init__.py

Lines changed: 8 additions & 2 deletions
@@ -17,13 +17,14 @@
 # limitations under the License.
 from typing import TYPE_CHECKING

-from transformers.file_utils import (
+from .transformers.file_utils import (
     _LazyModule,
     is_tf_available,
     is_tokenizers_available,
     is_torch_available,
 )

+
 _import_structure = {
     "configuration_roformer": [
         "ROFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP",
@@ -50,6 +51,7 @@
         "load_tf_weights_in_roformer",
     ]

+
 if is_tf_available():
     _import_structure["modeling_tf_roformer"] = [
         "TF_ROFORMER_PRETRAINED_MODEL_ARCHIVE_LIST",
@@ -64,6 +66,7 @@
         "TFRoFormerPreTrainedModel",
     ]

+
 if TYPE_CHECKING:
     from .configuration_roformer import (
         ROFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP,
@@ -103,7 +106,10 @@
         TFRoFormerPreTrainedModel,
     )

+
 else:
     import sys

-    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure)
+    sys.modules[__name__] = _LazyModule(
+        __name__, globals()["__file__"], _import_structure
+    )
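For context, the _LazyModule wiring above keeps the package import cheap: submodules only load when an attribute is first accessed. A rough usage sketch (not part of this commit; the exports are assumed from _import_structure and need a torch install):

# Sketch: lazy attribute access on the package (names assumed to be in _import_structure).
import roformer

config = roformer.RoFormerConfig()      # loads configuration_roformer on first access
model = roformer.RoFormerModel(config)  # loads modeling_roformer (requires torch)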

src/roformer/configuration_roformer.py

Lines changed: 4 additions & 15 deletions
@@ -14,8 +14,8 @@
 # limitations under the License.
 """ RoFormer model configuration """

-from transformers.configuration_utils import PretrainedConfig
-from transformers.utils import logging
+from .transformers.configuration_utils import PretrainedConfig
+from .transformers.utils import logging

 logger = logging.get_logger(__name__)

@@ -36,18 +36,16 @@ class RoFormerConfig(PretrainedConfig):
     instantiate an RoFormer model according to the specified arguments, defining the model architecture. Instantiating
     a configuration with the defaults will yield a similar configuration to that of the RoFormer
     `junnyu/roformer_chinese_base <https://huggingface.co/junnyu/roformer_chinese_base>`__ architecture.
-
     Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used to control the model
     outputs. Read the documentation from :class:`~transformers.PretrainedConfig` for more information.
-
-
     Args:
         vocab_size (:obj:`int`, `optional`, defaults to 50000):
             Vocabulary size of the RoFormer model. Defines the number of different tokens that can be represented by
             the :obj:`inputs_ids` passed when calling :class:`~transformers.RoFormerModel` or
             :class:`~transformers.TFRoFormerModel`.
         embedding_size (:obj:`int`, `optional`, defaults to None):
-            Dimensionality of the encoder layers and the pooler layer.
+            Dimensionality of the encoder layers and the pooler layer. Defaults to the :obj:`hidden_size` if not
+            provided.
         hidden_size (:obj:`int`, `optional`, defaults to 768):
             Dimension of the encoder layers and the pooler layer.
         num_hidden_layers (:obj:`int`, `optional`, defaults to 12):
@@ -78,19 +76,12 @@ class RoFormerConfig(PretrainedConfig):
             relevant if ``config.is_decoder=True``.
         rotary_value (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not apply rotary position embeddings on value layer.
-        gradient_checkpointing (:obj:`bool`, `optional`, defaults to :obj:`False`):
-            If :obj:`True`, use gradient checkpointing to save memory at the expense of slower backward pass.
-
     Example::
-
         >>> from transformers import RoFormerModel, RoFormerConfig
-
         >>> # Initializing a RoFormer junnyu/roformer_chinese_base style configuration
         >>> configuration = RoFormerConfig()
-
         >>> # Initializing a model from the junnyu/roformer_chinese_base style configuration
         >>> model = RoFormerModel(configuration)
-
         >>> # Accessing the model configuration
         >>> configuration = model.config
     """
@@ -112,7 +103,6 @@ def __init__(
         initializer_range=0.02,
         layer_norm_eps=1e-12,
         pad_token_id=0,
-        gradient_checkpointing=False,
         rotary_value=False,
         use_cache=True,
         **kwargs
@@ -132,6 +122,5 @@
         self.type_vocab_size = type_vocab_size
         self.initializer_range = initializer_range
         self.layer_norm_eps = layer_norm_eps
-        self.gradient_checkpointing = gradient_checkpointing
         self.rotary_value = rotary_value
         self.use_cache = use_cache
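The removed gradient_checkpointing flag follows the direction of newer transformers releases (>= 4.11), where checkpointing is toggled on the model rather than stored in the config. A hedged sketch of the replacement pattern, with the model and config names assumed from this package:

# Sketch: enable gradient checkpointing on the model instead of via a config flag.
from roformer import RoFormerConfig, RoFormerModel  # assumed exports

model = RoFormerModel(RoFormerConfig())
model.gradient_checkpointing_enable()  # standard PreTrainedModel method in transformers >= 4.11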

src/roformer/convert_roformer_original_tf_checkpoint_to_pytorch.py

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@
 import argparse

 import torch
-from transformers.utils import logging
+from .transformers.utils import logging

 from roformer import RoFormerConfig, RoFormerForMaskedLM, load_tf_weights_in_roformer

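For reference, the names imported above can also be driven programmatically, roughly as below. This is a hypothetical sketch mirroring the conversion script, not code from this commit; all paths are placeholders.

# Hypothetical sketch of a TF-to-PyTorch conversion using the imports shown above.
from roformer import RoFormerConfig, RoFormerForMaskedLM, load_tf_weights_in_roformer

config = RoFormerConfig.from_json_file("roformer_config.json")    # placeholder config path
model = RoFormerForMaskedLM(config)
load_tf_weights_in_roformer(model, config, "model.ckpt")          # placeholder TF checkpoint
model.save_pretrained("./roformer_pytorch_dump")                  # placeholder output dir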
