🚨🚨🚨 The Great Deprecation 🚨🚨🚨 (#3098)
* The great purge

* Clean

* Some more fixings

* Some more deprecations Benjamin found

* Fix kwarghandler test
muellerzr authored Sep 13, 2024
1 parent 8a43837 commit 79a8426
Showing 14 changed files with 9 additions and 250 deletions.
2 changes: 0 additions & 2 deletions docs/source/package_reference/utilities.md
@@ -202,8 +202,6 @@ These utilities relate to interacting with PyTorch models

[[autodoc]] utils.set_module_tensor_to_device

[[autodoc]] utils.shard_checkpoint


## Parallel

44 changes: 1 addition & 43 deletions src/accelerate/accelerator.py
@@ -437,29 +437,6 @@ def __init__(
if dataloader_config is None:
dataloader_config = DataLoaderConfiguration()
self.dataloader_config = dataloader_config
# Deal with deprecated args
# TODO: Remove in v1.0.0
deprecated_dl_args = {}
if dispatch_batches is not _dispatch_batches:
deprecated_dl_args["dispatch_batches"] = dispatch_batches
self.dataloader_config.dispatch_batches = dispatch_batches
if split_batches is not _split_batches:
deprecated_dl_args["split_batches"] = split_batches
self.dataloader_config.split_batches = split_batches
if even_batches is not _even_batches:
deprecated_dl_args["even_batches"] = even_batches
self.dataloader_config.even_batches = even_batches
if use_seedable_sampler is not _use_seedable_sampler:
deprecated_dl_args["use_seedable_sampler"] = use_seedable_sampler
self.dataloader_config.use_seedable_sampler = use_seedable_sampler
if len(deprecated_dl_args) > 0:
values = ", ".join([f"{k}={v}" for k, v in deprecated_dl_args.items()])
warnings.warn(
f"Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: {deprecated_dl_args.keys()}. "
"Please pass an `accelerate.DataLoaderConfiguration` instead: \n"
f"dataloader_config = DataLoaderConfiguration({values})",
FutureWarning,
)
self.step_scheduler_with_optimizer = step_scheduler_with_optimizer

# Mixed precision attributes
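For reference, the removed warning already named the replacement: bundle the dataloader options into a `DataLoaderConfiguration` and pass it to `Accelerator`. A minimal sketch of the new usage (the option values here are illustrative):

```python
from accelerate import Accelerator, DataLoaderConfiguration

# Instead of Accelerator(dispatch_batches=True, split_batches=False, ...),
# collect the dataloader-related options in a single configuration object.
dataloader_config = DataLoaderConfiguration(
    dispatch_batches=True,
    split_batches=False,
    even_batches=True,
    use_seedable_sampler=False,
)
accelerator = Accelerator(dataloader_config=dataloader_config)
```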
@@ -620,15 +597,6 @@ def is_local_main_process(self):
"""True for one process per server."""
return self.state.is_local_main_process

@property
def use_fp16(self):
warnings.warn(
"The `use_fp16` property is deprecated and will be removed in version 1.0 of Accelerate use "
"`Accelerator.mixed_precision == 'fp16'` instead.",
FutureWarning,
)
return self.mixed_precision != "no"

@property
def is_last_process(self):
return self.process_index == self.num_processes - 1
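As the removed warning suggests, the replacement is to compare `Accelerator.mixed_precision` directly; a short sketch, assuming a device that supports fp16:

```python
from accelerate import Accelerator

accelerator = Accelerator(mixed_precision="fp16")

# Previously: accelerator.use_fp16
uses_fp16 = accelerator.mixed_precision == "fp16"
```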
@@ -3408,7 +3376,7 @@ def register_for_checkpointing(self, *objects):
self._custom_objects.extend(objects)

@contextmanager
def autocast(self, cache_enabled: bool = False, autocast_handler: AutocastKwargs = None):
def autocast(self, autocast_handler: AutocastKwargs = None):
"""
Will apply automatic mixed-precision inside the block inside this context manager, if it is enabled. Nothing
different will happen otherwise.
@@ -3426,16 +3394,6 @@ def autocast(self, cache_enabled: bool = False, autocast_handler: AutocastKwargs
... train()
```
"""
if cache_enabled:
warnings.warn(
"Passing `cache_enabled=True` to `accelerator.autocast` is deprecated and will be removed in v0.23.0. "
"Please use the `AutocastKwargs` class instead and pass it to the `Accelerator` as a `kwarg_handler`.",
FutureWarning,
)
if self.autocast_handler is not None:
self.autocast_handler.cache_enabled = True
else:
self.autocast_handler = AutocastKwargs(cache_enabled=True)
if autocast_handler is None:
autocast_handler = self.autocast_handler
autocast_context = get_mixed_precision_context_manager(self.native_amp, autocast_handler)
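The removed `cache_enabled` argument is replaced by `AutocastKwargs`, either passed to the `Accelerator` as a kwargs handler (as the old warning suggested) or given directly to `autocast(autocast_handler=...)`. A minimal sketch, assuming an fp16-capable device:

```python
from accelerate import Accelerator
from accelerate.utils import AutocastKwargs

# Previously: accelerator.autocast(cache_enabled=True)
autocast_kwargs = AutocastKwargs(cache_enabled=True)
accelerator = Accelerator(mixed_precision="fp16", kwargs_handlers=[autocast_kwargs])

with accelerator.autocast():
    ...  # forward pass runs under the configured autocast settings
```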
6 changes: 0 additions & 6 deletions src/accelerate/commands/launch.py
@@ -541,12 +541,6 @@ def launch_command_parser(subparsers=None):
help="Transformer layer class name (case-sensitive) to wrap ,e.g, `BertLayer`, `GPTJBlock`, `T5Block` .... "
"(useful only when `use_fsdp` flag is passed).",
)
fsdp_args.add_argument(
"--fsdp_backward_prefetch_policy",
default=None,
type=str,
help="This argument is deprecated and will be removed in version 0.27.0 of 🤗 Accelerate. Use `fsdp_backward_prefetch` instead.",
)
fsdp_args.add_argument(
"--fsdp_backward_prefetch",
default=None,
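With the deprecated `--fsdp_backward_prefetch_policy` alias removed, only `--fsdp_backward_prefetch` remains on the launcher. A rough Python-side equivalent via the FSDP plugin (the `backward_prefetch` field name is assumed from the corresponding FSDP option; treat this as a sketch):

```python
from torch.distributed.fsdp import BackwardPrefetch
from accelerate import Accelerator
from accelerate.utils import FullyShardedDataParallelPlugin

# Roughly what `accelerate launch --use_fsdp --fsdp_backward_prefetch BACKWARD_PRE ...` configures.
fsdp_plugin = FullyShardedDataParallelPlugin(backward_prefetch=BackwardPrefetch.BACKWARD_PRE)
accelerator = Accelerator(fsdp_plugin=fsdp_plugin)
```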
11 changes: 0 additions & 11 deletions src/accelerate/optimizer.py
@@ -13,7 +13,6 @@
# limitations under the License.

import inspect
import warnings

import torch

@@ -177,16 +176,6 @@ def _switch_parameters(self, parameters_map):
for param_group in self.optimizer.param_groups:
param_group["params"] = [parameters_map.get(p, p) for p in param_group["params"]]

@property
def is_overflow(self):
"""Whether or not the optimizer step was done, or skipped because of gradient overflow."""
warnings.warn(
"The `is_overflow` property is deprecated and will be removed in version 1.0 of Accelerate use "
"`optimizer.step_was_skipped` instead.",
FutureWarning,
)
return self._is_overflow

@property
def step_was_skipped(self):
"""Whether or not the optimizer step was skipped."""
10 changes: 0 additions & 10 deletions src/accelerate/state.py
@@ -962,16 +962,6 @@ def _check_initialized(self, mixed_precision=None, cpu=None):
):
raise ValueError(err.format(flag=f"mixed_precision='{mixed_precision}'"))

# For backward compatibility
@property
def use_fp16(self):
warnings.warn(
"The `use_fp16` property is deprecated and will be removed in version 1.0 of Accelerate use "
"`AcceleratorState.mixed_precision == 'fp16'` instead.",
FutureWarning,
)
return self._mixed_precision != "no"

@property
def mixed_precision(self):
if self.distributed_type == DistributedType.DEEPSPEED:
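Same migration as on `Accelerator`: check the `mixed_precision` attribute on the state instead. A one-line sketch:

```python
from accelerate import Accelerator
from accelerate.state import AcceleratorState

accelerator = Accelerator()  # fp16 would be enabled via Accelerator(mixed_precision="fp16")
# Previously: AcceleratorState().use_fp16
is_fp16 = AcceleratorState().mixed_precision == "fp16"
```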
1 change: 0 additions & 1 deletion src/accelerate/utils/__init__.py
@@ -138,7 +138,6 @@
named_module_tensors,
retie_parameters,
set_module_tensor_to_device,
shard_checkpoint,
)
from .offload import (
OffloadedWeightsLoader,
1 change: 0 additions & 1 deletion src/accelerate/utils/dataclasses.py
@@ -542,7 +542,6 @@ class DistributedType(str, enum.Enum):
- **MULTI_XPU** -- Distributed on multiple XPUs.
- **DEEPSPEED** -- Using DeepSpeed.
- **XLA** -- Using TorchXLA.
- **TPU** -- This field will be deprecated in v0.27.0. Use XLA instead.
"""

# Subclassing str as well as Enum allows the `DistributedType` to be JSON-serializable out of the box.
27 changes: 0 additions & 27 deletions src/accelerate/utils/imports.py
@@ -131,28 +131,6 @@ def is_cuda_available():
return available


@lru_cache
def is_tpu_available(check_device=True):
"Checks if `torch_xla` is installed and potentially if a TPU is in the environment"
warnings.warn(
"`is_tpu_available` is deprecated and will be removed in v0.27.0. "
"Please use the `is_torch_xla_available` instead.",
FutureWarning,
)
# Due to bugs on the amp series GPUs, we disable torch-xla on them
if is_cuda_available():
return False
if check_device:
if _tpu_available:
try:
# Will raise a RuntimeError if no XLA configuration is found
_ = xm.xla_device()
return True
except RuntimeError:
return False
return _tpu_available
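The removed helper points to `is_torch_xla_available`, which also lets you check specifically for a TPU backend; a minimal sketch:

```python
from accelerate.utils import is_torch_xla_available

# Previously: is_tpu_available()
if is_torch_xla_available(check_is_tpu=True):
    print("torch_xla is installed and a TPU device is usable.")
```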


@lru_cache
def is_torch_xla_available(check_is_tpu=False, check_is_gpu=False):
"""
@@ -274,11 +252,6 @@ def is_boto3_available():

def is_rich_available():
if _is_package_available("rich"):
if "ACCELERATE_DISABLE_RICH" in os.environ:
warnings.warn(
"`ACCELERATE_DISABLE_RICH` is deprecated and will be removed in v0.22.0 and deactivated by default. Please use `ACCELERATE_ENABLE_RICH` if you wish to use `rich`."
)
return not parse_flag_from_env("ACCELERATE_DISABLE_RICH", False)
return parse_flag_from_env("ACCELERATE_ENABLE_RICH", False)
return False
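Rich output is now opt-in only; a small sketch of enabling it via the surviving environment variable (set it before importing accelerate):

```python
import os

# The removed `ACCELERATE_DISABLE_RICH` flag no longer has any effect;
# opting in is done explicitly instead.
os.environ["ACCELERATE_ENABLE_RICH"] = "true"

from accelerate.utils.imports import is_rich_available

print(is_rich_available())  # True only if `rich` is installed and the flag is truthy
```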

8 changes: 0 additions & 8 deletions src/accelerate/utils/launch.py
@@ -16,7 +16,6 @@
import os
import subprocess
import sys
import warnings
from ast import literal_eval
from shutil import which
from typing import Any, Dict, List, Tuple
@@ -275,13 +274,6 @@ def prepare_multi_gpu_env(args: argparse.Namespace) -> Dict[str, str]:
current_env["FSDP_AUTO_WRAP_POLICY"] = str(args.fsdp_auto_wrap_policy)
if args.fsdp_transformer_layer_cls_to_wrap is not None:
current_env["FSDP_TRANSFORMER_CLS_TO_WRAP"] = str(args.fsdp_transformer_layer_cls_to_wrap)
if args.fsdp_backward_prefetch_policy is not None:
warnings.warn(
"`fsdp_backward_prefetch_policy` is deprecated and will be removed in version 0.27.0 of 🤗 Accelerate. Use"
" `fsdp_backward_prefetch` instead",
FutureWarning,
)
args.fsdp_backward_prefetch = args.fsdp_backward_prefetch_policy
if args.fsdp_backward_prefetch is not None:
current_env["FSDP_BACKWARD_PREFETCH"] = str(args.fsdp_backward_prefetch)
if args.fsdp_state_dict_type is not None:
92 changes: 4 additions & 88 deletions src/accelerate/utils/modeling.py
@@ -208,93 +208,6 @@ def id_tensor_storage(tensor: torch.Tensor) -> Tuple[torch.device, int, int]:
return tensor.device, storage_ptr, storage_size


def shard_checkpoint(
state_dict: Dict[str, torch.Tensor], max_shard_size: Union[int, str] = "10GB", weights_name: str = WEIGHTS_NAME
):
"""
Splits a model state dictionary in sub-checkpoints so that the final size of each sub-checkpoint does not exceed a
given size.
The sub-checkpoints are determined by iterating through the `state_dict` in the order of its keys, so there is no
optimization made to make each sub-checkpoint as close as possible to the maximum size passed. For example, if the
limit is 10GB and we have weights of sizes [6GB, 6GB, 2GB, 6GB, 2GB, 2GB] they will get sharded as [6GB], [6+2GB],
[6+2+2GB] and not [6+2+2GB], [6+2GB], [6GB].
<Tip warning={true}>
If one of the model's weights is bigger than `max_shard_size`, it will end up in its own sub-checkpoint, which will
have a size greater than `max_shard_size`.
</Tip>
Args:
state_dict (`Dict[str, torch.Tensor]`): The state dictionary of a model to save.
max_shard_size (`int` or `str`, *optional*, defaults to `"10GB"`):
The maximum size of each sub-checkpoint. If expressed as a string, needs to be digits followed by a unit
(like `"5MB"`).
weights_name (`str`, *optional*, defaults to `"pytorch_model.bin"`):
The name of the model save file.
"""
logger.warning(
"Note that `shard_checkpoint` is deprecated and will be removed in 0.33.0. We recommend you using "
"split_torch_state_dict_into_shards from huggingface_hub library"
)

max_shard_size = convert_file_size_to_int(max_shard_size)

sharded_state_dicts = [{}]
last_block_size = 0
total_size = 0
storage_id_to_block = {}

for key, weight in state_dict.items():
# when bnb serialization is used the weights in the state dict can be strings
# check: https://github.com/huggingface/transformers/pull/24416 for more details
if isinstance(weight, str):
continue
else:
storage_id = id_tensor_storage(weight)

# If a `weight` shares the same underlying storage as another tensor, we put `weight` in the same `block`
if storage_id in storage_id_to_block:
block_id = storage_id_to_block[storage_id]
sharded_state_dicts[block_id][key] = weight
continue

weight_size = weight.numel() * dtype_byte_size(weight.dtype)

# If this weight is going to tip up over the maximal size, we split.
if last_block_size + weight_size > max_shard_size:
sharded_state_dicts.append({})
last_block_size = 0

sharded_state_dicts[-1][key] = weight
last_block_size += weight_size
total_size += weight_size
storage_id_to_block[storage_id] = len(sharded_state_dicts) - 1

# If we only have one shard, we return it
if len(sharded_state_dicts) == 1:
return {weights_name: sharded_state_dicts[0]}, None

# Otherwise, let's build the index
weight_map = {}
shards = {}
for idx, shard in enumerate(sharded_state_dicts):
shard_file = weights_name.replace(".bin", f"-{idx + 1:05d}-of-{len(sharded_state_dicts):05d}.bin")
shard_file = shard_file.replace(
".safetensors", f"-{idx + 1:05d}-of-{len(sharded_state_dicts):05d}.safetensors"
)
shards[shard_file] = shard
for key in shard.keys():
weight_map[key] = shard_file

# Add the metadata
metadata = {"total_size": total_size}
index = {"metadata": metadata, "weight_map": weight_map}
return shards, index
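The removed warning recommends `split_torch_state_dict_into_shards` from `huggingface_hub`. A hedged sketch of the replacement (function and attribute names as documented in recent `huggingface_hub` releases; unlike `shard_checkpoint`, it returns the shard layout and leaves saving to the caller):

```python
import os

import torch
from huggingface_hub import split_torch_state_dict_into_shards

state_dict = torch.nn.Linear(8, 8).state_dict()
save_dir = "checkpoint"
os.makedirs(save_dir, exist_ok=True)

split = split_torch_state_dict_into_shards(
    state_dict, filename_pattern="pytorch_model{suffix}.bin", max_shard_size="10GB"
)
# Save each shard, then build an index mapping tensor names to shard files.
for filename, tensor_names in split.filename_to_tensors.items():
    shard = {name: state_dict[name] for name in tensor_names}
    torch.save(shard, os.path.join(save_dir, filename))
if split.is_sharded:
    index = {"metadata": split.metadata, "weight_map": split.tensor_to_filename}
```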


def set_module_tensor_to_device(
module: nn.Module,
tensor_name: str,
@@ -559,7 +472,10 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

def values(self):
# TODO: at the next Transformers release (4.28.0) issue a deprecation warning here.
warnings.warn(
"The 'values' method of FindTiedParametersResult is deprecated and will be removed in Accelerate v1.3.0. ",
FutureWarning,
)
return sum([x[1:] for x in self], [])


10 changes: 3 additions & 7 deletions src/accelerate/utils/tqdm.py
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import warnings

from .imports import is_tqdm_available

@@ -34,13 +33,10 @@ def tqdm(*args, main_process_only: bool = True, **kwargs):
if not is_tqdm_available():
raise ImportError("Accelerate's `tqdm` module requires `tqdm` to be installed. Please run `pip install tqdm`.")
if len(args) > 0 and isinstance(args[0], bool):
warnings.warn(
f"Passing `{args[0]}` as the first argument to Accelerate's `tqdm` wrapper is deprecated "
"and will be removed in v0.33.0. Please use the `main_process_only` keyword argument instead.",
FutureWarning,
)
raise ValueError(
"Passing `True` or `False` as the first argument to Accelerate's `tqdm` wrapper is unsupported. "
"Please use the `main_process_only` keyword argument instead."
)
main_process_only = args[0]
args = args[1:]
disable = kwargs.pop("disable", False)
if main_process_only and not disable:
disable = PartialState().local_process_index != 0
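Since a boolean positional argument now raises instead of warning, the only supported spelling is the keyword; a minimal sketch:

```python
from accelerate.utils import tqdm

# Previously: tqdm(True, range(100))
for _ in tqdm(range(100), main_process_only=True):
    pass
```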
38 changes: 0 additions & 38 deletions tests/test_accelerator.py
@@ -20,7 +20,6 @@
from unittest.mock import patch

import psutil
import pytest
import torch
from parameterized import parameterized
from torch.utils.data import DataLoader, TensorDataset
@@ -126,43 +125,6 @@ def parameterized_custom_name_func(func, param_num, param):


class AcceleratorTester(AccelerateTestCase):
# Should be removed after 1.0.0 release
def test_deprecated_values(self):
# Test defaults
accelerator = Accelerator()
assert accelerator.split_batches is False, "split_batches should be False by default"
assert accelerator.dispatch_batches is None, "dispatch_batches should be None by default"
assert accelerator.even_batches is True, "even_batches should be True by default"
assert accelerator.use_seedable_sampler is False, "use_seedable_sampler should be False by default"

# Pass some arguments only
with pytest.warns(FutureWarning) as cm:
accelerator = Accelerator(
dispatch_batches=True,
split_batches=False,
)
deprecation_warning = str(cm.list[0].message)
assert accelerator.split_batches is False, "split_batches should be True"
assert accelerator.dispatch_batches is True, "dispatch_batches should be True"
assert accelerator.even_batches is True, "even_batches should be True by default"
assert accelerator.use_seedable_sampler is False, "use_seedable_sampler should be False by default"
assert "dispatch_batches" in deprecation_warning
assert "split_batches" in deprecation_warning
assert "even_batches" not in deprecation_warning
assert "use_seedable_sampler" not in deprecation_warning

# Pass in some arguments, but with their defaults
with pytest.warns(FutureWarning) as cm:
accelerator = Accelerator(
even_batches=True,
use_seedable_sampler=False,
)
deprecation_warning = str(cm.list[0].message)
assert "even_batches" in deprecation_warning
assert accelerator.even_batches is True
assert "use_seedable_sampler" in deprecation_warning
assert accelerator.use_seedable_sampler is False

def test_partial_state_after_reset(self):
# Verifies that custom getattr errors will be thrown
# if the state is reset, but only if trying to