diff --git a/docs/source/package_reference/utilities.md b/docs/source/package_reference/utilities.md index b1c34cb5b7a..9e7aece6df7 100644 --- a/docs/source/package_reference/utilities.md +++ b/docs/source/package_reference/utilities.md @@ -202,8 +202,6 @@ These utilities relate to interacting with PyTorch models [[autodoc]] utils.set_module_tensor_to_device -[[autodoc]] utils.shard_checkpoint - ## Parallel diff --git a/src/accelerate/accelerator.py b/src/accelerate/accelerator.py index e13d5e7bfeb..736186104d5 100755 --- a/src/accelerate/accelerator.py +++ b/src/accelerate/accelerator.py @@ -437,29 +437,6 @@ def __init__( if dataloader_config is None: dataloader_config = DataLoaderConfiguration() self.dataloader_config = dataloader_config - # Deal with deprecated args - # TODO: Remove in v1.0.0 - deprecated_dl_args = {} - if dispatch_batches is not _dispatch_batches: - deprecated_dl_args["dispatch_batches"] = dispatch_batches - self.dataloader_config.dispatch_batches = dispatch_batches - if split_batches is not _split_batches: - deprecated_dl_args["split_batches"] = split_batches - self.dataloader_config.split_batches = split_batches - if even_batches is not _even_batches: - deprecated_dl_args["even_batches"] = even_batches - self.dataloader_config.even_batches = even_batches - if use_seedable_sampler is not _use_seedable_sampler: - deprecated_dl_args["use_seedable_sampler"] = use_seedable_sampler - self.dataloader_config.use_seedable_sampler = use_seedable_sampler - if len(deprecated_dl_args) > 0: - values = ", ".join([f"{k}={v}" for k, v in deprecated_dl_args.items()]) - warnings.warn( - f"Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: {deprecated_dl_args.keys()}. " - "Please pass an `accelerate.DataLoaderConfiguration` instead: \n" - f"dataloader_config = DataLoaderConfiguration({values})", - FutureWarning, - ) self.step_scheduler_with_optimizer = step_scheduler_with_optimizer # Mixed precision attributes @@ -620,15 +597,6 @@ def is_local_main_process(self): """True for one process per server.""" return self.state.is_local_main_process - @property - def use_fp16(self): - warnings.warn( - "The `use_fp16` property is deprecated and will be removed in version 1.0 of Accelerate use " - "`Accelerator.mixed_precision == 'fp16'` instead.", - FutureWarning, - ) - return self.mixed_precision != "no" - @property def is_last_process(self): return self.process_index == self.num_processes - 1 @@ -3408,7 +3376,7 @@ def register_for_checkpointing(self, *objects): self._custom_objects.extend(objects) @contextmanager - def autocast(self, cache_enabled: bool = False, autocast_handler: AutocastKwargs = None): + def autocast(self, autocast_handler: AutocastKwargs = None): """ Will apply automatic mixed-precision inside the block inside this context manager, if it is enabled. Nothing different will happen otherwise. @@ -3426,16 +3394,6 @@ def autocast(self, cache_enabled: bool = False, autocast_handler: AutocastKwargs ... train() ``` """ - if cache_enabled: - warnings.warn( - "Passing `cache_enabled=True` to `accelerator.autocast` is deprecated and will be removed in v0.23.0. 
" - "Please use the `AutocastKwargs` class instead and pass it to the `Accelerator` as a `kwarg_handler`.", - FutureWarning, - ) - if self.autocast_handler is not None: - self.autocast_handler.cache_enabled = True - else: - self.autocast_handler = AutocastKwargs(cache_enabled=True) if autocast_handler is None: autocast_handler = self.autocast_handler autocast_context = get_mixed_precision_context_manager(self.native_amp, autocast_handler) diff --git a/src/accelerate/commands/launch.py b/src/accelerate/commands/launch.py index b4f606f493a..92e27cbfd4a 100644 --- a/src/accelerate/commands/launch.py +++ b/src/accelerate/commands/launch.py @@ -541,12 +541,6 @@ def launch_command_parser(subparsers=None): help="Transformer layer class name (case-sensitive) to wrap ,e.g, `BertLayer`, `GPTJBlock`, `T5Block` .... " "(useful only when `use_fsdp` flag is passed).", ) - fsdp_args.add_argument( - "--fsdp_backward_prefetch_policy", - default=None, - type=str, - help="This argument is deprecated and will be removed in version 0.27.0 of 🤗 Accelerate. Use `fsdp_backward_prefetch` instead.", - ) fsdp_args.add_argument( "--fsdp_backward_prefetch", default=None, diff --git a/src/accelerate/optimizer.py b/src/accelerate/optimizer.py index 1c0a777dcba..acc238a1a99 100644 --- a/src/accelerate/optimizer.py +++ b/src/accelerate/optimizer.py @@ -13,7 +13,6 @@ # limitations under the License. import inspect -import warnings import torch @@ -177,16 +176,6 @@ def _switch_parameters(self, parameters_map): for param_group in self.optimizer.param_groups: param_group["params"] = [parameters_map.get(p, p) for p in param_group["params"]] - @property - def is_overflow(self): - """Whether or not the optimizer step was done, or skipped because of gradient overflow.""" - warnings.warn( - "The `is_overflow` property is deprecated and will be removed in version 1.0 of Accelerate use " - "`optimizer.step_was_skipped` instead.", - FutureWarning, - ) - return self._is_overflow - @property def step_was_skipped(self): """Whether or not the optimizer step was skipped.""" diff --git a/src/accelerate/state.py b/src/accelerate/state.py index 37c96babb9e..0b4631361c9 100644 --- a/src/accelerate/state.py +++ b/src/accelerate/state.py @@ -962,16 +962,6 @@ def _check_initialized(self, mixed_precision=None, cpu=None): ): raise ValueError(err.format(flag=f"mixed_precision='{mixed_precision}'")) - # For backward compatibility - @property - def use_fp16(self): - warnings.warn( - "The `use_fp16` property is deprecated and will be removed in version 1.0 of Accelerate use " - "`AcceleratorState.mixed_precision == 'fp16'` instead.", - FutureWarning, - ) - return self._mixed_precision != "no" - @property def mixed_precision(self): if self.distributed_type == DistributedType.DEEPSPEED: diff --git a/src/accelerate/utils/__init__.py b/src/accelerate/utils/__init__.py index ed6c77d8de6..d9fa88990d7 100644 --- a/src/accelerate/utils/__init__.py +++ b/src/accelerate/utils/__init__.py @@ -138,7 +138,6 @@ named_module_tensors, retie_parameters, set_module_tensor_to_device, - shard_checkpoint, ) from .offload import ( OffloadedWeightsLoader, diff --git a/src/accelerate/utils/dataclasses.py b/src/accelerate/utils/dataclasses.py index ef2715978e3..39c7958a7b6 100644 --- a/src/accelerate/utils/dataclasses.py +++ b/src/accelerate/utils/dataclasses.py @@ -542,7 +542,6 @@ class DistributedType(str, enum.Enum): - **MULTI_XPU** -- Distributed on multiple XPUs. - **DEEPSPEED** -- Using DeepSpeed. - **XLA** -- Using TorchXLA. 
- - **TPU** -- This field will be deprecated in v0.27.0. Use XLA instead. """ # Subclassing str as well as Enum allows the `DistributedType` to be JSON-serializable out of the box. diff --git a/src/accelerate/utils/imports.py b/src/accelerate/utils/imports.py index ee02aac3098..7a79d4f0833 100644 --- a/src/accelerate/utils/imports.py +++ b/src/accelerate/utils/imports.py @@ -131,28 +131,6 @@ def is_cuda_available(): return available -@lru_cache -def is_tpu_available(check_device=True): - "Checks if `torch_xla` is installed and potentially if a TPU is in the environment" - warnings.warn( - "`is_tpu_available` is deprecated and will be removed in v0.27.0. " - "Please use the `is_torch_xla_available` instead.", - FutureWarning, - ) - # Due to bugs on the amp series GPUs, we disable torch-xla on them - if is_cuda_available(): - return False - if check_device: - if _tpu_available: - try: - # Will raise a RuntimeError if no XLA configuration is found - _ = xm.xla_device() - return True - except RuntimeError: - return False - return _tpu_available - - @lru_cache def is_torch_xla_available(check_is_tpu=False, check_is_gpu=False): """ @@ -274,11 +252,6 @@ def is_boto3_available(): def is_rich_available(): if _is_package_available("rich"): - if "ACCELERATE_DISABLE_RICH" in os.environ: - warnings.warn( - "`ACCELERATE_DISABLE_RICH` is deprecated and will be removed in v0.22.0 and deactivated by default. Please use `ACCELERATE_ENABLE_RICH` if you wish to use `rich`." - ) - return not parse_flag_from_env("ACCELERATE_DISABLE_RICH", False) return parse_flag_from_env("ACCELERATE_ENABLE_RICH", False) return False diff --git a/src/accelerate/utils/launch.py b/src/accelerate/utils/launch.py index 56cf1a729b4..c6f3d60031d 100644 --- a/src/accelerate/utils/launch.py +++ b/src/accelerate/utils/launch.py @@ -16,7 +16,6 @@ import os import subprocess import sys -import warnings from ast import literal_eval from shutil import which from typing import Any, Dict, List, Tuple @@ -275,13 +274,6 @@ def prepare_multi_gpu_env(args: argparse.Namespace) -> Dict[str, str]: current_env["FSDP_AUTO_WRAP_POLICY"] = str(args.fsdp_auto_wrap_policy) if args.fsdp_transformer_layer_cls_to_wrap is not None: current_env["FSDP_TRANSFORMER_CLS_TO_WRAP"] = str(args.fsdp_transformer_layer_cls_to_wrap) - if args.fsdp_backward_prefetch_policy is not None: - warnings.warn( - "`fsdp_backward_prefetch_policy` is deprecated and will be removed in version 0.27.0 of 🤗 Accelerate. Use" - " `fsdp_backward_prefetch` instead", - FutureWarning, - ) - args.fsdp_backward_prefetch = args.fsdp_backward_prefetch_policy if args.fsdp_backward_prefetch is not None: current_env["FSDP_BACKWARD_PREFETCH"] = str(args.fsdp_backward_prefetch) if args.fsdp_state_dict_type is not None: diff --git a/src/accelerate/utils/modeling.py b/src/accelerate/utils/modeling.py index 40798387a1e..f4230c55994 100644 --- a/src/accelerate/utils/modeling.py +++ b/src/accelerate/utils/modeling.py @@ -208,93 +208,6 @@ def id_tensor_storage(tensor: torch.Tensor) -> Tuple[torch.device, int, int]: return tensor.device, storage_ptr, storage_size -def shard_checkpoint( - state_dict: Dict[str, torch.Tensor], max_shard_size: Union[int, str] = "10GB", weights_name: str = WEIGHTS_NAME -): - """ - Splits a model state dictionary in sub-checkpoints so that the final size of each sub-checkpoint does not exceed a - given size. 
- - The sub-checkpoints are determined by iterating through the `state_dict` in the order of its keys, so there is no - optimization made to make each sub-checkpoint as close as possible to the maximum size passed. For example, if the - limit is 10GB and we have weights of sizes [6GB, 6GB, 2GB, 6GB, 2GB, 2GB] they will get sharded as [6GB], [6+2GB], - [6+2+2GB] and not [6+2+2GB], [6+2GB], [6GB]. - - - - If one of the model's weight is bigger that `max_sahrd_size`, it will end up in its own sub-checkpoint which will - have a size greater than `max_shard_size`. - - - - Args: - state_dict (`Dict[str, torch.Tensor]`): The state dictionary of a model to save. - max_shard_size (`int` or `str`, *optional*, defaults to `"10GB"`): - The maximum size of each sub-checkpoint. If expressed as a string, needs to be digits followed by a unit - (like `"5MB"`). - weights_name (`str`, *optional*, defaults to `"pytorch_model.bin"`): - The name of the model save file. - """ - logger.warning( - "Note that `shard_checkpoint` is deprecated and will be removed in 0.33.0. We recommend you using " - "split_torch_state_dict_into_shards from huggingface_hub library" - ) - - max_shard_size = convert_file_size_to_int(max_shard_size) - - sharded_state_dicts = [{}] - last_block_size = 0 - total_size = 0 - storage_id_to_block = {} - - for key, weight in state_dict.items(): - # when bnb serialization is used the weights in the state dict can be strings - # check: https://github.com/huggingface/transformers/pull/24416 for more details - if isinstance(weight, str): - continue - else: - storage_id = id_tensor_storage(weight) - - # If a `weight` shares the same underlying storage as another tensor, we put `weight` in the same `block` - if storage_id in storage_id_to_block: - block_id = storage_id_to_block[storage_id] - sharded_state_dicts[block_id][key] = weight - continue - - weight_size = weight.numel() * dtype_byte_size(weight.dtype) - - # If this weight is going to tip up over the maximal size, we split. - if last_block_size + weight_size > max_shard_size: - sharded_state_dicts.append({}) - last_block_size = 0 - - sharded_state_dicts[-1][key] = weight - last_block_size += weight_size - total_size += weight_size - storage_id_to_block[storage_id] = len(sharded_state_dicts) - 1 - - # If we only have one shard, we return it - if len(sharded_state_dicts) == 1: - return {weights_name: sharded_state_dicts[0]}, None - - # Otherwise, let's build the index - weight_map = {} - shards = {} - for idx, shard in enumerate(sharded_state_dicts): - shard_file = weights_name.replace(".bin", f"-{idx + 1:05d}-of-{len(sharded_state_dicts):05d}.bin") - shard_file = shard_file.replace( - ".safetensors", f"-{idx + 1:05d}-of-{len(sharded_state_dicts):05d}.safetensors" - ) - shards[shard_file] = shard - for key in shard.keys(): - weight_map[key] = shard_file - - # Add the metadata - metadata = {"total_size": total_size} - index = {"metadata": metadata, "weight_map": weight_map} - return shards, index - - def set_module_tensor_to_device( module: nn.Module, tensor_name: str, @@ -559,7 +472,10 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def values(self): - # TODO: at the next Transformers release (4.28.0) issue a deprecation warning here. + warnings.warn( + "The 'values' method of FindTiedParametersResult is deprecated and will be removed in Accelerate v1.3.0. 
", + FutureWarning, + ) return sum([x[1:] for x in self], []) diff --git a/src/accelerate/utils/tqdm.py b/src/accelerate/utils/tqdm.py index 1771366c84d..2d4873c1573 100644 --- a/src/accelerate/utils/tqdm.py +++ b/src/accelerate/utils/tqdm.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import warnings from .imports import is_tqdm_available @@ -34,13 +33,10 @@ def tqdm(*args, main_process_only: bool = True, **kwargs): if not is_tqdm_available(): raise ImportError("Accelerate's `tqdm` module requires `tqdm` to be installed. Please run `pip install tqdm`.") if len(args) > 0 and isinstance(args[0], bool): - warnings.warn( - f"Passing `{args[0]}` as the first argument to Accelerate's `tqdm` wrapper is deprecated " - "and will be removed in v0.33.0. Please use the `main_process_only` keyword argument instead.", - FutureWarning, + raise ValueError( + "Passing `True` or `False` as the first argument to Accelerate's `tqdm` wrapper is unsupported. " + "Please use the `main_process_only` keyword argument instead." ) - main_process_only = args[0] - args = args[1:] disable = kwargs.pop("disable", False) if main_process_only and not disable: disable = PartialState().local_process_index != 0 diff --git a/tests/test_accelerator.py b/tests/test_accelerator.py index 00c18506ced..9b18fe5c909 100644 --- a/tests/test_accelerator.py +++ b/tests/test_accelerator.py @@ -20,7 +20,6 @@ from unittest.mock import patch import psutil -import pytest import torch from parameterized import parameterized from torch.utils.data import DataLoader, TensorDataset @@ -126,43 +125,6 @@ def parameterized_custom_name_func(func, param_num, param): class AcceleratorTester(AccelerateTestCase): - # Should be removed after 1.0.0 release - def test_deprecated_values(self): - # Test defaults - accelerator = Accelerator() - assert accelerator.split_batches is False, "split_batches should be False by default" - assert accelerator.dispatch_batches is None, "dispatch_batches should be None by default" - assert accelerator.even_batches is True, "even_batches should be True by default" - assert accelerator.use_seedable_sampler is False, "use_seedable_sampler should be False by default" - - # Pass some arguments only - with pytest.warns(FutureWarning) as cm: - accelerator = Accelerator( - dispatch_batches=True, - split_batches=False, - ) - deprecation_warning = str(cm.list[0].message) - assert accelerator.split_batches is False, "split_batches should be True" - assert accelerator.dispatch_batches is True, "dispatch_batches should be True" - assert accelerator.even_batches is True, "even_batches should be True by default" - assert accelerator.use_seedable_sampler is False, "use_seedable_sampler should be False by default" - assert "dispatch_batches" in deprecation_warning - assert "split_batches" in deprecation_warning - assert "even_batches" not in deprecation_warning - assert "use_seedable_sampler" not in deprecation_warning - - # Pass in some arguments, but with their defaults - with pytest.warns(FutureWarning) as cm: - accelerator = Accelerator( - even_batches=True, - use_seedable_sampler=False, - ) - deprecation_warning = str(cm.list[0].message) - assert "even_batches" in deprecation_warning - assert accelerator.even_batches is True - assert "use_seedable_sampler" in deprecation_warning - assert accelerator.use_seedable_sampler is False - def test_partial_state_after_reset(self): # Verifies that custom getattr errors will be thrown # if the state is reset, but 
only if trying to diff --git a/tests/test_kwargs_handlers.py b/tests/test_kwargs_handlers.py index 66e2cbb14fc..bf8bd1bd939 100644 --- a/tests/test_kwargs_handlers.py +++ b/tests/test_kwargs_handlers.py @@ -56,7 +56,7 @@ def test_grad_scaler_kwargs(self): scaler_handler = GradScalerKwargs(init_scale=1024, growth_factor=2) AcceleratorState._reset_state() accelerator = Accelerator(mixed_precision="fp16", kwargs_handlers=[scaler_handler]) - print(accelerator.use_fp16) + assert accelerator.mixed_precision == "fp16" scaler = accelerator.scaler # Check the kwargs have been applied diff --git a/tests/test_utils.py b/tests/test_utils.py index aa95ed9703f..ed4481ed92c 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -53,7 +53,6 @@ recursively_apply, save, send_to_device, - tqdm, ) from accelerate.utils.operations import is_namedtuple @@ -405,9 +404,3 @@ def test_convert_dict_to_env_variables(self): with self.assertLogs("accelerate.utils.environment", level="WARNING"): valid_env_items = convert_dict_to_env_variables(env) assert valid_env_items == ["ACCELERATE_DEBUG_MODE=1\n", "OTHER_ENV=2\n"] - - def test_tqdm_deprecation(self): - with pytest.warns(FutureWarning) as cm: - tqdm(True, range(3), disable=True) - assert "Passing `True` as the first argument to" in cm.pop().message.args[0] - tqdm(range(3), main_process_only=True, disable=True)
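
For reference, a minimal sketch of the replacement APIs that the removed deprecation shims pointed to. This assumes accelerate >= 1.0 and a huggingface_hub release that provides `split_torch_state_dict_into_shards`; the tensors and option values below are placeholders, not part of this patch.

```python
import torch
from accelerate import Accelerator, DataLoaderConfiguration
from accelerate.utils import AutocastKwargs, is_torch_xla_available, tqdm
from huggingface_hub import split_torch_state_dict_into_shards

# Dataloader behavior is configured through `DataLoaderConfiguration`
# instead of individual `Accelerator(...)` keyword arguments.
dataloader_config = DataLoaderConfiguration(split_batches=False, dispatch_batches=True)

# `cache_enabled` is set through `AutocastKwargs` passed as a kwargs handler,
# not through `accelerator.autocast(cache_enabled=True)`.
accelerator = Accelerator(
    dataloader_config=dataloader_config,
    kwargs_handlers=[AutocastKwargs(cache_enabled=True)],
)

# Replacements for the removed `use_fp16` and `is_tpu_available` helpers.
uses_fp16 = accelerator.mixed_precision == "fp16"
on_xla = is_torch_xla_available()

# `utils.shard_checkpoint` is superseded by the huggingface_hub helper.
state_dict = {"weight": torch.zeros(4, 4)}
split = split_torch_state_dict_into_shards(state_dict, max_shard_size="10GB")

# The tqdm wrapper now only accepts `main_process_only` as a keyword argument.
for _ in tqdm(range(3), main_process_only=True, disable=True):
    pass
```

After a skipped optimizer step, `optimizer.step_was_skipped` replaces the removed `is_overflow` property, and `accelerate launch` accepts only `--fsdp_backward_prefetch` in place of the removed `--fsdp_backward_prefetch_policy` flag.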