🚨🚨🚨 The Great Deprecation 🚨🚨🚨 (#3098)
* The great purge

* Clean

* Some more fixings

* Some more deprecations Benjamin found

* Fix kwarghandler test
muellerzr authored Sep 13, 2024
1 parent 8a43837 commit 79a8426
Showing 14 changed files with 9 additions and 250 deletions.
2 changes: 0 additions & 2 deletions docs/source/package_reference/utilities.md
@@ -202,8 +202,6 @@ These utilities relate to interacting with PyTorch models

[[autodoc]] utils.set_module_tensor_to_device

[[autodoc]] utils.shard_checkpoint


## Parallel

44 changes: 1 addition & 43 deletions src/accelerate/accelerator.py
@@ -437,29 +437,6 @@ def __init__(
if dataloader_config is None:
dataloader_config = DataLoaderConfiguration()
self.dataloader_config = dataloader_config
# Deal with deprecated args
# TODO: Remove in v1.0.0
deprecated_dl_args = {}
if dispatch_batches is not _dispatch_batches:
deprecated_dl_args["dispatch_batches"] = dispatch_batches
self.dataloader_config.dispatch_batches = dispatch_batches
if split_batches is not _split_batches:
deprecated_dl_args["split_batches"] = split_batches
self.dataloader_config.split_batches = split_batches
if even_batches is not _even_batches:
deprecated_dl_args["even_batches"] = even_batches
self.dataloader_config.even_batches = even_batches
if use_seedable_sampler is not _use_seedable_sampler:
deprecated_dl_args["use_seedable_sampler"] = use_seedable_sampler
self.dataloader_config.use_seedable_sampler = use_seedable_sampler
if len(deprecated_dl_args) > 0:
values = ", ".join([f"{k}={v}" for k, v in deprecated_dl_args.items()])
warnings.warn(
f"Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: {deprecated_dl_args.keys()}. "
"Please pass an `accelerate.DataLoaderConfiguration` instead: \n"
f"dataloader_config = DataLoaderConfiguration({values})",
FutureWarning,
)
self.step_scheduler_with_optimizer = step_scheduler_with_optimizer

# Mixed precision attributes
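For reference, the removed warning already named the replacement: bundle the dataloader options into a `DataLoaderConfiguration` and pass it to `Accelerator`. A minimal sketch of the new usage (the option values here are illustrative):

```python
from accelerate import Accelerator, DataLoaderConfiguration

# Instead of Accelerator(dispatch_batches=True, split_batches=False, ...),
# collect the dataloader-related options in a single configuration object.
dataloader_config = DataLoaderConfiguration(
    dispatch_batches=True,
    split_batches=False,
    even_batches=True,
    use_seedable_sampler=False,
)
accelerator = Accelerator(dataloader_config=dataloader_config)
```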
@@ -620,15 +597,6 @@ def is_local_main_process(self):
"""True for one process per server."""
return self.state.is_local_main_process

@property
def use_fp16(self):
warnings.warn(
"The `use_fp16` property is deprecated and will be removed in version 1.0 of Accelerate use "
"`Accelerator.mixed_precision == 'fp16'` instead.",
FutureWarning,
)
return self.mixed_precision != "no"

@property
def is_last_process(self):
return self.process_index == self.num_processes - 1
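As the removed warning suggests, the replacement is to compare `Accelerator.mixed_precision` directly; a short sketch, assuming a device that supports fp16:

```python
from accelerate import Accelerator

accelerator = Accelerator(mixed_precision="fp16")

# Previously: accelerator.use_fp16
uses_fp16 = accelerator.mixed_precision == "fp16"
```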
@@ -3408,7 +3376,7 @@ def register_for_checkpointing(self, *objects):
self._custom_objects.extend(objects)

@contextmanager
def autocast(self, cache_enabled: bool = False, autocast_handler: AutocastKwargs = None):
def autocast(self, autocast_handler: AutocastKwargs = None):
"""
Will apply automatic mixed-precision inside the block inside this context manager, if it is enabled. Nothing
different will happen otherwise.
@@ -3426,16 +3394,6 @@ def autocast(self, cache_enabled: bool = False, autocast_handler: AutocastKwargs
... train()
```
"""
if cache_enabled:
warnings.warn(
"Passing `cache_enabled=True` to `accelerator.autocast` is deprecated and will be removed in v0.23.0. "
"Please use the `AutocastKwargs` class instead and pass it to the `Accelerator` as a `kwarg_handler`.",
FutureWarning,
)
if self.autocast_handler is not None:
self.autocast_handler.cache_enabled = True
else:
self.autocast_handler = AutocastKwargs(cache_enabled=True)
if autocast_handler is None:
autocast_handler = self.autocast_handler
autocast_context = get_mixed_precision_context_manager(self.native_amp, autocast_handler)
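The removed `cache_enabled` argument is replaced by `AutocastKwargs`, either passed to the `Accelerator` as a kwargs handler (as the old warning suggested) or given directly to `autocast(autocast_handler=...)`. A minimal sketch, assuming an fp16-capable device:

```python
from accelerate import Accelerator
from accelerate.utils import AutocastKwargs

# Previously: accelerator.autocast(cache_enabled=True)
autocast_kwargs = AutocastKwargs(cache_enabled=True)
accelerator = Accelerator(mixed_precision="fp16", kwargs_handlers=[autocast_kwargs])

with accelerator.autocast():
    ...  # forward pass runs under the configured autocast settings
```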
6 changes: 0 additions & 6 deletions src/accelerate/commands/launch.py
@@ -541,12 +541,6 @@ def launch_command_parser(subparsers=None):
help="Transformer layer class name (case-sensitive) to wrap ,e.g, `BertLayer`, `GPTJBlock`, `T5Block` .... "
"(useful only when `use_fsdp` flag is passed).",
)
fsdp_args.add_argument(
"--fsdp_backward_prefetch_policy",
default=None,
type=str,
help="This argument is deprecated and will be removed in version 0.27.0 of 🤗 Accelerate. Use `fsdp_backward_prefetch` instead.",
)
fsdp_args.add_argument(
"--fsdp_backward_prefetch",
default=None,
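With the deprecated `--fsdp_backward_prefetch_policy` alias removed, only `--fsdp_backward_prefetch` remains on the launcher. A rough Python-side equivalent via the FSDP plugin (the `backward_prefetch` field name is assumed from the corresponding FSDP option; treat this as a sketch):

```python
from torch.distributed.fsdp import BackwardPrefetch
from accelerate import Accelerator
from accelerate.utils import FullyShardedDataParallelPlugin

# Roughly what `accelerate launch --use_fsdp --fsdp_backward_prefetch BACKWARD_PRE ...` configures.
fsdp_plugin = FullyShardedDataParallelPlugin(backward_prefetch=BackwardPrefetch.BACKWARD_PRE)
accelerator = Accelerator(fsdp_plugin=fsdp_plugin)
```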
11 changes: 0 additions & 11 deletions src/accelerate/optimizer.py
@@ -13,7 +13,6 @@
# limitations under the License.

import inspect
import warnings

import torch

@@ -177,16 +176,6 @@ def _switch_parameters(self, parameters_map):
for param_group in self.optimizer.param_groups:
param_group["params"] = [parameters_map.get(p, p) for p in param_group["params"]]

@property
def is_overflow(self):
"""Whether or not the optimizer step was done, or skipped because of gradient overflow."""
warnings.warn(
"The `is_overflow` property is deprecated and will be removed in version 1.0 of Accelerate use "
"`optimizer.step_was_skipped` instead.",
FutureWarning,
)
return self._is_overflow

@property
def step_was_skipped(self):
"""Whether or not the optimizer step was skipped."""
10 changes: 0 additions & 10 deletions src/accelerate/state.py
@@ -962,16 +962,6 @@ def _check_initialized(self, mixed_precision=None, cpu=None):
):
raise ValueError(err.format(flag=f"mixed_precision='{mixed_precision}'"))

# For backward compatibility
@property
def use_fp16(self):
warnings.warn(
"The `use_fp16` property is deprecated and will be removed in version 1.0 of Accelerate use "
"`AcceleratorState.mixed_precision == 'fp16'` instead.",
FutureWarning,
)
return self._mixed_precision != "no"

@property
def mixed_precision(self):
if self.distributed_type == DistributedType.DEEPSPEED:
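Same migration as on `Accelerator`: check the `mixed_precision` attribute on the state instead. A one-line sketch:

```python
from accelerate import Accelerator
from accelerate.state import AcceleratorState

accelerator = Accelerator()  # fp16 would be enabled via Accelerator(mixed_precision="fp16")
# Previously: AcceleratorState().use_fp16
is_fp16 = AcceleratorState().mixed_precision == "fp16"
```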
1 change: 0 additions & 1 deletion src/accelerate/utils/__init__.py
@@ -138,7 +138,6 @@
named_module_tensors,
retie_parameters,
set_module_tensor_to_device,
shard_checkpoint,
)
from .offload import (
OffloadedWeightsLoader,
1 change: 0 additions & 1 deletion src/accelerate/utils/dataclasses.py
@@ -542,7 +542,6 @@ class DistributedType(str, enum.Enum):
- **MULTI_XPU** -- Distributed on multiple XPUs.
- **DEEPSPEED** -- Using DeepSpeed.
- **XLA** -- Using TorchXLA.
- **TPU** -- This field will be deprecated in v0.27.0. Use XLA instead.
"""

# Subclassing str as well as Enum allows the `DistributedType` to be JSON-serializable out of the box.
27 changes: 0 additions & 27 deletions src/accelerate/utils/imports.py
@@ -131,28 +131,6 @@ def is_cuda_available():
return available


@lru_cache
def is_tpu_available(check_device=True):
"Checks if `torch_xla` is installed and potentially if a TPU is in the environment"
warnings.warn(
"`is_tpu_available` is deprecated and will be removed in v0.27.0. "
"Please use the `is_torch_xla_available` instead.",
FutureWarning,
)
# Due to bugs on the amp series GPUs, we disable torch-xla on them
if is_cuda_available():
return False
if check_device:
if _tpu_available:
try:
# Will raise a RuntimeError if no XLA configuration is found
_ = xm.xla_device()
return True
except RuntimeError:
return False
return _tpu_available
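The removed helper points to `is_torch_xla_available`, which also lets you check specifically for a TPU backend; a minimal sketch:

```python
from accelerate.utils import is_torch_xla_available

# Previously: is_tpu_available()
if is_torch_xla_available(check_is_tpu=True):
    print("torch_xla is installed and a TPU device is usable.")
```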


@lru_cache
def is_torch_xla_available(check_is_tpu=False, check_is_gpu=False):
"""
@@ -274,11 +252,6 @@ def is_boto3_available():

def is_rich_available():
if _is_package_available("rich"):
if "ACCELERATE_DISABLE_RICH" in os.environ:
warnings.warn(
"`ACCELERATE_DISABLE_RICH` is deprecated and will be removed in v0.22.0 and deactivated by default. Please use `ACCELERATE_ENABLE_RICH` if you wish to use `rich`."
)
return not parse_flag_from_env("ACCELERATE_DISABLE_RICH", False)
return parse_flag_from_env("ACCELERATE_ENABLE_RICH", False)
return False
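Rich output is now opt-in only; a small sketch of enabling it via the surviving environment variable (set it before importing accelerate):

```python
import os

# The removed `ACCELERATE_DISABLE_RICH` flag no longer has any effect;
# opting in is done explicitly instead.
os.environ["ACCELERATE_ENABLE_RICH"] = "true"

from accelerate.utils.imports import is_rich_available

print(is_rich_available())  # True only if `rich` is installed and the flag is truthy
```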

8 changes: 0 additions & 8 deletions src/accelerate/utils/launch.py
@@ -16,7 +16,6 @@
import os
import subprocess
import sys
import warnings
from ast import literal_eval
from shutil import which
from typing import Any, Dict, List, Tuple
@@ -275,13 +274,6 @@ def prepare_multi_gpu_env(args: argparse.Namespace) -> Dict[str, str]:
current_env["FSDP_AUTO_WRAP_POLICY"] = str(args.fsdp_auto_wrap_policy)
if args.fsdp_transformer_layer_cls_to_wrap is not None:
current_env["FSDP_TRANSFORMER_CLS_TO_WRAP"] = str(args.fsdp_transformer_layer_cls_to_wrap)
if args.fsdp_backward_prefetch_policy is not None:
warnings.warn(
"`fsdp_backward_prefetch_policy` is deprecated and will be removed in version 0.27.0 of 🤗 Accelerate. Use"
" `fsdp_backward_prefetch` instead",
FutureWarning,
)
args.fsdp_backward_prefetch = args.fsdp_backward_prefetch_policy
if args.fsdp_backward_prefetch is not None:
current_env["FSDP_BACKWARD_PREFETCH"] = str(args.fsdp_backward_prefetch)
if args.fsdp_state_dict_type is not None:
92 changes: 4 additions & 88 deletions src/accelerate/utils/modeling.py
@@ -208,93 +208,6 @@ def id_tensor_storage(tensor: torch.Tensor) -> Tuple[torch.device, int, int]:
return tensor.device, storage_ptr, storage_size


def shard_checkpoint(
state_dict: Dict[str, torch.Tensor], max_shard_size: Union[int, str] = "10GB", weights_name: str = WEIGHTS_NAME
):
"""
Splits a model state dictionary in sub-checkpoints so that the final size of each sub-checkpoint does not exceed a
given size.
The sub-checkpoints are determined by iterating through the `state_dict` in the order of its keys, so there is no
optimization made to make each sub-checkpoint as close as possible to the maximum size passed. For example, if the
limit is 10GB and we have weights of sizes [6GB, 6GB, 2GB, 6GB, 2GB, 2GB] they will get sharded as [6GB], [6+2GB],
[6+2+2GB] and not [6+2+2GB], [6+2GB], [6GB].
<Tip warning={true}>
If one of the model's weights is bigger than `max_shard_size`, it will end up in its own sub-checkpoint, which will
have a size greater than `max_shard_size`.
</Tip>
Args:
state_dict (`Dict[str, torch.Tensor]`): The state dictionary of a model to save.
max_shard_size (`int` or `str`, *optional*, defaults to `"10GB"`):
The maximum size of each sub-checkpoint. If expressed as a string, needs to be digits followed by a unit
(like `"5MB"`).
weights_name (`str`, *optional*, defaults to `"pytorch_model.bin"`):
The name of the model save file.
"""
logger.warning(
"Note that `shard_checkpoint` is deprecated and will be removed in 0.33.0. We recommend you using "
"split_torch_state_dict_into_shards from huggingface_hub library"
)

max_shard_size = convert_file_size_to_int(max_shard_size)

sharded_state_dicts = [{}]
last_block_size = 0
total_size = 0
storage_id_to_block = {}

for key, weight in state_dict.items():
# when bnb serialization is used the weights in the state dict can be strings
# check: https://github.com/huggingface/transformers/pull/24416 for more details
if isinstance(weight, str):
continue
else:
storage_id = id_tensor_storage(weight)

# If a `weight` shares the same underlying storage as another tensor, we put `weight` in the same `block`
if storage_id in storage_id_to_block:
block_id = storage_id_to_block[storage_id]
sharded_state_dicts[block_id][key] = weight
continue

weight_size = weight.numel() * dtype_byte_size(weight.dtype)

# If this weight is going to tip up over the maximal size, we split.
if last_block_size + weight_size > max_shard_size:
sharded_state_dicts.append({})
last_block_size = 0

sharded_state_dicts[-1][key] = weight
last_block_size += weight_size
total_size += weight_size
storage_id_to_block[storage_id] = len(sharded_state_dicts) - 1

# If we only have one shard, we return it
if len(sharded_state_dicts) == 1:
return {weights_name: sharded_state_dicts[0]}, None

# Otherwise, let's build the index
weight_map = {}
shards = {}
for idx, shard in enumerate(sharded_state_dicts):
shard_file = weights_name.replace(".bin", f"-{idx + 1:05d}-of-{len(sharded_state_dicts):05d}.bin")
shard_file = shard_file.replace(
".safetensors", f"-{idx + 1:05d}-of-{len(sharded_state_dicts):05d}.safetensors"
)
shards[shard_file] = shard
for key in shard.keys():
weight_map[key] = shard_file

# Add the metadata
metadata = {"total_size": total_size}
index = {"metadata": metadata, "weight_map": weight_map}
return shards, index
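The removed warning recommends `split_torch_state_dict_into_shards` from `huggingface_hub`. A hedged sketch of the replacement (function and attribute names as documented in recent `huggingface_hub` releases; unlike `shard_checkpoint`, it returns the shard layout and leaves saving to the caller):

```python
import os

import torch
from huggingface_hub import split_torch_state_dict_into_shards

state_dict = torch.nn.Linear(8, 8).state_dict()
save_dir = "checkpoint"
os.makedirs(save_dir, exist_ok=True)

split = split_torch_state_dict_into_shards(
    state_dict, filename_pattern="pytorch_model{suffix}.bin", max_shard_size="10GB"
)
# Save each shard, then build an index mapping tensor names to shard files.
for filename, tensor_names in split.filename_to_tensors.items():
    shard = {name: state_dict[name] for name in tensor_names}
    torch.save(shard, os.path.join(save_dir, filename))
if split.is_sharded:
    index = {"metadata": split.metadata, "weight_map": split.tensor_to_filename}
```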


def set_module_tensor_to_device(
module: nn.Module,
tensor_name: str,
@@ -559,7 +472,10 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

def values(self):
# TODO: at the next Transformers release (4.28.0) issue a deprecation warning here.
warnings.warn(
"The 'values' method of FindTiedParametersResult is deprecated and will be removed in Accelerate v1.3.0. ",
FutureWarning,
)
return sum([x[1:] for x in self], [])


10 changes: 3 additions & 7 deletions src/accelerate/utils/tqdm.py
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import warnings

from .imports import is_tqdm_available

@@ -34,13 +33,10 @@ def tqdm(*args, main_process_only: bool = True, **kwargs):
if not is_tqdm_available():
raise ImportError("Accelerate's `tqdm` module requires `tqdm` to be installed. Please run `pip install tqdm`.")
if len(args) > 0 and isinstance(args[0], bool):
warnings.warn(
f"Passing `{args[0]}` as the first argument to Accelerate's `tqdm` wrapper is deprecated "
"and will be removed in v0.33.0. Please use the `main_process_only` keyword argument instead.",
FutureWarning,
)
raise ValueError(
"Passing `True` or `False` as the first argument to Accelerate's `tqdm` wrapper is unsupported. "
"Please use the `main_process_only` keyword argument instead."
)
main_process_only = args[0]
args = args[1:]
disable = kwargs.pop("disable", False)
if main_process_only and not disable:
disable = PartialState().local_process_index != 0
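Since a boolean positional argument now raises instead of warning, the only supported spelling is the keyword; a minimal sketch:

```python
from accelerate.utils import tqdm

# Previously: tqdm(True, range(100))
for _ in tqdm(range(100), main_process_only=True):
    pass
```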
38 changes: 0 additions & 38 deletions tests/test_accelerator.py
@@ -20,7 +20,6 @@
from unittest.mock import patch

import psutil
import pytest
import torch
from parameterized import parameterized
from torch.utils.data import DataLoader, TensorDataset
@@ -126,43 +125,6 @@ def parameterized_custom_name_func(func, param_num, param):


class AcceleratorTester(AccelerateTestCase):
# Should be removed after 1.0.0 release
def test_deprecated_values(self):
# Test defaults
accelerator = Accelerator()
assert accelerator.split_batches is False, "split_batches should be False by default"
assert accelerator.dispatch_batches is None, "dispatch_batches should be None by default"
assert accelerator.even_batches is True, "even_batches should be True by default"
assert accelerator.use_seedable_sampler is False, "use_seedable_sampler should be False by default"

# Pass some arguments only
with pytest.warns(FutureWarning) as cm:
accelerator = Accelerator(
dispatch_batches=True,
split_batches=False,
)
deprecation_warning = str(cm.list[0].message)
assert accelerator.split_batches is False, "split_batches should be True"
assert accelerator.dispatch_batches is True, "dispatch_batches should be True"
assert accelerator.even_batches is True, "even_batches should be True by default"
assert accelerator.use_seedable_sampler is False, "use_seedable_sampler should be False by default"
assert "dispatch_batches" in deprecation_warning
assert "split_batches" in deprecation_warning
assert "even_batches" not in deprecation_warning
assert "use_seedable_sampler" not in deprecation_warning

# Pass in some arguments, but with their defaults
with pytest.warns(FutureWarning) as cm:
accelerator = Accelerator(
even_batches=True,
use_seedable_sampler=False,
)
deprecation_warning = str(cm.list[0].message)
assert "even_batches" in deprecation_warning
assert accelerator.even_batches is True
assert "use_seedable_sampler" in deprecation_warning
assert accelerator.use_seedable_sampler is False

def test_partial_state_after_reset(self):
# Verifies that custom getattr errors will be thrown
# if the state is reset, but only if trying to