Commit e287911

Merge branch 'huggingface:main' into cj_fix_deepspeed_multinode_mpi_launchers
chiragjn authored Jan 27, 2025
2 parents b2b65c5 + 675e35b commit e287911
Showing 12 changed files with 54 additions and 28 deletions.
18 changes: 18 additions & 0 deletions Makefile
@@ -70,3 +70,21 @@ test_prod:

test_rest:
python -m pytest -s -v ./tests/test_examples.py::FeatureExamplesTests -k "not by_step and not by_epoch" $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_rest.log",)

# For developers to prepare a release
prepare_release:
rm -rf dist build
python setup.py bdist_wheel sdist

# Make sure this is run in a fresh venv of some form
install_test_release:
pip uninstall accelerate -y
pip install -i https://testpypi.python.org/pypi --extra-index-url https://pypi.org/simple accelerate

# Run as `make target=testpypi upload_release`
upload_release:
@if [ "$(target)" != "testpypi" ] && [ "$(target)" != "pypi" ]; then \
echo "Error: target must be either 'testpypi' or 'pypi'"; \
exit 1; \
fi
twine upload dist/* -r $(target)
13 changes: 4 additions & 9 deletions setup.py
@@ -103,20 +103,15 @@
# git tag v<VERSION> -m 'Adds tag v<VERSION> for pypi'
# Push the tag and release commit to git: git push --tags origin vXX.xx-release
# 5. Run the following commands in the top-level directory:
# rm -rf dist
# rm -rf build
# python setup.py bdist_wheel
# python setup.py sdist
# make prepare_release
# 6. Upload the package to the pypi test server first:
# twine upload dist/* -r testpypi
# make target=testpypi upload_release
# 7. Check that you can install it in a virtualenv by running:
# pip install accelerate
# pip uninstall accelerate
# pip install -i https://testpypi.python.org/pypi accelerate
# make install_test_release
# accelerate env
# accelerate test
# 8. Upload the final version to actual pypi:
# twine upload dist/* -r pypi
# make target=pypi upload_release
# 9. Add release notes to the tag in github once everything is looking hunky-dory.
# 10. Go back to the main branch and update the version in __init__.py, setup.py to the new version ".dev" and push to
# main.
4 changes: 2 additions & 2 deletions src/accelerate/accelerator.py
@@ -329,8 +329,8 @@ def __init__(
if compare_versions("deepspeed-mlu", "<", "0.10.1"):
raise ImportError("DeepSpeed MLU version must be >= 0.10.1. Please update DeepSpeed MLU.")
elif is_musa_available():
if compare_versions("deepspeed", ">", "0.14.3"):
raise ImportError("DeepSpeed MUSA version must be <= 0.14.3. Please downgrade DeepSpeed.")
if compare_versions("deepspeed", "<", "0.14.3"):
raise ImportError("DeepSpeed MUSA version must be >= 0.14.3. Please update DeepSpeed.")
elif compare_versions("deepspeed", "<", "0.9.3"):
raise ImportError("DeepSpeed version must be >= 0.9.3. Please update DeepSpeed.")

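Worth noting for reviewers: the MUSA branch now imposes a minimum rather than a maximum DeepSpeed version. Below is a minimal sketch of the resulting version gate, assuming `compare_versions` and the availability helpers live in `accelerate.utils.versions` / `accelerate.utils.imports` as the rest of the diff suggests; it is an illustration, not the verbatim `Accelerator.__init__` code.

```python
# Sketch of the DeepSpeed version gate after this change (illustrative only).
# Assumptions: compare_versions comes from accelerate.utils.versions and the
# is_*_available helpers from accelerate.utils.imports, as elsewhere in the diff.
from accelerate.utils.imports import is_mlu_available, is_musa_available
from accelerate.utils.versions import compare_versions


def check_deepspeed_compatibility():
    if is_mlu_available():
        if compare_versions("deepspeed-mlu", "<", "0.10.1"):
            raise ImportError("DeepSpeed MLU version must be >= 0.10.1. Please update DeepSpeed MLU.")
    elif is_musa_available():
        # Flipped by this commit: MUSA now requires DeepSpeed >= 0.14.3 (was <= 0.14.3).
        if compare_versions("deepspeed", "<", "0.14.3"):
            raise ImportError("DeepSpeed MUSA version must be >= 0.14.3. Please update DeepSpeed.")
    elif compare_versions("deepspeed", "<", "0.9.3"):
        raise ImportError("DeepSpeed version must be >= 0.9.3. Please update DeepSpeed.")
```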
5 changes: 5 additions & 0 deletions src/accelerate/checkpointing.py
@@ -33,6 +33,7 @@
WEIGHTS_NAME,
get_pretty_name,
is_mlu_available,
is_musa_available,
is_torch_xla_available,
is_xpu_available,
load,
@@ -152,6 +153,8 @@ def save_accelerator_state(
states["torch_xpu_manual_seed"] = torch.xpu.get_rng_state_all()
if is_mlu_available():
states["torch_mlu_manual_seed"] = torch.mlu.get_rng_state_all()
if is_musa_available():
states["torch_musa_manual_seed"] = torch.musa.get_rng_state_all()
else:
states["torch_cuda_manual_seed"] = torch.cuda.get_rng_state_all()
if is_torch_xla_available():
@@ -275,6 +278,8 @@ def load_accelerator_state(
torch.xpu.set_rng_state_all(states["torch_xpu_manual_seed"])
if is_mlu_available():
torch.mlu.set_rng_state_all(states["torch_mlu_manual_seed"])
if is_musa_available():
torch.musa.set_rng_state_all(states["torch_musa_manual_seed"])
else:
torch.cuda.set_rng_state_all(states["torch_cuda_manual_seed"])
if is_torch_xla_available():
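The two checkpointing hunks give MUSA the same RNG round trip the other backends already get. A minimal sketch of that save/restore symmetry, assuming a torch build that exposes `torch.musa` with the `get_rng_state_all`/`set_rng_state_all` API the diff itself calls:

```python
# Minimal sketch of the MUSA RNG-state round trip added above (not the full
# save_accelerator_state/load_accelerator_state logic). Assumes a torch build
# with a torch.musa backend, as implied by the calls in the diff.
import torch
from accelerate.utils.imports import is_musa_available

states = {}
if is_musa_available():
    # Save: capture the RNG state of every MUSA device so a resumed run
    # reproduces the same random streams (dropout masks, shuffling, ...).
    states["torch_musa_manual_seed"] = torch.musa.get_rng_state_all()

# ... checkpoint is written, training restarts, checkpoint is read back ...

if is_musa_available() and "torch_musa_manual_seed" in states:
    # Load: push the saved per-device states back into the MUSA generators.
    torch.musa.set_rng_state_all(states["torch_musa_manual_seed"])
```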
2 changes: 2 additions & 0 deletions src/accelerate/commands/env.py
@@ -83,6 +83,8 @@ def env_command(args):
info["GPU type"] = torch.cuda.get_device_name()
if pt_mlu_available:
info["MLU type"] = torch.mlu.get_device_name()
if pt_musa_available:
info["MUSA type"] = torch.musa.get_device_name()
if pt_npu_available:
info["CANN version"] = torch.version.cann

3 changes: 3 additions & 0 deletions src/accelerate/hooks.py
@@ -28,6 +28,7 @@
)
from .utils.imports import (
is_mlu_available,
is_musa_available,
is_npu_available,
is_xpu_available,
)
@@ -391,6 +392,8 @@ def post_forward(self, module, output):
device = f"npu:{device}"
elif is_mlu_available():
device = f"mlu:{device}"
elif is_musa_available():
device = f"musa:{device}"
elif is_xpu_available():
device = f"xpu:{device}"
del self.tied_params_map[value_pointer][device]
3 changes: 3 additions & 0 deletions src/accelerate/utils/dataclasses.py
@@ -41,6 +41,7 @@
is_cuda_available,
is_mlu_available,
is_msamp_available,
is_musa_available,
is_npu_available,
is_transformer_engine_available,
is_xpu_available,
@@ -1686,6 +1687,8 @@ def __post_init__(self):
device = torch.npu.current_device()
elif is_mlu_available():
device = torch.mlu.current_device()
elif is_musa_available():
device = torch.musa.current_device()
elif is_cuda_available():
device = torch.cuda.current_device()
elif is_xpu_available():
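The dataclasses change slots MUSA into the existing backend priority when resolving the current device index. A hedged sketch of that fall-through, with the availability helpers taken from `accelerate.utils.imports` as in the diff and the CPU fallback being a placeholder for illustration:

```python
# Sketch of the backend priority used when picking a device index, now
# including MUSA. Illustrative only; the real __post_init__ does more.
import torch
from accelerate.utils.imports import (
    is_cuda_available,
    is_mlu_available,
    is_musa_available,
    is_npu_available,
    is_xpu_available,
)


def current_device_index():
    if is_npu_available():
        return torch.npu.current_device()
    if is_mlu_available():
        return torch.mlu.current_device()
    if is_musa_available():
        return torch.musa.current_device()
    if is_cuda_available():
        return torch.cuda.current_device()
    if is_xpu_available():
        return torch.xpu.current_device()
    return None  # placeholder CPU fallback for this sketch
```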
11 changes: 7 additions & 4 deletions tests/test_accelerator.py
@@ -30,8 +30,9 @@
from accelerate.state import GradientState, PartialState
from accelerate.test_utils import (
require_bnb,
require_cuda_or_xpu,
require_huggingface_suite,
require_multi_gpu,
require_multi_device,
require_non_cpu,
require_transformer_engine,
slow,
@@ -452,7 +453,7 @@ def test_is_accelerator_prepared(self):
getattr(valid_dl, "_is_accelerate_prepared", False) is True
), "Valid Dataloader is missing `_is_accelerator_prepared` or is set to `False`"

@require_cuda
@require_cuda_or_xpu
@slow
@require_bnb
def test_accelerator_bnb(self):
@@ -498,7 +499,7 @@ def test_accelerator_bnb_cpu_error(self):
@require_non_torch_xla
@slow
@require_bnb
@require_multi_gpu
@require_multi_device
def test_accelerator_bnb_multi_device(self):
"""Tests that the accelerator can be used with the BNB library."""
from transformers import AutoModelForCausalLM
@@ -507,6 +508,8 @@ def test_accelerator_bnb_multi_device(self):
PartialState._shared_state = {"distributed_type": DistributedType.MULTI_GPU}
elif torch_device == "npu":
PartialState._shared_state = {"distributed_type": DistributedType.MULTI_NPU}
elif torch_device == "xpu":
PartialState._shared_state = {"distributed_type": DistributedType.MULTI_XPU}
else:
raise ValueError(f"{torch_device} is not supported in test_accelerator_bnb_multi_device.")

@@ -534,7 +537,7 @@ def test_accelerator_bnb_multi_device(self):
@require_non_torch_xla
@slow
@require_bnb
@require_multi_gpu
@require_multi_device
def test_accelerator_bnb_multi_device_no_distributed(self):
"""Tests that the accelerator can be used with the BNB library."""
from transformers import AutoModelForCausalLM
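These test edits swap CUDA-only decorators (`require_cuda`, `require_multi_gpu`) for device-agnostic ones (`require_cuda_or_xpu`, `require_multi_device`). A hedged sketch of the resulting style; the `torch_device` import path and the tiny model are assumptions for illustration, not part of the actual test file:

```python
# Hedged sketch of the device-agnostic test style adopted here. The
# torch_device import path is an assumption, and TinyNet is a placeholder.
import torch
from accelerate import Accelerator
from accelerate.test_utils import require_cuda_or_xpu
from accelerate.test_utils.testing import torch_device  # assumed location


class TinyNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(4, 4)

    def forward(self, x):
        return self.linear(x)


@require_cuda_or_xpu
def test_prepare_moves_model_to_accelerator():
    # Runs on CUDA or XPU machines instead of being pinned to CUDA only.
    accelerator = Accelerator()
    model = accelerator.prepare(TinyNet())
    assert next(model.parameters()).device.type == torch.device(torch_device).type
```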
15 changes: 8 additions & 7 deletions tests/test_big_modeling.py
@@ -36,6 +36,7 @@
from accelerate.test_utils import (
require_bnb,
require_cuda,
require_cuda_or_xpu,
require_multi_device,
require_multi_gpu,
require_non_cpu,
@@ -877,7 +878,7 @@ def test_cpu_offload_with_hook(self):
@require_non_torch_xla
@slow
@require_bnb
@require_multi_gpu
@require_multi_device
def test_dispatch_model_bnb(self):
"""Tests that `dispatch_model` quantizes int8 layers"""
from huggingface_hub import hf_hub_download
@@ -906,7 +907,7 @@ def test_dispatch_model_bnb(self):
assert model.h[(-1)].self_attention.query_key_value.weight.dtype == torch.int8
assert model.h[(-1)].self_attention.query_key_value.weight.device.index == 1

@require_cuda
@require_cuda_or_xpu
@slow
@require_bnb
def test_dispatch_model_int8_simple(self):
@@ -946,7 +947,7 @@ def test_dispatch_model_int8_simple(self):
model = load_checkpoint_and_dispatch(
model,
checkpoint=model_path,
device_map={"": torch.device("cuda:0")},
device_map={"": torch_device},
)

assert model.h[0].self_attention.query_key_value.weight.dtype == torch.int8
@@ -963,13 +964,13 @@ def test_dispatch_model_int8_simple(self):
model = load_checkpoint_and_dispatch(
model,
checkpoint=model_path,
device_map={"": "cuda:0"},
device_map={"": torch_device},
)

assert model.h[0].self_attention.query_key_value.weight.dtype == torch.int8
assert model.h[0].self_attention.query_key_value.weight.device.index == 0

@require_cuda
@require_cuda_or_xpu
@slow
@require_bnb
def test_dipatch_model_fp4_simple(self):
@@ -1010,7 +1011,7 @@ def test_dipatch_model_fp4_simple(self):
model = load_checkpoint_and_dispatch(
model,
checkpoint=model_path,
device_map={"": torch.device("cuda:0")},
device_map={"": torch_device},
)

assert model.h[0].self_attention.query_key_value.weight.dtype == torch.uint8
@@ -1027,7 +1028,7 @@ def test_dipatch_model_fp4_simple(self):
model = load_checkpoint_and_dispatch(
model,
checkpoint=model_path,
device_map={"": "cuda:0"},
device_map={"": torch_device},
)

assert model.h[0].self_attention.query_key_value.weight.dtype == torch.uint8
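The big-modeling tests replace hard-coded `cuda:0` device maps with `torch_device`, so the same test drives whichever accelerator the machine has. A hedged sketch of that dispatch pattern; the checkpoint path, the tiny model, and the `torch_device` import path are placeholders/assumptions:

```python
# Hedged sketch of the device-agnostic dispatch pattern the tests move to:
# an empty-string key in device_map places the whole model on torch_device
# ("cuda", "xpu", ...). model_path and TinyNet are placeholders.
import torch
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
from accelerate.test_utils.testing import torch_device  # assumed location


class TinyNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(8, 8)

    def forward(self, x):
        return self.linear(x)


model_path = "path/to/saved/checkpoint"  # placeholder checkpoint location

with init_empty_weights():
    model = TinyNet()

model = load_checkpoint_and_dispatch(
    model,
    checkpoint=model_path,
    device_map={"": torch_device},  # was {"": torch.device("cuda:0")} before this change
)
```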
2 changes: 0 additions & 2 deletions tests/test_kwargs_handlers.py
@@ -27,7 +27,6 @@
path_in_accelerate_package,
require_multi_device,
require_non_cpu,
require_non_xpu,
)
from accelerate.test_utils.testing import slow
from accelerate.utils import AutocastKwargs, KwargsHandler, ProfileKwargs, TorchDynamoPlugin, clear_environment
@@ -50,7 +49,6 @@ def test_kwargs_handler(self):
assert MockClass(a=2, c=2.25).to_kwargs() == {"a": 2, "c": 2.25}

@require_non_cpu
@require_non_xpu
def test_grad_scaler_kwargs(self):
# If no defaults are changed, `to_kwargs` returns an empty dict.
scaler_handler = GradScalerKwargs(init_scale=1024, growth_factor=2)
3 changes: 1 addition & 2 deletions tests/test_modeling_utils.py
@@ -28,7 +28,6 @@
from accelerate import init_empty_weights
from accelerate.big_modeling import cpu_offload
from accelerate.test_utils import (
require_cuda,
require_huggingface_suite,
require_multi_device,
require_non_cpu,
@@ -853,7 +852,7 @@ def test_infer_auto_device_map_with_fallback_allocation_and_buffers(self):
expected_device_map = {"batchnorm": 0, "linear1": "disk", "linear2": "disk"}
assert device_map == expected_device_map

@require_cuda
@require_non_cpu
def test_get_balanced_memory(self):
model = ModelForTest()
# model has size 236: linear1 64, batchnorm 72, linear2 100
3 changes: 1 addition & 2 deletions tests/test_optimizer.py
@@ -19,7 +19,7 @@

from accelerate import Accelerator
from accelerate.state import AcceleratorState
from accelerate.test_utils import require_cpu, require_non_cpu, require_non_xpu
from accelerate.test_utils import require_cpu, require_non_cpu


@require_cpu
@@ -37,7 +37,6 @@ def test_accelerated_optimizer_pickling(self):


@require_non_cpu
@require_non_xpu
class OptimizerTester(unittest.TestCase):
def test_accelerated_optimizer_step_was_skipped(self):
model = torch.nn.Linear(5, 5)
