You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
What's the issue, what's expected?:
General question I can't seem to find, does MS-AMP support ZeRO-3? Or only ZeRO-1 and ZeRO-2. (It's fine if not and this should be obvious to a user :) )
AttributeError: 'ScalingParameter' object has no attribute 'item'
Traceback (most recent call last):
File "/mnt/work/accelerate/benchmarks/fp8/ms_amp/distrib_deepspeed.py", line 168, in
baseline_not_trained, baseline_trained = train_baseline(3, "O1")
File "/mnt/work/accelerate/benchmarks/fp8/ms_amp/distrib_deepspeed.py", line 85, in train_baseline
) = deepspeed.initialize(
File "/mnt/work/MS-AMP/msamp/deepspeed/__init__.py", line 135, in initialize
engine = MSAMPDeepSpeedEngine(
File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/engine.py", line 308, in __init__
self._configure_optimizer(optimizer, model_parameters)
File "/mnt/work/MS-AMP/msamp/deepspeed/runtime/engine.py", line 104, in _configure_optimizer
self.optimizer = self._configure_zero_optimizer(basic_optimizer)
File "/mnt/work/MS-AMP/msamp/deepspeed/runtime/engine.py", line 283, in _configure_zero_optimizer
optimizer = DeepSpeedZeroOptimizer_Stage3(
File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/stage3.py", line 177, in __init__
self.parameter_offload = self.initialize_ds_offload(
File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/stage3.py", line 423, in initialize_ds_offload
return DeepSpeedZeRoOffload(module=module,
File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/parameter_offload.py", line 241, in __init__
self._convert_to_zero_parameters(ds_config, module, mpu)
File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/parameter_offload.py", line 316, in _convert_to_zero_parameters
Init(module=module,
File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/partition_parameters.py", line 996, in __init__
self._convert_to_zero_parameters(module.parameters(recurse=True))
File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/partition_parameters.py", line 1021, in _convert_to_zero_parameters
self._zero_init_param(param)
File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/partition_parameters.py", line 1008, in _zero_init_param
self._convert_to_deepspeed_param(param)
File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/partition_parameters.py", line 1404, in _convert_to_deepspeed_param
param.item = allgather_before(param.item)
AttributeError: 'ScalingParameter' object has no attribute 'item'
O3:
AssertionError: MS-AMP O3 requires ZeRO with optimizer_states or gradients partitioning.
Traceback (most recent call last):
File "/mnt/work/accelerate/benchmarks/fp8/ms_amp/distrib_deepspeed.py", line 168, in
baseline_not_trained, baseline_trained = train_baseline(3, "O3")
File "/mnt/work/accelerate/benchmarks/fp8/ms_amp/distrib_deepspeed.py", line 85, in train_baseline
) = deepspeed.initialize(
File "/mnt/work/MS-AMP/msamp/deepspeed/__init__.py", line 119, in initialize
config_class = MSAMPDeepSpeedConfig(config, mpu)
File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/config.py", line 787, in __init__
self._do_sanity_check()
File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/config.py", line 969, in _do_sanity_check
self._do_error_check()
File "/mnt/work/MS-AMP/msamp/deepspeed/runtime/config.py", line 43, in _do_error_check
self.zero_optimization_stage in [ZeroStageEnum.optimizer_states, ZeroStageEnum.gradients], \
AssertionError: MS-AMP O3 requires ZeRO with optimizer_states or gradients partitioning.
The text was updated successfully, but these errors were encountered:
What's the issue, what's expected?:
General question I can't seem to find, does MS-AMP support ZeRO-3? Or only ZeRO-1 and ZeRO-2. (It's fine if not and this should be obvious to a user :) )
I ran into some issues trying to run Zero-3:
Relevant Errors
O1 & O2:
AttributeError: 'ScalingParameter' object has no attribute 'item'
Traceback (most recent call last): File "/mnt/work/accelerate/benchmarks/fp8/ms_amp/distrib_deepspeed.py", line 168, in baseline_not_trained, baseline_trained = train_baseline(3, "O1") File "/mnt/work/accelerate/benchmarks/fp8/ms_amp/distrib_deepspeed.py", line 85, in train_baseline ) = deepspeed.initialize( File "/mnt/work/MS-AMP/msamp/deepspeed/__init__.py", line 135, in initialize engine = MSAMPDeepSpeedEngine( File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/engine.py", line 308, in __init__ self._configure_optimizer(optimizer, model_parameters) File "/mnt/work/MS-AMP/msamp/deepspeed/runtime/engine.py", line 104, in _configure_optimizer self.optimizer = self._configure_zero_optimizer(basic_optimizer) File "/mnt/work/MS-AMP/msamp/deepspeed/runtime/engine.py", line 283, in _configure_zero_optimizer optimizer = DeepSpeedZeroOptimizer_Stage3( File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/stage3.py", line 177, in __init__ self.parameter_offload = self.initialize_ds_offload( File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/stage3.py", line 423, in initialize_ds_offload return DeepSpeedZeRoOffload(module=module, File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/parameter_offload.py", line 241, in __init__ self._convert_to_zero_parameters(ds_config, module, mpu) File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/parameter_offload.py", line 316, in _convert_to_zero_parameters Init(module=module, File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/partition_parameters.py", line 996, in __init__ self._convert_to_zero_parameters(module.parameters(recurse=True)) File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/partition_parameters.py", line 1021, in _convert_to_zero_parameters self._zero_init_param(param) File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/partition_parameters.py", line 1008, in _zero_init_param self._convert_to_deepspeed_param(param) File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/partition_parameters.py", line 1404, in _convert_to_deepspeed_param param.item = allgather_before(param.item) AttributeError: 'ScalingParameter' object has no attribute 'item'
O3:
AssertionError: MS-AMP O3 requires ZeRO with optimizer_states or gradients partitioning.
Traceback (most recent call last): File "/mnt/work/accelerate/benchmarks/fp8/ms_amp/distrib_deepspeed.py", line 168, in baseline_not_trained, baseline_trained = train_baseline(3, "O3") File "/mnt/work/accelerate/benchmarks/fp8/ms_amp/distrib_deepspeed.py", line 85, in train_baseline ) = deepspeed.initialize( File "/mnt/work/MS-AMP/msamp/deepspeed/__init__.py", line 119, in initialize config_class = MSAMPDeepSpeedConfig(config, mpu) File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/config.py", line 787, in __init__ self._do_sanity_check() File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/config.py", line 969, in _do_sanity_check self._do_error_check() File "/mnt/work/MS-AMP/msamp/deepspeed/runtime/config.py", line 43, in _do_error_check self.zero_optimization_stage in [ZeroStageEnum.optimizer_states, ZeroStageEnum.gradients], \ AssertionError: MS-AMP O3 requires ZeRO with optimizer_states or gradients partitioning.
The text was updated successfully, but these errors were encountered: