From aed29d6533e8ff86686fdb6fafa6d0b720e9e5f6 Mon Sep 17 00:00:00 2001
From: Yuxiang Yang
Date: Wed, 23 Aug 2023 22:36:22 +0800
Subject: [PATCH] [Bugfix] Change clip_grad_norm_fp8 to clip_grad_norm_fp32 (#97)

**Description**

The function imported in msamp/megatron/__init__.py is misnamed: it is imported as clip_grad_norm_fp8, but the actual function is clip_grad_norm_fp32. The gpt3 example cannot run successfully until this PR is merged.
---
 msamp/megatron/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/msamp/megatron/__init__.py b/msamp/megatron/__init__.py
index 74954e73..1932453a 100644
--- a/msamp/megatron/__init__.py
+++ b/msamp/megatron/__init__.py
@@ -3,7 +3,7 @@
 
 """Expose the interface of MS-AMP megatron package."""
 
-from msamp.megatron.optimizer.clip_grads import clip_grad_norm_fp8
+from msamp.megatron.optimizer.clip_grads import clip_grad_norm_fp32
 from msamp.megatron.distributed import FP8DistributedDataParallel
 from msamp.common.utils.lazy_import import LazyImport
 
@@ -13,6 +13,6 @@
 FP8DistributedOptimizer = LazyImport('msamp.megatron.optimizer.distrib_optimizer', 'FP8DistributedOptimizer')
 
 __all__ = [
-    'clip_grad_norm_fp8', 'FP8DistributedDataParallel', 'FP8LinearWithGradAccumulationAndAsyncCommunication',
+    'clip_grad_norm_fp32', 'FP8DistributedDataParallel', 'FP8LinearWithGradAccumulationAndAsyncCommunication',
    'FP8DistributedOptimizer'
 ]
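For reviewers, a minimal sketch of how the corrected re-export can be smoke-tested. This is not part of the patch, and it assumes MS-AMP and Megatron-LM are installed, since the msamp.megatron package pulls in Megatron-LM code at import time:

```python
# Smoke test (illustrative only): before this fix, importing from
# msamp.megatron raised ImportError because __init__.py re-exported a
# non-existent name (clip_grad_norm_fp8). After the fix, the public
# name resolves to the real function in msamp.megatron.optimizer.clip_grads.
from msamp.megatron import clip_grad_norm_fp32, FP8DistributedDataParallel

assert callable(clip_grad_norm_fp32)
print('msamp.megatron exports resolve correctly')
```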