diff --git a/tests/test_tipc/static/auto_parallel/baichuan2/N4C32/baichuan-inc-baichuan-2-13b_pretrain_dynamic_auto_bs32_bf16_DP1_MP4_PP1_Sharding8_Stage1.sh b/tests/test_tipc/static/auto_parallel/baichuan2/N4C32/baichuan-inc-baichuan-2-13b_pretrain_dynamic_auto_bs32_bf16_DP1_MP4_PP1_Sharding8_Stage1.sh index 97c16d163222..5b6e47a85f73 100644 --- a/tests/test_tipc/static/auto_parallel/baichuan2/N4C32/baichuan-inc-baichuan-2-13b_pretrain_dynamic_auto_bs32_bf16_DP1_MP4_PP1_Sharding8_Stage1.sh +++ b/tests/test_tipc/static/auto_parallel/baichuan2/N4C32/baichuan-inc-baichuan-2-13b_pretrain_dynamic_auto_bs32_bf16_DP1_MP4_PP1_Sharding8_Stage1.sh @@ -21,6 +21,8 @@ param+="model_type=baichuan2_13b " param+='dynamic_auto=_dynamic_auto ' export FLAGS_fuse_reducescatter_in_opt=1 +export FLAGS_enable_sharding_overlap=1 +export FLAGS_enable_tensor_fusion=1 cd ./tests bash ./test_tipc/static/auto_parallel/baichuan2/benchmark_common/prepare.sh diff --git a/tests/test_tipc/static/auto_parallel/baichuan2/pretrain_config_baichuan2_13b/pretrain-baichuan2_13b_dynamic_auto.json b/tests/test_tipc/static/auto_parallel/baichuan2/pretrain_config_baichuan2_13b/pretrain-baichuan2_13b_dynamic_auto.json index 459df22ad2f8..e4b53b816926 100644 --- a/tests/test_tipc/static/auto_parallel/baichuan2/pretrain_config_baichuan2_13b/pretrain-baichuan2_13b_dynamic_auto.json +++ b/tests/test_tipc/static/auto_parallel/baichuan2/pretrain_config_baichuan2_13b/pretrain-baichuan2_13b_dynamic_auto.json @@ -42,15 +42,15 @@ "use_flash_attention": true, "fused_linear": 1, "fused_linear_param_grad_add": 1, + "enable_linear_fused_grad_add": true, "use_fused_rope": true, "use_fused_rms_norm": true, "max_seq_length": 4096, - "sequence_parallel": 1, + "sequence_parallel": 0, "sharding": "stage1", "sharding_parallel_degree": 8, "sharding_parallel_config": "enable_tensor_fusion enable_overlap", "tensor_parallel_config": "enable_mp_async_allreduce replace_with_parallel_cross_entropy", "data_parallel_config": "enable_allreduce_avg_in_gradinent_scale gradient_sync_after_accumulate", - "pipeline_parallel_config": "enable_send_recv_overlap enable_split_backward", - "num_hidden_layers": 20 + "pipeline_parallel_config": "enable_send_recv_overlap enable_split_backward" } \ No newline at end of file