
LLaMa vpp model construction #10634


Open · wants to merge 14 commits into develop
43 changes: 43 additions & 0 deletions llm/auto_parallel/llama/run_llama2.sh
@@ -0,0 +1,43 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# just for debug

set -x
unset CUDA_VISIBLE_DEVICES

task_name="llama3_dp2pp4sd2"
rm -rf output/$task_name/
rm -rf "output/$task_name""_log"

export SOT_LOG_LEVEL=4
export PYTHONPATH=../../../:$PYTHONPATH
# export PYTHONPATH=/root/paddlejob/workspace/env_run/wangxiangzhe/Paddle/build/python:$PYTHONPATH
#ulimit -c unlimited
# export GLOG_v=6
export NCCL_DEBUG=INFO

# export FLAGS_call_stack_level=3
# export FLAGS_use_cuda_managed_memory=true

# export FLAGS_embedding_deterministic=1
# export FLAGS_cudnn_deterministic=1
# export NVIDIA_TF32_OVERRIDE=0
rm -rf core.*
python -u -m paddle.distributed.launch \
--gpus "0,1,2,3,4,5,6,7" \
--log_dir "output/$task_name""_log" \
./run_pretrain_auto.py \
/root/paddlejob/workspace/env_run/wangxiangzhe/PaddleNLP/tests/test_tipc/static/auto_parallel/llama2/pretrain_config_llama2_13b/pretrain-llama2_13b.json

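The script launches an 8-GPU auto-parallel pretraining run; the task name suggests a dp2 / pp4 / sharding2 layout, with the details coming from the referenced JSON config. As a side note (not part of the diff), a minimal sketch of how that layout could be sanity-checked from inside run_pretrain_auto.py via the global mesh; the dimension names are an assumption about the config:

# Hedged sketch (not in this PR): inspect the mesh that fleet.auto builds from the config.
# The dimension names below are assumptions; they depend on the JSON config.
import paddle.distributed as dist
from paddle.distributed import fleet

def describe_parallel_layout():
    mesh = fleet.auto.get_mesh()                     # global process mesh
    print("rank:", dist.get_rank(), "mesh dims:", mesh.dim_names)
    if "pp" in mesh.dim_names:
        # Sub-mesh holding the first pipeline stage's ranks.
        print("pp submesh 0:", mesh.get_mesh_with_dim("pp", 0))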
4 changes: 3 additions & 1 deletion llm/auto_parallel/llama/run_pretrain_auto.py
@@ -41,14 +41,16 @@
LinearAnnealingWithWarmupDecay,
LlamaConfig,
LlamaForCausalLM3DAuto,
LlamaForCausalLM3DAutoPP,
LlamaForCausalLMNet,
LlamaPretrainingCriterion3DAuto,
LlamaPretrainingCriterionNet,
)
from paddlenlp.utils.log import logger
from paddle.distributed.auto_parallel.pipelining.schedules import ScheduleGPipe

MODEL_CLASSES = {
"llama": (LlamaConfig, LlamaForCausalLM3DAuto, LlamaPretrainingCriterion3DAuto),
"llama": (LlamaConfig, LlamaForCausalLM3DAutoPP, LlamaPretrainingCriterion3DAuto),
"llama_network": (LlamaConfig, LlamaForCausalLMNet, LlamaPretrainingCriterionNet),
}

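For context, MODEL_CLASSES maps a model type string to (config, model, criterion) classes, so this change routes the default "llama" type through the new pipeline-parallel entry point LlamaForCausalLM3DAutoPP. A rough sketch of how the registry is consumed further down in the script; the variable names are assumptions, not the PR's code:

# Hypothetical lookup mirroring the usual pattern (names assumed, not from the diff).
config_class, model_class, criterion_class = MODEL_CLASSES["llama"]
config = config_class()              # LlamaConfig, populated from the JSON config in practice
model = model_class(config)          # LlamaForCausalLM3DAutoPP after this change
criterion = criterion_class(config)  # LlamaPretrainingCriterion3DAuto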
277 changes: 239 additions & 38 deletions paddlenlp/trainer/auto_trainer.py
@@ -57,13 +57,191 @@
from ..quantization.quantization_linear import QuantizationLinear
except:
QuantizationLinear = None

from paddle.distributed.auto_parallel.pipelining.schedules import ScheduleGPipe, Schedule1F1B, ScheduleInterleaved1F1B
from paddle.distributed.auto_parallel.pipelining.stage import PipelineStage


MODEL_NAME = "model"
OPTIMIZER_NAME = "optimizer"
DIST_CKPT_PATH = "dist_ckpt"
DIST_MODEL_PATH = "dist_model"
FREE_SVAE_LOAD_KEY_PATTERNS = ["learning_rate_", "gradient_merge_", "@GRAD@MERG", "eager_tmp"]

is_split_model = False
local_stages = None


group0 = None
group1 = None
group2 = None
group3 = None


# class _Pipeline_model_chunk(nn.Layer):
#     def __init__(self, layers):
#         if not isinstance(layers, (list, tuple)):
#             raise TypeError(
#                 f"Expected type of `layers` to be a list|tuple but got {type(layers)}."
#             )
#         self.layers = layers
#         super(_Pipeline_model_chunk, self).__init__()
#
#     def forward(self, *args, **kwargs):
#         for layer in self.layers:
#             output = layer(*args, **kwargs)
#         return output

def manual_model_split(model, stage_idx, group):
    global is_split_model
    global local_stages

    if is_split_model:
        return local_stages
    if stage_idx == 0:
        for i in range(10):
            del model.layers[10]

        def forward0(
            self,
            input_ids=None,
            labels=None,
            position_ids=None,
            attention_mask=None,
            inputs_embeds=None,
            use_cache=False,
            past_key_values=None,
            output_attentions=None,
            output_hidden_states=None,
            return_dict=None,
        ):
            outputs = tuple([input_ids, attention_mask, position_ids])
            # decoder layers
            for idx, (decoder_layer) in enumerate(self.layers):
                outputs = decoder_layer(outputs)
            return outputs

        setattr(model.__class__, "forward", forward0)

    elif stage_idx == 1:
        for i in range(10):
            del model.layers[0]

        def forward1(self, *args):
            outputs = args
            # decoder layers
            for idx, (decoder_layer) in enumerate(self.layers):
                outputs = decoder_layer(outputs)
            return outputs

        setattr(model.__class__, "forward", forward1)
    else:
        raise ValueError("Invalid stage index.")

    stage = PipelineStage(
        model,
        stage_idx,
        2,
        group=group
    )
    is_split_model = True
    local_stages = stage
    return stage
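manual_model_split is a debug helper that splits the decoder stack in half across 2 pipeline stages: each branch deletes 10 layers (which suggests a reduced ~20-layer debug config rather than the full 13B depth), patches forward on the model class so a stage only runs its remaining layers, and wraps the result in a PipelineStage. A minimal, hedged sketch of how the returned stage would be driven by one of the imported schedules, mirroring the step calls used later in compute_loss (model, criterion, pp_group, stage_idx, micro_inputs and labels are placeholders):

# Hedged sketch, not the PR's code: drive one debug stage with GPipe.
stage = manual_model_split(model, stage_idx, pp_group)
schedule = ScheduleGPipe(stage, n_microbatches=2, loss_fn=criterion)

if stage_idx == 0:
    # First stage feeds input_ids / attention_mask / position_ids.
    schedule.step(**micro_inputs)
else:
    # Last stage receives the labels and collects one loss per microbatch.
    losses = []
    schedule.step(target=labels, losses=losses)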

def manual_model_split_multi(model, stage_idx, group):
    global is_split_model
    global local_stages

    if is_split_model:
        return local_stages
    layer_lists = None
    if stage_idx == 0:
        for i in range(5):
            del model.layers[5]
        for i in range(5):
            del model.layers[10]
    else:
        for i in range(5):
            del model.layers[0]
        for i in range(5):
            del model.layers[5]
    layer_lists = model.layers

    def _build_stage(model, stage_idx, group):
        new_model = []
        if stage_idx == 0:
            new_model = copy.deepcopy(model)
            new_model.layers = layer_lists[:5]

            def forward0(
                self,
                input_ids=None,
                labels=None,
                position_ids=None,
                attention_mask=None,
                inputs_embeds=None,
                use_cache=False,
                past_key_values=None,
                output_attentions=None,
                output_hidden_states=None,
                return_dict=None,
            ):
                outputs = tuple([input_ids, attention_mask, position_ids])
                # decoder layers
                for idx, (decoder_layer) in enumerate(self.layers):
                    outputs = decoder_layer(outputs)
                return outputs

            # setattr(model.__class__, "forward", forward0)
            new_model.forward = forward0.__get__(new_model)

        elif stage_idx == 1:
            new_model = copy.deepcopy(model)
            new_model.layers = layer_lists[:5]

            def forward1(self, *args, **kwargs):
                outputs = args if len(args) > 0 else kwargs
                # decoder layers
                for idx, (decoder_layer) in enumerate(self.layers):
                    outputs = decoder_layer(outputs)
                return outputs

            # setattr(model.__class__, "forward", forward1)
            new_model.forward = forward1.__get__(new_model)

        elif stage_idx == 2:
            new_model = copy.deepcopy(model)
            new_model.layers = layer_lists[5:]

            def forward2(self, *args, **kwargs):
                outputs = args if len(args) > 0 else kwargs
                # decoder layers
                for idx, (decoder_layer) in enumerate(self.layers):
                    outputs = decoder_layer(outputs)
                return outputs

            # setattr(model.__class__, "forward", forward2)
            new_model.forward = forward2.__get__(new_model)
        elif stage_idx == 3:
            new_model = copy.deepcopy(model)
            new_model.layers = layer_lists[5:]

            def forward3(self, *args, **kwargs):
                outputs = args if len(args) > 0 else kwargs
                # decoder layers
                for idx, (decoder_layer) in enumerate(self.layers):
                    outputs = decoder_layer(outputs)
                return outputs

            # setattr(model.__class__, "forward", forward3)
            new_model.forward = forward3.__get__(new_model)
        else:
            raise ValueError("Invalid stage index.")

        stage = PipelineStage(
            new_model,
            stage_idx,
            4,
            group=group
        )
        return stage

    stages = []
    stage = _build_stage(model, stage_idx, group)
    stages.append(stage)
    stage = _build_stage(model, stage_idx + 2, group)
    stages.append(stage)
    is_split_model = True
    local_stages = stages
    return local_stages
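Unlike manual_model_split, which patches forward on the model class with setattr, _build_stage binds forward0..forward3 to each deep-copied model instance via the descriptor protocol (fn.__get__(obj)); since one rank holds two virtual stages, class-level patching would let the second stage overwrite the first stage's forward. A small standalone illustration of that binding trick (plain Python, independent of Paddle):

# Instance-level method binding: only `a` gets the new forward; `b` keeps the original.
class Net:
    def forward(self, x):
        return x + 1

def forward_double(self, x):
    return x * 2

a, b = Net(), Net()
a.forward = forward_double.__get__(a)   # bind to this instance only
print(a.forward(3), b.forward(3))       # 6 4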


class AutoTrainer(Trainer):
def __init__(self, *args, **kwargs):
@@ -88,7 +266,7 @@
), "if use AutoTrainer.parallel_model , auto_dist_config obtained from parallel_model should be passed to AutoTrainer "
self.auto_dist_config = kwargs.pop("auto_dist_config")
model = kwargs["model"]
for param in model.parameters():
for name, param in model.named_parameters():

Check warning on line 269 in paddlenlp/trainer/auto_trainer.py

View check run for this annotation

Codecov / codecov/patch

paddlenlp/trainer/auto_trainer.py#L269

Added line #L269 was not covered by tests
# NOTE(zhangwl): in pipeline mode, a param may have been initialized before while its init_func was deleted, but the param may still not be is_initialized
if not param._is_initialized() and param._init_func is not None:
param.initialize()
@@ -99,6 +277,8 @@

self.global_mesh = fleet.auto.get_mesh()
self.comm_group_in_pp = fleet.get_hybrid_communicate_group().get_pipe_parallel_group()
# print("self.comm_group_in_pp: ", self.comm_group_in_pp)
# print("get_submesh_dim: ", self.global_mesh.get_submesh_with_dim("pp").get_group())
self._in_pir_mode = paddle.base.framework.get_flags("FLAGS_enable_pir_api")["FLAGS_enable_pir_api"]

@classmethod
@@ -670,50 +850,71 @@
labels = inputs["generator_labels"]
else:
labels = None
        def get_mesh(pp_idx=0):
            mesh = fleet.auto.get_mesh()
            if "pp" in mesh.dim_names:
                mesh = mesh.get_mesh_with_dim("pp", pp_idx)
            return mesh

        rank = dist.get_rank()
        if rank == 0 or rank == 1 or rank == 2 or rank == 3:
            stages = manual_model_split_multi(model, 0, self.comm_group_in_pp)
        else:
            stages = manual_model_split_multi(model, 1, self.comm_group_in_pp)

        schedule = ScheduleInterleaved1F1B(stages, n_microbatches=2, loss_fn=self.criterion)

        if rank == 0 or rank == 1 or rank == 2 or rank == 3:
            inputs["input_ids"] = dist.reshard(inputs["input_ids"], get_mesh(0), [dist.Replicate(), dist.Replicate()])
            schedule.step(**inputs)
        else:
            labels = dist.reshard(labels, get_mesh(1), [dist.Replicate(), dist.Replicate()])
            losses = []
            schedule.step(target=labels, losses=losses)
            print("losses: ", losses)
        return 0

        # outputs = model(**inputs)

        # if self.criterion is not None:

        #     def to_list(value):
        #         if value is None:
        #             return value
        #         if isinstance(value, (list, tuple)):
        #             return list(value)
        #         return [value]

        #     criterion_inputs = to_list(outputs)
        #     criterion_labels = to_list(labels)
        #     loss = self.criterion(*(criterion_inputs + criterion_labels))
        #     outputs = (loss, outputs)

        # # Save past state if it exists
        # # TODO: this needs to be fixed and made cleaner later.
        # if self.args.past_index >= 0:
        #     self._past = outputs[self.args.past_index]

        # # We don't use .loss here since the model may return tuples instead of ModelOutput.
        # loss = outputs["loss"] if isinstance(outputs, dict) else outputs
        # if isinstance(outputs, dict):
        #     loss = outputs["loss"]
        # elif isinstance(outputs, tuple):
        #     loss = outputs[0]
        # else:
        #     loss = outputs

        # return (loss, outputs) if return_outputs else loss
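As written, the new compute_loss drives the whole pipeline schedule itself: backward happens inside schedule.step, only the last-stage ranks ever see losses, and the method prints them and returns 0, so the trainer's usual loss bookkeeping is bypassed. A hedged sketch of how the collected per-microbatch losses might be reduced to a scalar for logging; this is an assumption about intent, not part of the PR:

# Hedged sketch: average the per-microbatch losses gathered by schedule.step on the last stage.
import paddle

def reduce_microbatch_losses(losses):
    # `losses` is the list passed to schedule.step(target=labels, losses=losses);
    # it stays empty on ranks that do not own the last stage.
    if not losses:
        return None
    return paddle.stack([loss.detach() for loss in losses]).mean()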

    def dynamic_training(self, model: nn.Layer, inputs: Dict[str, Union[paddle.Tensor, Any]]) -> paddle.Tensor:
        with self.autocast_smart_context_manager():
            loss = self.compute_loss(model, inputs)

        # if loss is not None and self.args.gradient_accumulation_steps > 1 and not self._enable_delay_scale_loss():
        #     loss = loss / self.args.gradient_accumulation_steps

        # if self.do_grad_scaling:
        #     self.scaler.scale(loss).backward()
        # else:
        #     loss.backward()

        return loss

1 change: 1 addition & 0 deletions paddlenlp/transformers/llama/__init__.py
@@ -15,6 +15,7 @@
from .configuration import *
from .modeling import *
from .modeling_auto import *
from .modeling_auto_pp import *
from .modeling_network import *
from .modeling_pp import *
from .tokenizer import *