diff --git a/.github/workflows/gpu_tests.yaml b/.github/workflows/gpu_tests.yaml index 6ed27200a..76a8b1cbd 100644 --- a/.github/workflows/gpu_tests.yaml +++ b/.github/workflows/gpu_tests.yaml @@ -6,7 +6,7 @@ on: - main pull_request: paths: - - '.github/workflows/**.yaml' + - '.github/workflows/gpu_tests.yaml' - 'pippy/**' - 'test/**' - 'examples/**' diff --git a/.github/workflows/model_tests.yaml b/.github/workflows/model_tests.yaml new file mode 100644 index 000000000..aa591c4dc --- /dev/null +++ b/.github/workflows/model_tests.yaml @@ -0,0 +1,85 @@ +name: Model Tests +# Run models in `examples` folder + +on: + # Run when this workflow file or any example is changed + pull_request: + paths: + - '.github/workflows/model_tests.yaml' + - 'examples/**' + # Nightly run against the PyTorch nightly build + schedule: + - cron: "30 11 * * *" # Every day at 11:30 UTC, i.e. 3:30 am PST / 4:30 am PDT + +concurrency: + # Cancel CI on previous commit when a new commit is pushed to the same branch + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +defaults: + run: + shell: bash -l -eo pipefail {0} + +jobs: + model_tests_4gpu: + runs-on: linux.g5.12xlarge.nvidia.gpu + strategy: + matrix: + python-version: ['3.10'] + steps: + - name: Check out repo + uses: actions/checkout@v3 + - name: Setup conda env + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + miniconda-version: "latest" + activate-environment: test + python-version: ${{ matrix.python-version }} + - name: Activate conda env + run: conda activate test + - name: Install dependencies + run: | + pip install --pre -r requirements.txt --find-links https://download.pytorch.org/whl/nightly/cu121/torch_nightly.html + - name: Install Transformers for getting models + run: pip install transformers + - name: Run GPT2 + run: torchrun --nproc-per-node 4 examples/huggingface/pippy_gpt2.py + - name: Run BERT + run: torchrun --nproc-per-node 4 examples/huggingface/pippy_bert.py + - name: 
Run blenderbot + run: torchrun --nproc-per-node 4 examples/huggingface/pippy_blenderbot.py + - name: Run camemBert + run: torchrun --nproc-per-node 4 examples/huggingface/pippy_camemBert.py + - name: Run convBert + run: torchrun --nproc-per-node 4 examples/huggingface/pippy_convBert.py + - name: Run deberta + run: torchrun --nproc-per-node 4 examples/huggingface/pippy_deberta.py + # - name: Run debertaV2 + # run: torchrun --nproc-per-node 4 examples/huggingface/pippy_debertaV2.py + - name: Run distilBert + run: torchrun --nproc-per-node 4 examples/huggingface/pippy_distilBert.py + - name: Run electra + run: torchrun --nproc-per-node 4 examples/huggingface/pippy_electra.py + - name: Run fnet + run: torchrun --nproc-per-node 4 examples/huggingface/pippy_fnet.py + - name: Run gptNeo + run: torchrun --nproc-per-node 4 examples/huggingface/pippy_gptNeo.py + - name: Run layoutLM + run: torchrun --nproc-per-node 4 examples/huggingface/pippy_layoutLM.py + - name: Run mbart + run: torchrun --nproc-per-node 4 examples/huggingface/pippy_mbart.py + - name: Run megatronBert + run: torchrun --nproc-per-node 4 examples/huggingface/pippy_megatronBert.py + - name: Run mobileBert + run: torchrun --nproc-per-node 4 examples/huggingface/pippy_mobileBert.py + # - name: Run opt + # run: torchrun --nproc-per-node 2 examples/huggingface/pippy_opt.py + - name: Run trOCR + run: torchrun --nproc-per-node 4 examples/huggingface/pippy_trOCR.py + - name: Run unet + run: torchrun --nproc-per-node 2 examples/huggingface/pippy_unet.py + - name: Run xlnet + run: torchrun --nproc-per-node 4 examples/huggingface/pippy_xlnet.py + - name: Test CPU init + GPU run + run: torchrun --nproc-per-node 4 examples/cpu_init/gpt2_cpu_init.py diff --git a/.github/workflows/pippy_tests.yaml b/.github/workflows/pippy_tests.yaml index ccb7e27b0..635ca171f 100644 --- a/.github/workflows/pippy_tests.yaml +++ b/.github/workflows/pippy_tests.yaml @@ -6,7 +6,7 @@ on: - main pull_request: paths: - - 
'.github/workflows/**.yaml' + - '.github/workflows/pippy_tests.yaml' - 'pippy/**' - 'test/**' - 'examples/**' diff --git a/examples/cpu_init/gpt2_cpu_init.py b/examples/cpu_init/gpt2_cpu_init.py index 788aa6bb9..ce2b6f2e3 100644 --- a/examples/cpu_init/gpt2_cpu_init.py +++ b/examples/cpu_init/gpt2_cpu_init.py @@ -8,20 +8,11 @@ import torch import torch.distributed as dist - -from pippy import pipeline, PipelineStage, SplitPoint, annotate_split_points -from pippy.PipelineSchedule import ScheduleGPipe +from torch.distributed.pipelining import pipeline, PipelineStage, ScheduleGPipe, SplitPoint from transformers import GPT2ForSequenceClassification, GPT2Config -def add_split_points(gpt2, nranks): - layers_per_rank = gpt2.config.num_hidden_layers // nranks - for i in range(1, nranks): - annotate_split_points( - gpt2, {f"transformer.h.{i * layers_per_rank}": SplitPoint.BEGINNING}) - - def run(args): # Model configs config = GPT2Config() @@ -45,20 +36,27 @@ def run(args): requires_grad=False, ) - # Annotate split points - add_split_points(gpt2, args.world_size) + # Split spec: mark SplitPoint.BEGINNING at every decoders_per_rank-th `transformer.h` index, one split per rank after rank 0 + decoders_per_rank = (gpt2.config.n_layer + args.world_size - 1) // args.world_size + print(f"decoders_per_rank = {decoders_per_rank}") + split_spec = { + f'transformer.h.{i * decoders_per_rank}': SplitPoint.BEGINNING + for i in range(1, args.world_size) + } # Create pipeline - gpt2_pipe = pipeline( + pipe = pipeline( gpt2, num_chunks=args.chunks, example_args=(example_input,), + split_spec=split_spec, ) - assert gpt2_pipe.num_stages == args.world_size, f"nstages = {gpt2_pipe.num_stages} nranks = {args.world_size}" + + assert pipe.num_stages == args.world_size, f"nstages = {pipe.num_stages} nranks = {args.world_size}" # Create schedule runtime stage = PipelineStage( - gpt2_pipe, + pipe, args.rank, device=args.device, )