Update on "Add nightly model tests against pytorch"

[ghstack-poisoned]

kwen2501 committed May 29, 2024
1 parent 595b44f commit 3a61fb0
Showing 2 changed files with 18 additions and 15 deletions.

.github/workflows/model_tests.yaml (5 changes: 5 additions, 0 deletions)
@@ -13,6 +13,11 @@ on:
   schedule:
     - cron: "30 11 * * *" # Everyday 11:30 am UTC, i.e. 4:30 am PST
 
+concurrency:
+  # Cancel CI on previous commit when a new commit is pushed to the same branch
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
 defaults:
   run:
     shell: bash -l -eo pipefail {0}
examples/cpu_init/gpt2_cpu_init.py (28 changes: 13 additions, 15 deletions)
@@ -8,20 +8,11 @@
 
 import torch
 import torch.distributed as dist
-
-from pippy import pipeline, PipelineStage, SplitPoint, annotate_split_points
-from pippy.PipelineSchedule import ScheduleGPipe
+from torch.distributed.pipelining import pipeline, PipelineStage, ScheduleGPipe, SplitPoint
 
 from transformers import GPT2ForSequenceClassification, GPT2Config
 
 
-def add_split_points(gpt2, nranks):
-    layers_per_rank = gpt2.config.num_hidden_layers // nranks
-    for i in range(1, nranks):
-        annotate_split_points(
-            gpt2, {f"transformer.h.{i * layers_per_rank}": SplitPoint.BEGINNING})
-
-
 def run(args):
     # Model configs
     config = GPT2Config()
@@ -45,20 +36,27 @@ def run(args):
         requires_grad=False,
     )
 
-    # Annotate split points
-    add_split_points(gpt2, args.world_size)
+    # Split spec
+    decoders_per_rank = (gpt2.config.n_layer + args.world_size - 1) // args.world_size
+    print(f"decoders_per_rank = {decoders_per_rank}")
+    split_spec = {
+        f'transformer.h.{i * decoders_per_rank}': SplitPoint.BEGINNING
+        for i in range(1, args.world_size)
+    }
 
     # Create pipeline
-    gpt2_pipe = pipeline(
+    pipe = pipeline(
         gpt2,
         num_chunks=args.chunks,
         example_args=(example_input,),
+        split_spec=split_spec,
     )
-    assert gpt2_pipe.num_stages == args.world_size, f"nstages = {gpt2_pipe.num_stages} nranks = {args.world_size}"
+
+    assert pipe.num_stages == args.world_size, f"nstages = {pipe.num_stages} nranks = {args.world_size}"
 
     # Create schedule runtime
     stage = PipelineStage(
-        gpt2_pipe,
+        pipe,
         args.rank,
         device=args.device,
    )
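
For reference, here is a minimal, self-contained sketch of the torch.distributed.pipelining flow this example now follows. The pipeline(), PipelineStage(), and split_spec usage mirrors the diff above; the tiny GPT2Config, the gloo process-group setup, and the ScheduleGPipe driving loop are illustrative assumptions rather than part of this commit.

# Hedged sketch: API usage mirrors the diff above; the small model config,
# process-group setup, and schedule-driving loop are assumptions.
import torch
import torch.distributed as dist
from torch.distributed.pipelining import pipeline, PipelineStage, ScheduleGPipe, SplitPoint
from transformers import GPT2ForSequenceClassification, GPT2Config


def run(rank, world_size, device, chunks=4):
    # Small model for illustration (the real example uses the default GPT2Config)
    config = GPT2Config(n_layer=4)
    config.pad_token_id = config.eos_token_id  # allow batch sizes > 1
    gpt2 = GPT2ForSequenceClassification(config)

    # One sample per microbatch; sequence length is arbitrary here
    example_input = torch.randint(0, config.vocab_size, (chunks, 64), device=device)

    # Ceil-divide decoder layers across ranks, as in the diff
    decoders_per_rank = (config.n_layer + world_size - 1) // world_size
    split_spec = {
        f"transformer.h.{i * decoders_per_rank}": SplitPoint.BEGINNING
        for i in range(1, world_size)
    }

    # Trace and split the model, then wrap this rank's stage
    pipe = pipeline(
        gpt2,
        num_chunks=chunks,
        example_args=(example_input,),
        split_spec=split_spec,
    )
    stage = PipelineStage(pipe, rank, device=device)

    # Assumed driver: a GPipe schedule fed with real input on rank 0 only
    schedule = ScheduleGPipe(stage, chunks)
    if rank == 0:
        schedule.step(example_input)
    else:
        schedule.step()


if __name__ == "__main__":
    # Launch with: torchrun --nproc_per_node=2 this_script.py
    dist.init_process_group(backend="gloo")
    run(dist.get_rank(), dist.get_world_size(), device=torch.device("cpu"))
    dist.destroy_process_group()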
