Enable HPU graph tests (#115)

Co-authored-by: Jerome Anand <[email protected]>
Lightning-AI · Dec 4, 2023 · 78c5da7
1 parent 99622f5
commit 78c5da7
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 9 deletions.
diff --git a/tests/run_standalone_tests.sh b/tests/run_standalone_tests.sh
@@ -16,7 +16,7 @@
 # THIS FILE ASSUMES IT IS RUN INSIDE THE tests DIRECTORY
 set -e
 
-# Default hpus
+# Defaults
 hpus=2
 
 # Parse input args
@@ -53,6 +53,7 @@ for test in $tests; do
   result="$test:${status^^}"
   echo $result
   if [[ $status == "failed" ]]; then
+    cat $test-results.xml
     exit 1
   fi
   results+=("$result")

diff --git a/tests/test_pytorch/test_hpu_graphs.py b/tests/test_pytorch/test_hpu_graphs.py
@@ -68,8 +68,8 @@ def __init__(self, graph_mode=HPUGraphMode.TRAIN_NONE, batch_size=None):
             self.automatic_optimization = False
             self.training_step = self.train_with_capture_and_replay
             self.static_input = torch.zeros((batch_size), 1, 28, 28, device="hpu")
-            self.static_target = torch.randint(0, 10, (batch_size,), device="hpu")
-            self.static_y_pred = torch.randint(0, 10, (batch_size,), device="hpu")
+            self.static_target = torch.zeros((batch_size,), device="hpu", dtype=torch.long)
+            self.static_y_pred = torch.zeros((batch_size,), device="hpu", dtype=torch.long)
             self.static_loss = None
             self.acc = None
             self.validation_step = self.validation_step_capture_replay
@@ -159,11 +159,9 @@ def test_step(self, batch, batch_idx):
                 self.static_input.copy_(x)
                 self.static_target.copy_(y)
                 self.g.replay()
-            acc = self.accuracy(None, y, self.static_y_pred)
+                self.log("test_acc", self.accuracy(None, y, self.static_y_pred))
         else:
-            logits = self.forward(x)
-            acc = self.accuracy(logits, y)
-        self.log("test_acc", acc)
+            self.log("test_acc", self.accuracy(self.forward(x), y))
 
     @staticmethod
     def accuracy(logits, y, pred=None):
@@ -241,7 +239,6 @@ def test_hpu_graphs(tmpdir, graph_mode, mode):
     train_model(tmpdir, 1, model=model, data_module=data_module, profiler=None, mode=mode)
 
 
-@pytest.mark.xfail(strict=False, reason="TBD: Resolve capture replay issue with validation")
 @pytest.mark.parametrize(
     "train_modes",
     [
@@ -270,7 +267,6 @@ def test_hpu_graph_accuracy_train(tmpdir, train_modes):
     ), loss_metrics  # Compare val acc
 
 
-@pytest.mark.xfail(strict=False, reason="TBD: Resolve capture replay issue")
 @pytest.mark.parametrize(
     "train_modes",
     [