Skip to content

Commit

Permalink
Enable hpu graph tests (#115)
Browse files Browse the repository at this point in the history
Co-authored-by: Jerome Anand <[email protected]>
  • Loading branch information
ankitgola005 and jerome-habana authored Dec 4, 2023
1 parent 99622f5 commit 78c5da7
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 9 deletions.
3 changes: 2 additions & 1 deletion tests/run_standalone_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# THIS FILE ASSUMES IT IS RUN INSIDE THE tests DIRECTORY
set -e

- # Default hpus
+ # Defaults
hpus=2

# Parse input args
Expand Down Expand Up @@ -53,6 +53,7 @@ for test in $tests; do
result="$test:${status^^}"
echo $result
if [[ $status == "failed" ]]; then
+ cat $test-results.xml
exit 1
fi
results+=("$result")
Expand Down
12 changes: 4 additions & 8 deletions tests/test_pytorch/test_hpu_graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ def __init__(self, graph_mode=HPUGraphMode.TRAIN_NONE, batch_size=None):
self.automatic_optimization = False
self.training_step = self.train_with_capture_and_replay
self.static_input = torch.zeros((batch_size), 1, 28, 28, device="hpu")
- self.static_target = torch.randint(0, 10, (batch_size,), device="hpu")
- self.static_y_pred = torch.randint(0, 10, (batch_size,), device="hpu")
+ self.static_target = torch.zeros((batch_size,), device="hpu", dtype=torch.long)
+ self.static_y_pred = torch.zeros((batch_size,), device="hpu", dtype=torch.long)
self.static_loss = None
self.acc = None
self.validation_step = self.validation_step_capture_replay
Expand Down Expand Up @@ -159,11 +159,9 @@ def test_step(self, batch, batch_idx):
self.static_input.copy_(x)
self.static_target.copy_(y)
self.g.replay()
- acc = self.accuracy(None, y, self.static_y_pred)
+ self.log("test_acc", self.accuracy(None, y, self.static_y_pred))
else:
- logits = self.forward(x)
- acc = self.accuracy(logits, y)
- self.log("test_acc", acc)
+ self.log("test_acc", self.accuracy(self.forward(x), y))

@staticmethod
def accuracy(logits, y, pred=None):
Expand Down Expand Up @@ -241,7 +239,6 @@ def test_hpu_graphs(tmpdir, graph_mode, mode):
train_model(tmpdir, 1, model=model, data_module=data_module, profiler=None, mode=mode)


- @pytest.mark.xfail(strict=False, reason="TBD: Resolve capture replay issue with validation")
@pytest.mark.parametrize(
"train_modes",
[
Expand Down Expand Up @@ -270,7 +267,6 @@ def test_hpu_graph_accuracy_train(tmpdir, train_modes):
), loss_metrics # Compare val acc


- @pytest.mark.xfail(strict=False, reason="TBD: Resolve capture replay issue")
@pytest.mark.parametrize(
"train_modes",
[
Expand Down

0 comments on commit 78c5da7

Please sign in to comment.