Refine skip list in test_ops_xpu.py (#1126)
Includes:
- Refine the skip lists of `test_ops_xpu.py` and `test_decomp_xpu.py`
- Refine the non-deterministic operator alert list

---------

Co-authored-by: fengqing.lu <[email protected]>
xytintel and LuFinch authored Dec 4, 2024
1 parent be810b5 commit ea6d3a6
Showing 10 changed files with 102 additions and 77 deletions.
2 changes: 1 addition & 1 deletion src/ATen/native/xpu/AdaptiveAveragePooling2d.cpp
@@ -29,7 +29,7 @@ Tensor adaptive_avg_pool2d_backward_xpu(
(input.ndimension() == 3 || input.ndimension() == 4),
"non-empty 3D or 4D (batch mode) tensor expected for input");

globalContext().alertNotDeterministic("_adaptive_avg_pool2d_backward");
globalContext().alertNotDeterministic("adaptive_avg_pool2d_backward_xpu");

Tensor grad_input;
if (input.numel() != 0) {
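For context: `alertNotDeterministic` is the hook behind `torch.use_deterministic_algorithms(True)`. When deterministic algorithms are enforced, the op raises a RuntimeError naming the registered kernel string, so the renames in this commit change the message that users (and the tests below) match against. A minimal sketch of the behavior, assuming an XPU device is available:

import torch

# With deterministic algorithms enforced, the nondeterministic backward
# of adaptive_avg_pool2d should raise rather than run silently.
torch.use_deterministic_algorithms(True)

x = torch.randn(1, 3, 8, 8, device="xpu", requires_grad=True)
y = torch.nn.functional.adaptive_avg_pool2d(x, (2, 2))

try:
    y.sum().backward()
except RuntimeError as e:
    # Expected to mention "adaptive_avg_pool2d_backward_xpu" after this change.
    print(e)

The same check pattern applies to the other alerts renamed in this commit (upsample_bilinear2d, ctc_loss, nll_loss2d).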
1 change: 1 addition & 0 deletions src/ATen/native/xpu/UpSampleBilinear2d.cpp
@@ -30,6 +30,7 @@ TORCH_IMPL_FUNC(upsample_bilinear2d_backward_out_xpu)
std::optional<double> scales_h,
std::optional<double> scales_w,
const Tensor& grad_input) {
globalContext().alertNotDeterministic("upsample_bilinear2d_backward_out_xpu");
xpu::upsample_bilinear2d_backward_out_kernel(
grad_input,
grad_output,
2 changes: 2 additions & 0 deletions src/ATen/native/xpu/sycl/EmbeddingBag.cpp
@@ -531,6 +531,8 @@ Tensor embedding_bag_backward_xpu_max(
const Tensor& max_indices_t,
int64_t num_weights,
int64_t padding_idx) {
globalContext().alertNotDeterministic("embedding_bag_backward_xpu_max");

auto max_indices = max_indices_t.contiguous();
auto grad_weight = at::zeros({num_weights, grad.size(1)}, grad.options());
int64_t stride = grad_weight.stride(0);
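The max-mode backward scatters gradients to the per-bag argmax rows, so duplicate indices can race; hence the new alert. A short sketch of a call that would trip it under deterministic mode (same assumptions as above):

import torch
import torch.nn.functional as F

torch.use_deterministic_algorithms(True)

weight = torch.randn(10, 3, device="xpu", requires_grad=True)
input = torch.tensor([1, 2, 4, 5, 4, 3, 2, 9], device="xpu")
offsets = torch.tensor([0, 4], device="xpu")

out = F.embedding_bag(input, weight, offsets, mode="max")
try:
    out.sum().backward()
except RuntimeError as e:
    print(e)  # expected to mention "embedding_bag_backward_xpu_max"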
13 changes: 11 additions & 2 deletions src/ATen/native/xpu/sycl/Indexing.cpp
@@ -207,7 +207,7 @@ void index_select_kernel(
}),
AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX),
AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES),
-      AT_EXPAND(AT_FLOAT8_TYPES),
+      AT_EXPAND(AT_FLOAT8_TYPES),
kComplexHalf,
kHalf,
kBool,
@@ -1081,7 +1081,8 @@ void take_kernel(TensorIterator& iter, const TensorBase& input) {
canUse32BitIndexMath(input) ? ScalarType::Int : ScalarType::Long,
"take_xpu_index",
[&] {
-        const scalar_t* indexed_ptr = input.template const_data_ptr<scalar_t>();
+        const scalar_t* indexed_ptr =
+            input.template const_data_ptr<scalar_t>();
TakeFunctor<scalar_t, index_t> f(indexed_ptr);
take_put_kernel_template<scalar_t, index_t>(iter, input, f);
});
@@ -1114,6 +1115,14 @@ void put_kernel(
TensorIterator& iter,
const TensorBase& output,
const bool accumulate) {
+  // Nondeterministic when index contains duplicate entries and we do not
+  // accumulate. If we accumulate on GPU, we use atomicGPUAdd, which is
+  // non-deterministic.
+  if (!accumulate ||
+      (accumulate && iter.tensor(1).device().type() == DeviceType::XPU)) {
+    at::globalContext().alertNotDeterministic("put_");
+  }
+
AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3(
at::ScalarType::BFloat16,
at::ScalarType::Half,
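The condition mirrors the CUDA rule: `put_` is flagged whenever duplicate indices can race, i.e. always when not accumulating (last write wins nondeterministically), and when accumulating on the device via atomic adds. A hedged sketch of the user-visible effect, assuming an XPU build:

import torch

torch.use_deterministic_algorithms(True)

dst = torch.zeros(10, device="xpu")
src = torch.randn(4, device="xpu")
idx = torch.tensor([0, 0, 1, 2], device="xpu")  # note the duplicate index

# Fires for accumulate=False on any device, and for accumulate=True on XPU,
# matching the condition added above.
try:
    dst.put_(idx, src, accumulate=True)
except RuntimeError as e:
    print(e)  # expected to mention "put_"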
2 changes: 1 addition & 1 deletion src/ATen/native/xpu/sycl/LossCTCKernels.cpp
@@ -1248,7 +1248,7 @@ Tensor ctc_loss_backward_kernel(
bool zero_infinity) {
// See Note [Writing Nondeterministic Operations]
// Nondeterministic because of atomicAdd usage
globalContext().alertNotDeterministic("ctc_loss_backward_kernel");
globalContext().alertNotDeterministic("ctc_loss_backward_xpu");
return AT_DISPATCH_FLOATING_TYPES(
log_probs.scalar_type(), "ctc_loss_backward_xpu", [&] {
if (targets.scalar_type() == kLong) {
2 changes: 1 addition & 1 deletion src/ATen/native/xpu/sycl/LossNLL2dKernels.cpp
@@ -186,7 +186,7 @@ void nll_loss2d_forward_kernel(
int64_t reduction,
int64_t ignore_index) {
if (reduction != at::Reduction::None) {
at::globalContext().alertNotDeterministic("nll_loss2d_forward_kernel");
at::globalContext().alertNotDeterministic("nll_loss2d_forward_xpu");
}

total_weight.resize_({});
142 changes: 76 additions & 66 deletions test/xpu/skip_list_common.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions test/xpu/test_decomp_xpu.py
@@ -39,6 +39,7 @@ def _op_assert_ref(test_case, op, test_dtype, i, orig, decomp, ref, args, kwargs
(torch.float16, torch.ops.aten.nll_loss_forward.default): 1e-2,
(torch.bfloat16, torch.ops.aten.nll_loss_forward.default): 1e-1,
(torch.float16, torch.ops.aten.nll_loss2d_forward.default): 1e-2,
+    (torch.float16, torch.ops.aten.nll_loss2d_backward.default): 1e-4,
(torch.bfloat16, torch.ops.aten.nll_loss2d_forward.default): 2e-1,
(torch.float16, torch.ops.aten.hardswish.default): 2e-7,
(torch.bfloat16, torch.ops.aten.hardswish.default): 2e-7,
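The new entry extends the per-(dtype, op) tolerance table that `_op_assert_ref` consults. A minimal illustration of the lookup pattern (the helper name here is hypothetical, not the test's actual API):

import torch

# Hypothetical standalone version of the table lookup; the real test wires
# this dict into _op_assert_ref.
tol_table = {
    (torch.float16, torch.ops.aten.nll_loss2d_forward.default): 1e-2,
    (torch.float16, torch.ops.aten.nll_loss2d_backward.default): 1e-4,
}

def tolerance_for(dtype, op, default=0.0):
    return tol_table.get((dtype, op), default)

print(tolerance_for(torch.float16, torch.ops.aten.nll_loss2d_backward.default))  # 0.0001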
1 change: 1 addition & 0 deletions test/xpu/test_indexing_xpu.py
@@ -13,6 +13,7 @@
from test_indexing import NumpyTests,TestIndexing
import torch

+torch.Tensor.is_cuda = torch.Tensor.is_xpu

def __test_index_put_accumulate_with_optional_tensors(self, device):
# TODO: replace with a better solution.
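Aliasing `is_cuda` to `is_xpu` makes upstream CUDA-oriented device checks treat XPU tensors as if they were CUDA tensors, so inherited tests run unmodified. A small sketch of the effect, assuming an XPU build:

import torch

torch.Tensor.is_cuda = torch.Tensor.is_xpu  # the monkeypatch from the test above

t = torch.empty(1, device="xpu")
print(t.is_cuda)  # True, so upstream `if tensor.is_cuda:` branches take the GPU path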
13 changes: 7 additions & 6 deletions test/xpu/test_torch_xpu.py
@@ -1439,8 +1439,10 @@ def test_nondeterministic_alert_AvgPool3d(self, device):
res = module(input)
grad = torch.ones_like(res)

-        self.check_device_nondeterministic_alert(grad, 'avg_pool3d_backward')
-
+        self.check_nondeterministic_alert(
+            lambda: res.backward(grad, retain_graph=True),
+            'avg_pool3d_backward_' + torch.device(device).type,
+            torch.device(device).type == 'cuda' or torch.device(device).type == 'xpu')

@skipIfMPS
@skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707")
@@ -1478,7 +1480,7 @@ def test_nondeterministic_alert_MaxPool3d(self, device):

self.check_nondeterministic_alert(
lambda: res.backward(grad, retain_graph=True),
-            'max_pool3d_with_indices_backward' + torch.device(device).type,
+            'max_pool3d_with_indices_backward_' + torch.device(device).type,
torch.device(device).type == 'cuda' or torch.device(device).type == 'xpu')

@skipIfMPS
Expand Down Expand Up @@ -1770,10 +1772,9 @@ def test_nondeterministic_alert_NLLLoss(self, device):
input = torch.randn(2, 3, 5, 5, device=device)
target = torch.rand(2, 5, 5, device=device).mul(3).floor().long()


        self.check_nondeterministic_alert(
            lambda: module(input, target),
-            'nll_loss2d_forward_out_' + torch.device(device).type + '_template',
+            'nll_loss2d_forward_' + torch.device(device).type,
torch.device(device).type == 'cuda' or torch.device(device).type == 'xpu')

@skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707")
Expand All @@ -1788,7 +1789,7 @@ def test_nondeterministic_alert_CTCLoss(self, device):

self.check_nondeterministic_alert(
lambda: res.backward(grad, retain_graph=True),
-            'ctc_loss_backward_gpu',
+            'ctc_loss_backward_' + torch.device(device).type,
torch.device(device).type == 'cuda' or torch.device(device).type == 'xpu')

@skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707")
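The assertions above rely on `check_nondeterministic_alert` from the inherited upstream suite. A simplified sketch of what such a helper verifies (not the exact upstream implementation):

import torch

def check_nondeterministic_alert(fn, caller_name, should_alert=True):
    # Run fn with deterministic algorithms enforced and check that it raises
    # a RuntimeError naming the expected kernel (or that it does not alert).
    prev = torch.are_deterministic_algorithms_enabled()
    torch.use_deterministic_algorithms(True)
    alerted = False
    try:
        fn()
    except RuntimeError as e:
        alerted = caller_name in str(e)
    finally:
        torch.use_deterministic_algorithms(prev)
    assert alerted == should_alert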
