adjust tolerance for xpu in utils (#749)
Fix some accuracy problems.

1. Add the `toleranceOverride` decorator to OpInfo entries when hooking opdb.
This handles cases with accuracy problems in tests wrapped by the `ops`
decorator (except when the difference is nan or inf); a sketch of the idea
appears after the lists below.

2. Fix test_decomp and test_torch accuracy problems.
3. Align `ModuleTest_test` with the latest PyTorch code:
        "test_Conv2d_dilated_with_long_tensor_cuda",
        "test_Conv2d_groups_thnn_with_long_tensor_cuda",
        "test_Conv2d_groups_with_long_tensor_cuda",
        "test_Conv2d_no_bias_with_long_tensor_cuda",
        "test_Conv2d_padding_with_long_tensor_cuda",
        "test_Conv2d_strided_with_long_tensor_cuda",
        "test_Conv2d_with_long_tensor_cuda",
        "test_Conv3d_1x1x1_no_bias_with_long_tensor_cuda",
        "test_Conv3d_groups_with_long_tensor_cuda",
        "test_Conv3d_no_bias_with_long_tensor_cuda",
        "test_Conv3d_stride_padding_with_long_tensor_cuda",
        "test_Conv3d_stride_with_long_tensor_cuda",
        "test_Conv3d_with_long_tensor_cuda",
        "test_ConvTranspose2d_dilated_with_long_tensor_cuda",
        "test_ConvTranspose2d_groups_with_long_tensor_cuda",
        "test_ConvTranspose2d_no_bias_with_long_tensor_cuda",
        "test_ConvTranspose2d_with_long_tensor_cuda",
4. Add 'nn_AvgPool2d' to the CUDA xfail list:
        "test_memory_format_nn_AvgPool2d_xpu_float32",
        "test_memory_format_nn_AvgPool2d_xpu_float64",
5. Clean up the skip list, removing cases that now pass with the latest code:
        "test_compare_cpu_native_dropout_backward_xpu_bool",
        "test_compare_cpu_native_dropout_backward_xpu_int16",
        "test_compare_cpu_native_dropout_backward_xpu_int32",
        "test_compare_cpu_native_dropout_backward_xpu_int64",
        "test_compare_cpu_native_dropout_backward_xpu_int8",
        "test_compare_cpu_native_dropout_backward_xpu_uint8",
        "test_compare_cpu_nn_functional_avg_pool2d_xpu_int64",
        "test_compare_cpu_abs_xpu_bool",
        "test_dtypes_nn_functional_linear_xpu",
        "test_dtypes_nn_functional_pad_replicate_negative_xpu",
        "test_dtypes_nn_functional_pad_replicate_xpu",
        "test_dtypes_unique_consecutive_xpu",
        "test_SmoothL1Loss_no_batch_dim_mean_cuda_half",
        "test_SmoothL1Loss_no_batch_dim_none_cuda_half",
        "test_SmoothL1Loss_no_batch_dim_sum_cuda_half",
        "test_tensor_ctor_device_inference_xpu",
        "test_trace_xpu_float16",
        "test_fn_fwgrad_bwgrad_linalg_det_singular_xpu_float64",
        "test_fn_fwgrad_bwgrad_linalg_pinv_singular_xpu_complex128",
        "test_fn_fwgrad_bwgrad_linalg_vector_norm_xpu_complex128",
        "test_fn_fwgrad_bwgrad_masked_normalize_xpu_complex128",
        "test_fn_fwgrad_bwgrad_norm_inf_xpu_complex128",
        "test_fn_fwgrad_bwgrad_renorm_xpu_complex128",
        "test_forward_mode_AD_linalg_vector_norm_xpu_complex128",
        "test_forward_mode_AD_masked_normalize_xpu_complex128",
        "test_forward_mode_AD_norm_inf_xpu_complex128",
        "test_forward_mode_AD_renorm_xpu_complex128",
        "test_inplace_forward_mode_AD_renorm_xpu_complex128",
        "test_fn_fwgrad_bwgrad_nn_functional_group_norm_xpu_float64",
        "test_forward_mode_AD_nn_functional_group_norm_xpu_float64",
        "test_fn_gradgrad_linalg_det_singular_xpu_float64",
        "test_fn_gradgrad_linalg_pinv_singular_xpu_complex128",
        "test_fn_grad_masked_normalize_xpu_complex128",
        "test_fn_grad_renorm_xpu_complex128",
        "test_fn_gradgrad_linalg_vector_norm_xpu_complex128",
        "test_fn_gradgrad_masked_normalize_xpu_complex128",
        "test_fn_gradgrad_renorm_xpu_complex128",
        "test_inplace_grad_renorm_xpu_complex128",
        "test_inplace_gradgrad_renorm_xpu_complex128",
        "test_fn_grad_nn_functional_max_pool2d_xpu_float64",
        "test_multihead_attn_fast_path_small_test_xpu_float64",

---------

Co-authored-by: Feng Yuan <[email protected]>
yuchengliu1 and fengyuan14 authored Aug 21, 2024
1 parent 97532c0 commit e040874
Showing 6 changed files with 182 additions and 137 deletions.
41 changes: 4 additions & 37 deletions test/xpu/extended/run_test_with_skip.py
@@ -50,16 +50,7 @@
"test_compare_cpu_exp2_xpu_complex128",
"test_compare_cpu_exp2_xpu_complex64",
"test_compare_cpu_nextafter_xpu_bfloat16",
# skip random failure due to accuracy
# AssertionError: Tensor-likes are not close!
"test_compare_cpu_atan2_xpu_bfloat16",
# CUDA does not support the data type either
"test_compare_cpu_native_dropout_backward_xpu_bool",
"test_compare_cpu_native_dropout_backward_xpu_int16",
"test_compare_cpu_native_dropout_backward_xpu_int32",
"test_compare_cpu_native_dropout_backward_xpu_int64",
"test_compare_cpu_native_dropout_backward_xpu_int8",
"test_compare_cpu_native_dropout_backward_xpu_uint8",
"test_non_standard_bool_values_native_dropout_backward_xpu_bool",
# Need FP64 golden ref for more accurate comparison
"test_compare_cpu_log_softmax_xpu_bfloat16",
@@ -79,8 +70,6 @@
# https://en.cppreference.com/w/cpp/numeric/complex/exp
"test_compare_cpu_sigmoid_xpu_complex64",
"test_compare_cpu_sigmoid_xpu_complex128",
# Align with CUDA dtypes - RuntimeError: "avg_pool2d_out_xpu" not implemented for 'Long'
"test_compare_cpu_nn_functional_avg_pool2d_xpu_int64",
# Special handle (different calculation order) in CPU reference impl.
# https://github.com/pytorch/pytorch/blob/c97e3ebb96d7457075b019b94411e8c2d058e68b/aten/src/ATen/native/EmbeddingBag.cpp#L300
"test_compare_cpu_nn_functional_embedding_bag_xpu_bfloat16",
@@ -93,11 +82,6 @@
"test_view_replay_nn_functional_embedding_bag_xpu_float32",
# Double and complex datatype matmul is not supported in oneDNN
"test_compare_cpu_cdist_xpu_float64",
# CPU reference fail. `abs_cpu` does not support bool.
# The case should be skipped by PyTorch test infrastructure, but not be
# skipped correctly after https://github.com/pytorch/pytorch/pull/124147
# https://github.com/intel/torch-xpu-ops/issues/412
"test_compare_cpu_abs_xpu_bool",
# bilinear interpolate includes large calculation steps, accuracy reduces in half-precision
# Not in CUDA test scope too
"test_compare_cpu_nn_functional_upsample_bilinear_xpu_bfloat16",
@@ -146,28 +130,11 @@
# RuntimeError: Expected both inputs to be Half, Float or Double tensors but got BFloat16 and BFloat16.
# Polar's backward is calculated using complex(), which does not support bfloat16. CUDA fails with same error.
"test_compare_cpu_polar_xpu_bfloat16",
# Regressions due to PyTorch uplift (Numeric difference in float and bfloat)
# https://github.com/intel/torch-xpu-ops/issues/549
# Example fail log
# FAILED test_ops_xpu.py::TestCommonXPU::test_compare_cpu_nn_functional_batch_norm_xpu_float16 - AssertionError: Tensor-likes are not close!
# Mismatched elements: 3 / 72 (4.2%)
# Greatest absolute difference: 0.0029296875 at index (0, 1, 1, 0) (up to 0.001 allowed)
# Greatest relative difference: 0.0032501220703125 at index (2, 1, 2, 1) (up to 0.001 allowed)
"test_compare_cpu_nn_functional_batch_norm_xpu_float16",
"test_compare_cpu_std_mean_xpu_bfloat16",
"test_compare_cpu_sub_xpu_float16",
"test_compare_cpu_var_mean_xpu_bfloat16",
# Precision error.
# Mismatched elements: 2 / 125 (1.6%)
# Greatest absolute difference: 0.001953125 at index (2, 0, 0) (up to 0.001 allowed)
# Greatest relative difference: 0.007568359375 at index (2, 0, 0) (up to 0.001 allowed)
"test_compare_cpu_cumprod_xpu_bfloat16",
# Precision error.
# Mismatched elements: 1 / 9 (11.1%)
# Greatest absolute difference: 0.001953125 at index (2, 2) (up to 0.001 allowed)
# Greatest relative difference: 0.004669189453125 at index (2, 2) (up to 0.001 allowed)
# Not in CUDA test scope too
"test_compare_cpu_prod_xpu_bfloat16 ",
# Mismatched elements: 1 / 25 (4.0%)
# Greatest absolute difference: 0.00146484375 at index (0, 0) (up to 0.001 allowed)
# Greatest relative difference: 0.0163116455078125 at index (0, 0) (up to 0.001 allowed)
"test_compare_cpu_sub_xpu_float16",
# different results for value index due to unstable sort.
# XPU and CUDA have the same result.
"test_compare_cpu_median_xpu_int16",
18 changes: 14 additions & 4 deletions test/xpu/extended/test_ops_xpu.py
@@ -63,10 +63,20 @@ class Namespace:
# Therefore, we build TestCommonProxy by inheriting the TestCommon and TestCase to ensure
# the same feature set as the TestCommon.
class TestCommonProxy(TestCase, TestCommonBase):
pass
def __init__(self, test_case = None):
if test_case:
# copy custom accuracy setting
self.maxDiff = test_case.maxDiff
self.precision = test_case.precision
self.rel_tol = test_case.rel_tol

class TestCompositeComplianceProxy(TestCase, TestCompositeComplianceBase):
pass
def __init__(self, test_case = None):
if test_case:
# copy custom accuracy setting
self.maxDiff = test_case.maxDiff
self.precision = test_case.precision
self.rel_tol = test_case.rel_tol


class TestCommon(TestCase):
@@ -78,13 +88,13 @@ class TestCommon(TestCase):
def test_compare_cpu(self, device, dtype, op):
# check if supported both by CPU and XPU
if dtype in op.dtypes and dtype in op.supported_dtypes(device):
self.proxy = Namespace.TestCommonProxy()
self.proxy = Namespace.TestCommonProxy(self)
test_common_test_fn = get_wrapped_fn(Namespace.TestCommonProxy.test_compare_cpu)
test_common_test_fn(self.proxy, device, dtype, op)
# for CUDA doesn't support operators
elif (op.name in _ops_without_cuda_support):
if dtype in op.dtypes:
self.proxy = Namespace.TestCommonProxy()
self.proxy = Namespace.TestCommonProxy(self)
test_common_test_fn = get_wrapped_fn(Namespace.TestCommonProxy.test_compare_cpu)
test_common_test_fn(self.proxy, device, dtype, op)
else:
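
For context on the proxy change above: `toleranceOverride` adjusts the `precision` and `rel_tol` attributes of the running test case, so the proxy copies those attributes from the wrapping `TestCommon` instance to let the relaxed tolerance reach the proxied `test_compare_cpu` call. A standalone sketch of just that hand-off, using illustrative stand-in classes rather than the real test classes:

    class OuterCase:
        # Attributes that a tolerance override adjusts on the active test case.
        maxDiff = None
        precision = 1e-3
        rel_tol = 1e-3

    class Proxy:
        def __init__(self, test_case=None):
            if test_case:
                # Copy the custom accuracy settings from the wrapping test case,
                # mirroring TestCommonProxy.__init__ in the hunk above.
                self.maxDiff = test_case.maxDiff
                self.precision = test_case.precision
                self.rel_tol = test_case.rel_tol

    proxy = Proxy(OuterCase())
    assert (proxy.precision, proxy.rel_tol) == (1e-3, 1e-3)
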
75 changes: 2 additions & 73 deletions test/xpu/skip_list_common.py
@@ -697,11 +697,6 @@
"test_conj_view_to_sparse_xpu_complex64",
"test_neg_conj_view_to_sparse_xpu_complex128",
"test_neg_view_to_sparse_xpu_float64",
# # CPU fallback error:AssertionError: The supported dtypes for nn.functional.pad on device type xpu are incorrect!
# The following dtypes did not work in forward but are listed by the OpInfo: {torch.float16}.
# The following dtypes did not work in backward but are listed by the OpInfo: {torch.float16}.
"test_dtypes_nn_functional_pad_replicate_negative_xpu",
"test_dtypes_nn_functional_pad_replicate_xpu",
# Op impl aligns with CUDA on the supported dtypes.
# RuntimeError: "avg_pool2d_xpu" not implemented for 'Long'.
# Retrieve the case, once avg_pool1d is supported. Test infra will change claimed dtypes in test case once the op is listed
@@ -711,7 +706,6 @@
#AssertionError: The supported dtypes for unique_consecutive on device type xpu are incorrect!
#The following dtypes worked in forward but are not listed by the OpInfo: {torch.bfloat16}.
#XPU supports bfloat16, CUDA doesn't support it.
"test_dtypes_unique_consecutive_xpu",
"test_dtypes_unique_xpu",
# RuntimeError: Expected both inputs to be Half, Float or Double tensors but got BFloat16 and BFloat16.
# Polar's backward is calculated using complex(), which does not support bfloat16. CUDA fails with same error.
@@ -1032,10 +1026,8 @@
"test_save_load_nn_TransformerEncoder_train_mode_xpu_float64",
"test_save_load_nn_Transformer_xpu_float64",
# Unexpected success:
"test_cpu_gpu_parity_nn_ConvTranspose2d_xpu_complex32",
"test_cpu_gpu_parity_nn_ConvTranspose1d_xpu_complex32",
"test_memory_format_nn_AvgPool2d_xpu_float32",
"test_memory_format_nn_AvgPool2d_xpu_float64",
"test_cpu_gpu_parity_nn_ConvTranspose2d_xpu_complex32",
# CPU fallback could not cover these
# CUDA xfails
# Failed: Unexpected success
@@ -1104,31 +1096,12 @@
"test_to_nn_TransformerEncoder_eval_mode_swap_True_set_grad_True_xpu_float32",
"test_to_nn_TransformerEncoder_train_mode_swap_True_set_grad_True_xpu_float32",
"test_to_nn_Transformer_swap_True_set_grad_True_xpu_float32",
# issue 746, adjust tolerance
"test_non_contiguous_tensors_nn_Conv3d_xpu_float32",
),

"test_nn_xpu.py": (
# AttributeError: module 'torch.xpu' has no attribute 'FloatTensor'
"test_type",
# AssertionError: Tensor-likes are not close!
"test_Conv2d_dilated_with_long_tensor_cuda",
"test_Conv2d_groups_thnn_with_long_tensor_cuda",
"test_Conv2d_groups_with_long_tensor_cuda",
"test_Conv2d_no_bias_with_long_tensor_cuda",
"test_Conv2d_padding_with_long_tensor_cuda",
"test_Conv2d_strided_with_long_tensor_cuda",
"test_Conv2d_with_long_tensor_cuda",
"test_Conv3d_1x1x1_no_bias_with_long_tensor_cuda",
"test_Conv3d_groups_with_long_tensor_cuda",
"test_Conv3d_no_bias_with_long_tensor_cuda",
"test_Conv3d_stride_padding_with_long_tensor_cuda",
"test_Conv3d_stride_with_long_tensor_cuda",
"test_Conv3d_with_long_tensor_cuda",
"test_ConvTranspose2d_dilated_with_long_tensor_cuda",
"test_ConvTranspose2d_groups_with_long_tensor_cuda",
"test_ConvTranspose2d_no_bias_with_long_tensor_cuda",
"test_ConvTranspose2d_with_long_tensor_cuda",
"test_RReLU_cuda",
"test_RReLU_no_batch_dim_cuda",
"test_RReLU_with_up_down_cuda",
@@ -1176,10 +1149,6 @@
# AssertionError: False is not true
"test_ctc_loss_cudnn_xpu", # want "xpu" in function name
"test_ctc_loss_cudnn_tensor", # want "xpu" in function name
# RuntimeError: "smooth_l1_backward_cpu_out" not implemented for 'Half'
"test_SmoothL1Loss_no_batch_dim_mean_cuda_half",
"test_SmoothL1Loss_no_batch_dim_none_cuda_half",
"test_SmoothL1Loss_no_batch_dim_sum_cuda_half",
# RuntimeError: "multilabel_margin_loss_forward_out_frame" not implemented for 'Half'
"test_MultiLabelMarginLoss_no_batch_dim_mean_cuda_half",
"test_MultiLabelMarginLoss_no_batch_dim_none_cuda_half",
@@ -1230,8 +1199,6 @@
# CPU only (vs Numpy). CUDA skips these cases since non-deterministic results are outputed for inf and nan.
"test_float_to_int_conversion_finite_xpu_int8",
"test_float_to_int_conversion_finite_xpu_int16",
# sparse
"test_tensor_ctor_device_inference_xpu",
# Dispatch issue. It is a composite operator. But it is implemented by
# DispatchStub. XPU doesn't support DispatchStub.
"test_kaiser_window_xpu",
@@ -1316,11 +1283,6 @@
"test_reference_numerics_large_asinh_xpu_complex128",
"test_reference_numerics_large_asinh_xpu_complex64",
"test_reference_numerics_large_asinh_xpu_complex32",
# Mismatched elements: 1 / 943593 (0.0%)
# Greatest absolute difference: 1.3363442121772096e-05 at index (742, 249) (up to 1e-05 allowed)
# Greatest relative difference: 8.852276550896931e-06 at index (742, 249) (up to 1.3e-06 allowed)
"test_reference_numerics_normal_nn_functional_tanhshrink_xpu_complex64",

# AssertionError: Tensor-likes are not close!
# exceeded maximum allowed difference
# Greatest absolute difference: 6.266784475883469e-05 at index (463, 204) (up to 1e-05 allowed)
@@ -1468,8 +1430,6 @@
# https://github.com/intel/torch-xpu-ops/issues/275
# NotImplementedError: Could not run 'aten::empty_quantized' with arguments from the 'QuantizedXPU' backend.
"test_flip_xpu_float32",
# RuntimeError: "trace" not implemented for 'Half'
"test_trace_xpu_float16",
),

"test_content_store_xpu.py": None,
@@ -1729,7 +1689,6 @@
"test_fn_fwgrad_bwgrad_linalg_cholesky_xpu_float64",
"test_fn_fwgrad_bwgrad_linalg_cond_xpu_complex128",
"test_fn_fwgrad_bwgrad_linalg_cond_xpu_float64",
"test_fn_fwgrad_bwgrad_linalg_det_singular_xpu_float64",
"test_fn_fwgrad_bwgrad_linalg_det_xpu_complex128",
"test_fn_fwgrad_bwgrad_linalg_det_xpu_float64",
"test_fn_fwgrad_bwgrad_linalg_eig_xpu_complex128",
@@ -1765,7 +1724,6 @@
"test_fn_fwgrad_bwgrad_linalg_norm_xpu_float64",
"test_fn_fwgrad_bwgrad_linalg_pinv_hermitian_xpu_complex128",
"test_fn_fwgrad_bwgrad_linalg_pinv_hermitian_xpu_float64",
"test_fn_fwgrad_bwgrad_linalg_pinv_singular_xpu_complex128",
"test_fn_fwgrad_bwgrad_linalg_pinv_singular_xpu_float64",
"test_fn_fwgrad_bwgrad_linalg_pinv_xpu_complex128",
"test_fn_fwgrad_bwgrad_linalg_pinv_xpu_float64",
@@ -1957,16 +1915,7 @@
# torch.autograd.gradcheck.GradcheckError: While considering the real part of complex inputs only, Jacobian computed with forward mode mismatch for output 0 with respect to input 0,
"test_fn_fwgrad_bwgrad_linalg_norm_xpu_complex128",
# torch.autograd.gradcheck.GradcheckError: While considering the imaginary part of complex inputs only, Jacobian computed with forward mode mismatch for output 0 with respect to input 0,
"test_fn_fwgrad_bwgrad_linalg_vector_norm_xpu_complex128",
"test_fn_fwgrad_bwgrad_masked_normalize_xpu_complex128",
"test_fn_fwgrad_bwgrad_norm_inf_xpu_complex128",
"test_fn_fwgrad_bwgrad_renorm_xpu_complex128",
"test_forward_mode_AD_linalg_norm_xpu_complex128",
"test_forward_mode_AD_linalg_vector_norm_xpu_complex128",
"test_forward_mode_AD_masked_normalize_xpu_complex128",
"test_forward_mode_AD_norm_inf_xpu_complex128",
"test_forward_mode_AD_renorm_xpu_complex128",
"test_inplace_forward_mode_AD_renorm_xpu_complex128",
# RuntimeError: could not create a primitive descriptor for a deconvolution forward propagation primitive
"test_fn_fwgrad_bwgrad_nn_functional_conv_transpose2d_xpu_complex128",
"test_fn_fwgrad_bwgrad_nn_functional_conv_transpose2d_xpu_float64",
@@ -1976,9 +1925,6 @@
"test_forward_mode_AD_nn_functional_conv_transpose2d_xpu_float64",
"test_forward_mode_AD_nn_functional_conv_transpose3d_xpu_complex128",
"test_forward_mode_AD_nn_functional_conv_transpose3d_xpu_float64",
# RuntimeError: input tensor must have at least one element, but got input_sizes = [1, 0, 1]
"test_fn_fwgrad_bwgrad_nn_functional_group_norm_xpu_float64",
"test_forward_mode_AD_nn_functional_group_norm_xpu_float64",
# torch.autograd.gradcheck.GradcheckError: Jacobian computed with forward mode mismatch for output 0 with respect to input 0,
"test_fn_fwgrad_bwgrad_nn_functional_rrelu_xpu_float64",
"test_forward_mode_AD_nn_functional_rrelu_xpu_float64",
@@ -2010,11 +1956,6 @@
"test_scaled_mm_vs_emulated_float16_xpu",
"test_scaled_mm_vs_emulated_float32_xpu",
"test_scaled_mm_vs_emulated_row_wise_bfloat16_xpu",
# https://github.com/intel/torch-xpu-ops/issues/676
# Mismatched elements: 9 / 1003002 (0.0%)
# Greatest absolute difference: 711.126220703125 at index (472, 999) (up to 0.1 allowed)
# Greatest relative difference: 2.7107455730438232 at index (472, 997) (up to 0.1 allowed)
"test_cublas_addmm_size_1000_xpu_float32",
),

"test_maskedtensor_xpu.py": (
@@ -2110,6 +2051,7 @@
"test_reduction_all_sum_layout1_xpu_float16",
"test_reduction_all_sum_layout1_xpu_float64",
# RuntimeError: device type of values (xpu) must be CPU or CUDA or Meta
"test_like_",
"test_invalid_sparse_layout_xpu",
"test_to_dense_and_sparse_csr_xpu",
"test_binary_core_add_layout2_xpu_float16",
@@ -2359,7 +2301,6 @@
"test_fn_gradgrad_linalg_cholesky_xpu_float64",
"test_fn_gradgrad_linalg_cond_xpu_complex128",
"test_fn_gradgrad_linalg_cond_xpu_float64",
"test_fn_gradgrad_linalg_det_singular_xpu_float64",
"test_fn_gradgrad_linalg_det_xpu_complex128",
"test_fn_gradgrad_linalg_det_xpu_float64",
"test_fn_gradgrad_linalg_eig_xpu_complex128",
@@ -2394,7 +2335,6 @@
"test_fn_gradgrad_linalg_multi_dot_xpu_float64",
"test_fn_gradgrad_linalg_pinv_hermitian_xpu_complex128",
"test_fn_gradgrad_linalg_pinv_hermitian_xpu_float64",
"test_fn_gradgrad_linalg_pinv_singular_xpu_complex128",
"test_fn_gradgrad_linalg_pinv_singular_xpu_float64",
"test_fn_gradgrad_linalg_pinv_xpu_complex128",
"test_fn_gradgrad_linalg_pinv_xpu_float64",
@@ -2486,14 +2426,6 @@
"test_fn_gradgrad_nn_functional_rrelu_xpu_float64",
"test_inplace_grad_nn_functional_rrelu_xpu_float64",
"test_inplace_gradgrad_nn_functional_rrelu_xpu_float64",
### Error #3 in TestBwdGradientsXPU , totally 8 , torch.autograd.gradcheck.GradcheckError: While considering the imaginary part of complex outputs only, Jacobian mismatch for output 0 with respect to input 0,
"test_fn_grad_masked_normalize_xpu_complex128",
"test_fn_grad_renorm_xpu_complex128",
"test_fn_gradgrad_linalg_vector_norm_xpu_complex128",
"test_fn_gradgrad_masked_normalize_xpu_complex128",
"test_fn_gradgrad_renorm_xpu_complex128",
"test_inplace_grad_renorm_xpu_complex128",
"test_inplace_gradgrad_renorm_xpu_complex128",
### Error #4 in TestBwdGradientsXPU , totally 8 , RuntimeError: could not create a primitive descriptor for a deconvolution forward propagation primitive
"test_fn_grad_nn_functional_conv_transpose2d_xpu_complex128",
"test_fn_grad_nn_functional_conv_transpose2d_xpu_float64",
@@ -2503,8 +2435,6 @@
"test_fn_gradgrad_nn_functional_conv_transpose2d_xpu_float64",
"test_fn_gradgrad_nn_functional_conv_transpose3d_xpu_complex128",
"test_fn_gradgrad_nn_functional_conv_transpose3d_xpu_float64",
### Error #6 in TestBwdGradientsXPU , totally 5 , torch.autograd.gradcheck.GradcheckError: Backward is not reentrant, i.e., running backward with same input and grad_output multiple times gives different values, although analytical gradient matches numerical gradient.The tolerance for nondeterminism was 0.0.
"test_fn_grad_nn_functional_max_pool2d_xpu_float64",
"test_fn_gradgrad_index_reduce_mean_xpu_float64",
"test_fn_gradgrad_index_reduce_prod_xpu_float64",
"test_inplace_gradgrad_index_reduce_mean_xpu_float64",
@@ -2672,7 +2602,6 @@
"test_multihead_attention_dtype_batch_first_xpu_float64",
"test_multihead_attention_dtype_xpu_float64",
"test_multihead_attn_fast_path_query_and_bias_have_different_dtypes_xpu_float64",
"test_multihead_attn_fast_path_small_test_xpu_float64",
"test_multihead_attn_in_proj_bias_none_xpu_float64",
"test_multihead_attn_in_proj_weight_none_xpu_float64",
),