diff --git a/test/xpu/extended/run_test_with_skip.py b/test/xpu/extended/run_test_with_skip.py
index 97ff00326..425ffc097 100644
--- a/test/xpu/extended/run_test_with_skip.py
+++ b/test/xpu/extended/run_test_with_skip.py
@@ -50,11 +50,9 @@
     "test_compare_cpu_exp2_xpu_complex128",
     "test_compare_cpu_exp2_xpu_complex64",
     "test_compare_cpu_nextafter_xpu_bfloat16",
-
     # skip random failure due to accuracy
     # AssertionError: Tensor-likes are not close!
     "test_compare_cpu_atan2_xpu_bfloat16",
-
     # CUDA does not support the data type either
     "test_compare_cpu_native_dropout_backward_xpu_bool",
     "test_compare_cpu_native_dropout_backward_xpu_int16",
@@ -63,22 +61,17 @@
     "test_compare_cpu_native_dropout_backward_xpu_int8",
     "test_compare_cpu_native_dropout_backward_xpu_uint8",
     "test_non_standard_bool_values_native_dropout_backward_xpu_bool",
-
     # Need FP64 golden ref for more accurate comparison
     "test_compare_cpu_log_softmax_xpu_bfloat16",
-
     # TestCompositeCompliance
     # CPU fallback fails
     # Require implementing aten::embedding_renorm_
     "test_view_replay_nn_functional_embedding_xpu_float32",
-
     # TestCompositeCompliance::test_cow_input
     # XPU Tensor fails in copy-on-write cases
     # AssertionError: False is not true : Keyword argument 'output grad 0' during backward call unexpectedly materializes. Either set `supports_cow_input_no_materialize_backward=False` in this operation's OpInfo, add the arg to the OpInfo's `allow_cow_input_materialize_backward` list, or change the implementation to avoid materialization.
     # https://github.com/intel/torch-xpu-ops/issues/281
     "test_cow_input",
-
-
     # XPU implementation is correct.
     # std::exp{-inf, nan}, the result is (±0,±0) (signs are unspecified)
     # std::exp{-inf, inf}, the result is (±0,±0) (signs are unspecified)
@@ -86,36 +79,29 @@
     # https://en.cppreference.com/w/cpp/numeric/complex/exp
     "test_compare_cpu_sigmoid_xpu_complex64",
     "test_compare_cpu_sigmoid_xpu_complex128",
-
     # Align with CUDA dtypes - RuntimeError: "avg_pool2d_out_xpu" not implemented for 'Long'
     "test_compare_cpu_nn_functional_avg_pool2d_xpu_int64",
-
     # Special handle (different calculation order) in CPU reference impl.
     # https://github.com/pytorch/pytorch/blob/c97e3ebb96d7457075b019b94411e8c2d058e68b/aten/src/ATen/native/EmbeddingBag.cpp#L300
     "test_compare_cpu_nn_functional_embedding_bag_xpu_bfloat16",
     "test_compare_cpu_nn_functional_embedding_bag_xpu_float16",
-
     # Not implemented operators, aten::embedding_renorm_.
     # To retrieve cases when the operators are supported.
     # https://github.com/intel/torch-xpu-ops/issues/380
     "test_compare_cpu_nn_functional_embedding_bag_xpu_float32",
     "test_compare_cpu_nn_functional_embedding_bag_xpu_float64",
     "test_view_replay_nn_functional_embedding_bag_xpu_float32",
-
-    #Double and complex datatype matmul is not supported in oneDNN
+    # Double and complex datatype matmul is not supported in oneDNN
     "test_compare_cpu_cdist_xpu_float64",
-
     # CPU reference fail. `abs_cpu` does not support bool.
     # The case should be skipped by PyTorch test infrastructure, but not be
     # skipped correctly after https://github.com/pytorch/pytorch/pull/124147
     # https://github.com/intel/torch-xpu-ops/issues/412
     "test_compare_cpu_abs_xpu_bool",
-
     # bilinear interpolate includes large calculation steps, accuracy reduces in half-precision
     # Not in CUDA test scope too
     "test_compare_cpu_nn_functional_upsample_bilinear_xpu_bfloat16",
     "test_compare_cpu_nn_functional_upsample_bilinear_xpu_float16",
-
     # CPU result is not golden reference
     "test_compare_cpu_nn_functional_group_norm_xpu_bfloat16",
     "test_compare_cpu_nn_functional_group_norm_xpu_float16",
@@ -130,25 +116,20 @@
     # Align with CUDA impl by using accumulate type. But CPU doesn't use.
     # When XPU uses original data type, the case passes.
     "test_compare_cpu_logit_xpu_bfloat16",
-
     # precison error
     # Mismatched elements: 1 / 24 (4.2%)
     # Greatest absolute difference: 0.03125 at index (0, 1, 0, 1) (up to 0.001 allowed)
     # Greatest relative difference: 0.0048828125 at index (0, 1, 0, 1) (up to 0.001 allowed)
     "test_compare_cpu_nn_functional_interpolate_bilinear_xpu_bfloat16",
-
     # RuntimeError: "compute_index_ranges_weights" not implemented for 'Half'
     "test_compare_cpu_nn_functional_interpolate_bilinear_xpu_float16",
-
     # AssertionError: False is not true : Argument 0 during forward call unexpectedly materializes. Either set `supports_cow_input_no_materialize_forward=False...
     "test_cow_input_nn_functional_interpolate_bilinear_xpu_float32",
     "test_cow_input_nn_functional_interpolate_linear_xpu_float32",
     "test_cow_input_nn_functional_interpolate_trilinear_xpu_float32",
-
     #The results of XPU and CUDA are consistent, but the results of CPU and CUDA are inconsistent
     "test_compare_cpu_nn_functional_interpolate_linear_xpu_bfloat16",
     "test_compare_cpu_nn_functional_interpolate_linear_xpu_float16",
-
     # bicubic interpolate includes large calculation steps, accuracy reduces in half-precision
     # Not in CUDA test scope too
     "test_compare_cpu_nn_functional_interpolate_bicubic_xpu_bfloat16",
@@ -157,17 +138,14 @@
     # Retrieve it once the operator is implemented.
     # Error: The operator 'aten::glu_jvp' is not currently implemented for the XPU device.
     "test_forward_ad_nn_functional_glu_xpu_float32",
-
     # Precision error.
     # Mismatched elements: 1 / 812 (0.1%)
     # Greatest absolute difference: 0.03125 at index (610,) (up to 0.001 allowed)
     # Greatest relative difference: 0.00396728515625 at index (610,) (up to 0.001 allowed)
     "test_compare_cpu_hypot_xpu_bfloat16",
-
     # RuntimeError: Expected both inputs to be Half, Float or Double tensors but got BFloat16 and BFloat16.
     # Polar's backward is calculated using complex(), which does not support bfloat16. CUDA fails with same error.
     "test_compare_cpu_polar_xpu_bfloat16",
-
     # Regressions due to PyTorch uplift (Numeric difference in float and bfloat)
     # https://github.com/intel/torch-xpu-ops/issues/549
     # Example fail log
@@ -179,25 +157,21 @@
     "test_compare_cpu_std_mean_xpu_bfloat16",
     "test_compare_cpu_sub_xpu_float16",
     "test_compare_cpu_var_mean_xpu_bfloat16",
-
     # test case doesn't make sense, will file an issue to track it.
     # https://github.com/pytorch/pytorch/issues/130916
     "test_compare_cpu_histogram_xpu_float32",
     "test_compare_cpu_histogram_xpu_float64",
-
     # Precision error.
     # Mismatched elements: 2 / 125 (1.6%)
     # Greatest absolute difference: 0.001953125 at index (2, 0, 0) (up to 0.001 allowed)
     # Greatest relative difference: 0.007568359375 at index (2, 0, 0) (up to 0.001 allowed)
     "test_compare_cpu_cumprod_xpu_bfloat16",
-
     # Precision error.
     # Mismatched elements: 1 / 9 (11.1%)
     # Greatest absolute difference: 0.001953125 at index (2, 2) (up to 0.001 allowed)
     # Greatest relative difference: 0.004669189453125 at index (2, 2) (up to 0.001 allowed)
     # Not in CUDA test scope too
     "test_compare_cpu_prod_xpu_bfloat16 ",
-
     # different results for value index due to unstable sort.
     # XPU and CUDA have the same result.
     "test_compare_cpu_median_xpu_int16",
diff --git a/test/xpu/run_test_with_skip.py b/test/xpu/run_test_with_skip.py
index ae19942f3..8921075c3 100644
--- a/test/xpu/run_test_with_skip.py
+++ b/test/xpu/run_test_with_skip.py
@@ -38,6 +38,8 @@ def launch_test(test_case, skip_list=None, exe_list=None):
 res = 0
 
 # test_ops
+
+
 skip_list = (
     # Skip list of base line
     "test_dtypes___rmod___xpu",
@@ -84,8 +86,6 @@ def launch_test(test_case, skip_list=None, exe_list=None):
     "test_out_requires_grad_error_sparse_sampled_addmm_xpu_complex64",
     "test_out_requires_grad_error_sparse_sampled_addmm_xpu_float32",
     "test_out_warning_nanmean_xpu",
-    "test_out_warning_nn_functional_logsigmoid_xpu",
-    "test_python_ref__refs_div_trunc_rounding_xpu_bfloat16",
     "test_python_ref__refs_linspace_tensor_overload_xpu_int16",
     "test_python_ref__refs_linspace_tensor_overload_xpu_int32",
     "test_python_ref__refs_linspace_tensor_overload_xpu_int64",
@@ -108,7 +108,6 @@ def launch_test(test_case, skip_list=None, exe_list=None):
     "test_python_ref__refs_nn_functional_triplet_margin_loss_xpu_uint8",
     "test_python_ref__refs_square_xpu_bool",
     "test_python_ref__refs_trunc_xpu_float64",
-    "test_python_ref_executor__refs_div_trunc_rounding_executor_aten_xpu_bfloat16",
     "test_python_ref_executor__refs_geometric_executor_aten_xpu_bfloat16",
     "test_python_ref_executor__refs_geometric_executor_aten_xpu_float16",
     "test_python_ref_executor__refs_geometric_executor_aten_xpu_float32",
@@ -149,7 +148,6 @@ def launch_test(test_case, skip_list=None, exe_list=None):
     "test_python_ref_executor__refs_square_executor_aten_xpu_bool",
     "test_python_ref_executor__refs_vdot_executor_aten_xpu_complex128",
     "test_python_ref_executor__refs_vdot_executor_aten_xpu_complex64",
-    "test_python_ref_torch_fallback__refs_div_trunc_rounding_xpu_bfloat16",
     "test_python_ref_torch_fallback__refs_linspace_tensor_overload_xpu_int16",
     "test_python_ref_torch_fallback__refs_linspace_tensor_overload_xpu_int32",
     "test_python_ref_torch_fallback__refs_linspace_tensor_overload_xpu_int64",
@@ -209,9 +207,6 @@ def launch_test(test_case, skip_list=None, exe_list=None):
     "test_python_ref_torch_fallback__refs_square_xpu_complex64",
     # Skip list of new added when porting XPU operators.
    # See: https://github.com/intel/torch-xpu-ops/issues/128
-    "test_dtypes_scatter_reduce_amax_xpu", # Align with CUDA dtypes - "scatter_gather_base_kernel_func" not implemented for 'Bool'
-    "test_dtypes_scatter_reduce_amin_xpu", # Align with CUDA dtypes - "scatter_gather_base_kernel_func" not implemented for 'Bool'
-    "test_dtypes_scatter_reduce_prod_xpu", # Align with CUDA dtypes - "scatter_gather_base_kernel_func" not implemented for 'Bool'
     "test_dtypes_view_as_complex_xpu", # Didn't align with CUDA, The following dtypes did not work in backward but are listed by the OpInfo: {torch.bfloat16}
     "test_dtypes_view_as_real_xpu", # Didn't align with CUDA, The following dtypes did not work in backward but are listed by the OpInfo: {torch.bfloat16}
     "test_noncontiguous_samples_native_dropout_backward_xpu_int64", # The implementation aligns with CUDA, RuntimeError: "masked_scale" not implemented for 'Long'.
@@ -223,15 +218,8 @@ def launch_test(test_case, skip_list=None, exe_list=None):
     "test_non_standard_bool_values_msort_xpu_bool", # The implementation aligns with CUDA, RuntimeError: "msort" not implemented for 'Bool'.
     "test_non_standard_bool_values_sort_xpu_bool", # The implementation aligns with CUDA, RuntimeError: "sort" not implemented for 'Bool'.
     "test_python_ref_executor__refs_pow_executor_aten_xpu_complex32", # Didn't align with CUDA, Unexpected success
-    "test_compare_cpu_nn_functional_grid_sample_xpu_float32", # AssertionError: Tensor-likes are not close!
-    "test_dtypes_nn_functional_batch_norm_without_cudnn_xpu", # AssertionError: The supported dtypes for nn.functional.batch_norm on device type xpu are incorrect!
     # Unexpected success
     "test_errors_histogramdd_xpu",
-    "test_noncontiguous_samples__batch_norm_with_update_xpu_float32",
-    "test_out_histc_xpu_float32",
-    "test_out_warning_logcumsumexp_xpu",
-    "test_python_ref__refs_mul_xpu_complex32",
-    "test_python_ref_torch_fallback__refs_mul_xpu_complex32",
     # Jiterator is only supported on CUDA and ROCm GPUs, none are available.
     "_jiterator_",
     # https://github.com/intel/torch-xpu-ops/issues/157
@@ -755,35 +743,23 @@ def launch_test(test_case, skip_list=None, exe_list=None):
     # The following dtypes did not work in backward but are listed by the OpInfo: {torch.float16}.
     "test_dtypes_nn_functional_pad_replicate_negative_xpu",
     "test_dtypes_nn_functional_pad_replicate_xpu",
-
     # Op impl aligns with CUDA on the supported dtypes.
     # RuntimeError: "avg_pool2d_xpu" not implemented for 'Long'.
     # Retrieve the case, once avg_pool1d is supported. Test infra will change claimed dtypes in test case once the op is listed
     # in XPU supported operators. Then the case will work.
     "test_noncontiguous_samples_nn_functional_avg_pool1d_xpu_int64",
     "test_noncontiguous_samples_nn_functional_local_response_norm_xpu_int64",
-
-    # Numeric difference
-    # https://github.com/intel/torch-xpu-ops/issues/544
-    # Mismatched elements: 7 / 1048576 (0.0%)
-    # Greatest absolute difference: 0.4922053598013041 at index (765, 860) (up to 1e-07 allowed)
-    # Greatest relative difference: 0.15330001655652495 at index (765, 860) (up to 1e-07 allowed)
-    "test_python_ref__refs_log2_xpu_complex128",
-
-    #AssertionError: The supported dtypes for unique_consecutive on device type xpu are incorrect!
-    #The following dtypes worked in forward but are not listed by the OpInfo: {torch.bfloat16}.
-    #XPU supports bfloat16, CUDA doesn't support it.
+    # AssertionError: The supported dtypes for unique_consecutive on device type xpu are incorrect!
+    # The following dtypes worked in forward but are not listed by the OpInfo: {torch.bfloat16}.
+    # XPU supports bfloat16, CUDA doesn't support it.
     "test_dtypes_unique_consecutive_xpu",
     "test_dtypes_unique_xpu",
-
     # RuntimeError: Expected both inputs to be Half, Float or Double tensors but got BFloat16 and BFloat16.
     # Polar's backward is calculated using complex(), which does not support bfloat16. CUDA fails with same error.
     "test_dtypes_polar_xpu",
-
     # implemented aten::histogram to align MPS operators coverage, CUDA doesn't support
     # but test_dtypes infrastructure leverage CUDA supported datatypes
     "test_dtypes_histogram_xpu",
-
     # The following dtypes worked in forward but are not listed by the OpInfo: {torch.float16}.
     # Align with CPU implementation since,
     # 1. most cases of nextafter require Half dtype.
@@ -795,6 +771,7 @@ def launch_test(test_case, skip_list=None, exe_list=None):
 
 
 # test_binary_ufuncs
+
 skip_list = (
     "test_fmod_remainder_by_zero_integral_xpu_int64", # zero division is an undefined behavior: different handles on different backends
     "test_div_rounding_numpy_xpu_float16", # Calculation error. XPU implementation uses opmath type.
@@ -807,9 +784,6 @@ def launch_test(test_case, skip_list=None, exe_list=None):
     "test_pow_xpu_int64",
     # AssertionError: Jiterator is only supported on CUDA and ROCm GPUs, none are available.
     "_jiterator_",
-    # Unexpected success
-    "test_type_promotion_logaddexp_xpu",
-
     # nextafter: Numeric error due to `std::nextafter` difference between CPU (GCC) and XPU (SYCL)
     # https://github.com/intel/torch-xpu-ops/issues/623
     # AssertionError: Scalars are not equal!
@@ -822,17 +796,10 @@ def launch_test(test_case, skip_list=None, exe_list=None):
 
 
 # test_scatter_gather_ops
+
 skip_list = (
     "test_gather_backward_with_empty_index_tensor_sparse_grad_True_xpu_float32", # Could not run 'aten::_sparse_coo_tensor_with_dims_and_tensors' with arguments from the 'SparseXPU' backend.
     "test_gather_backward_with_empty_index_tensor_sparse_grad_True_xpu_float64", # Could not run 'aten::_sparse_coo_tensor_with_dims_and_tensors' with arguments from the 'SparseXPU' backend.
-    "test_scatter__reductions_xpu_complex64", # align CUDA dtype - RuntimeError: "scatter_gather_base_kernel_func" not implemented for 'ComplexFloat'
-    "test_scatter_reduce_amax_xpu_bool", # align CUDA dtype - RuntimeError: "scatter_gather_base_kernel_func" not implemented for 'Bool'
-    "test_scatter_reduce_amin_xpu_bool", # align CUDA dtype - RuntimeError: "scatter_gather_base_kernel_func" not implemented for 'Bool'
-    "test_scatter_reduce_mean_xpu_complex128", # align CUDA dtype - RuntimeError: "scatter_gather_base_kernel_func" not implemented for 'ComplexDouble'
-    "test_scatter_reduce_mean_xpu_complex64", # align CUDA dtype - RuntimeError: "scatter_gather_base_kernel_func" not implemented for 'ComplexFloat'
-    "test_scatter_reduce_prod_xpu_bool", # align CUDA dtype - RuntimeError: "scatter_gather_base_kernel_func" not implemented for 'Bool'
-    "test_scatter_reduce_prod_xpu_complex128", # align CUDA dtype - RuntimeError: "scatter_gather_base_kernel_func" not implemented for 'ComplexDouble'
-    "test_scatter_reduce_prod_xpu_complex64", # align CUDA dtype - RuntimeError: "scatter_gather_base_kernel_func" not implemented for 'ComplexFloat'
 )
 res += launch_test("test_scatter_gather_ops_xpu.py", skip_list)
 
@@ -841,9 +808,7 @@ def launch_test(test_case, skip_list=None, exe_list=None):
 
 
 # test_sort_and_select
-skip_list = (
-    "test_sort_large_slice_xpu", # Hard code CUDA
-)
+skip_list = ("test_sort_large_slice_xpu",) # Hard code CUDA
 res += launch_test("test_sort_and_select_xpu.py", skip_list)
 
 nn_test_embedding_skip_list = (
@@ -1264,15 +1229,7 @@ def launch_test(test_case, skip_list=None, exe_list=None):
     "test_rnn_retain_variables_xpu_float64",
     "test_transformerencoderlayer_xpu_float64",
     "test_variable_sequence_xpu_float64",
-    # AssertionError: Scalars are not close!
- "test_InstanceNorm1d_general_xpu", - "test_InstanceNorm2d_general_xpu", - "test_InstanceNorm3d_general_xpu", # AssertionError: RuntimeError not raised - "test_upsamplingBiMode2d_nonsupported_dtypes_antialias_False_num_channels_3_mode_bicubic_uint8_xpu_uint8", - "test_upsamplingBiMode2d_nonsupported_dtypes_antialias_False_num_channels_3_mode_bilinear_uint8_xpu_uint8", - "test_upsamplingBiMode2d_nonsupported_dtypes_antialias_False_num_channels_5_mode_bicubic_uint8_xpu_uint8", - "test_upsamplingBiMode2d_nonsupported_dtypes_antialias_False_num_channels_5_mode_bilinear_uint8_xpu_uint8", "test_upsamplingBiMode2d_nonsupported_dtypes_antialias_True_num_channels_3_mode_bicubic_uint8_xpu_uint8", "test_upsamplingBiMode2d_nonsupported_dtypes_antialias_True_num_channels_3_mode_bilinear_uint8_xpu_uint8", "test_upsamplingBiMode2d_nonsupported_dtypes_antialias_True_num_channels_5_mode_bicubic_uint8_xpu_uint8", @@ -1315,7 +1272,6 @@ def launch_test(test_case, skip_list=None, exe_list=None): # https://github.com/intel/torch-xpu-ops/issues/461 "test_index_put_src_datatype_xpu_float8_e5m2", "test_index_put_src_datatype_xpu_float8_e4m3fn", - # Regression after PyTorch update # http://github.com/intel/torch-xpu-ops/issues/549 # IndexError: tensors used as indices must be long, byte or bool tensors. @@ -1485,29 +1441,24 @@ def launch_test(test_case, skip_list=None, exe_list=None): "test_reference_numerics_large_asinh_xpu_complex128", "test_reference_numerics_large_asinh_xpu_complex64", "test_reference_numerics_large_asinh_xpu_complex32", - # Mismatched elements: 1 / 943593 (0.0%) # Greatest absolute difference: 1.3363442121772096e-05 at index (742, 249) (up to 1e-05 allowed) # Greatest relative difference: 8.852276550896931e-06 at index (742, 249) (up to 1.3e-06 allowed) "test_reference_numerics_normal_nn_functional_tanhshrink_xpu_complex64", - # AssertionError: Tensor-likes are not close! # exceeded maximum allowed difference # Greatest absolute difference: 6.266784475883469e-05 at index (463, 204) (up to 1e-05 allowed) # Greatest relative difference: 1.9145216356264427e-05 at index (463, 204) (up to 1.3e-06 allowed) "test_reference_numerics_normal__refs_asinh_xpu_complex64", "test_reference_numerics_normal_asinh_xpu_complex64", - # Failed: Unexpected success "test_reference_numerics_large_rsqrt_xpu_complex32", - # Numeric difference # https://github.com/intel/torch-xpu-ops/issues/544 # Expected 0.00497517 but got 0.00497520063072443. # Absolute difference: 3.063072442997111e-08 (up to 0.0 allowed) # Relative difference: 6.156719153309558e-06 (up to 1e-06 allowed) "test_log1p_complex_xpu_complex64", - # Issue: https://github.com/intel/torch-xpu-ops/issues/622 # Mismatched elements: 8 / 943593 (0.0%) # Greatest absolute difference: inf at index (9, 860) (up to 0.001 allowed) @@ -1863,8 +1814,6 @@ def launch_test(test_case, skip_list=None, exe_list=None): "test_compile_int4_mm_m_64_k_32_n_64_xpu", "test_compile_int4_mm_m_64_k_64_n_48_xpu", "test_compile_int4_mm_m_64_k_64_n_64_xpu", - # Short is not supported in oneDNN! - "test_mm_empty_inputs_mixed_dtype_errors_xpu", # XPU does not support tunable. 
"test_bmm_tunableop_rocm_xpu_float32", "test_numeric_check_leak_tunableop_rocm_xpu_float32", @@ -2943,8 +2892,6 @@ def launch_test(test_case, skip_list=None, exe_list=None): "test_big_num_tensors__foreach_max_use_cuda_graph_True_xpu_float64", "test_big_num_tensors__foreach_norm_use_cuda_graph_True_xpu_float32", "test_big_num_tensors__foreach_norm_use_cuda_graph_True_xpu_float64", - # AssertionError: Tensor-likes are not close! - "test_pointwise_op_with_tensor_of_scalarlist_overload__foreach_addcdiv_is_fastpath_True_xpu_float16", ) res += launch_test("test_foreach_xpu.py", skip_list) @@ -2964,10 +2911,13 @@ def launch_test(test_case, skip_list=None, exe_list=None): res += launch_test("nn/test_convolution_xpu.py", skip_list) # test_dynamic_shapes + + res += launch_test("test_dynamic_shapes_xpu.py") # test_load_state_dict + res += launch_test("nn/test_load_state_dict_xpu.py") # test_module_hooks @@ -2980,6 +2930,7 @@ def launch_test(test_case, skip_list=None, exe_list=None): # test_parametrization + res += launch_test("nn/test_parametrization_xpu.py") exit_code = os.WEXITSTATUS(res) diff --git a/test/xpu/xpu_test_utils.py b/test/xpu/xpu_test_utils.py index 483af50f7..c3a268024 100644 --- a/test/xpu/xpu_test_utils.py +++ b/test/xpu/xpu_test_utils.py @@ -4,6 +4,7 @@ import copy import os import sys +import unittest import torch from torch import bfloat16, cuda @@ -230,9 +231,25 @@ "copysign", "count_nonzero", "nan_to_num", + "scatter_reduce", "nanmean", ] +# some case fail in cuda becasue of cuda's bug, so cuda set xfail in opdb +# but xpu can pass these case, and assert 'unexpected success' +# the list will pass these case. + + +_cuda_xfail_xpu_pass = [ + ("rsqrt", "test_reference_numerics_large"), + ("_batch_norm_with_update", "test_noncontiguous_samples"), + ("_batch_norm_with_update", "test_dispatch_symbolic_meta_outplace_all_strides"), + ("histc", "test_out"), + ("logcumsumexp", "test_out_warning"), + ("_refs.mul", "test_python_ref"), + ("_refs.mul", "test_python_ref_torch_fallback"), +] + def get_wrapped_fn(fn): if hasattr(fn, "__wrapped__"): @@ -512,38 +529,35 @@ def __init__(self, patch_test_case=True) -> None: self.cuda_is_bf16_supported = cuda.is_bf16_supported def align_db_decorators(self, db): - for info in db: - decorator_xpu = [] + def gen_xpu_wrappers(name, wrappers): + wrapper_xpu = [] replaced = False - for decorator in info.decorators: - if type(decorator) == DecorateInfo: - if decorator.device_type == "cuda": - decorator_xpu.append(decorator) - decorator.device_type = "xpu" - replaced = True - else: - decorator_xpu.append(decorator) - elif self.only_cuda_fn == decorator: - decorator_xpu.append(common_device_type.onlyCUDA) + for wrapper in wrappers: + if type(wrapper) == DecorateInfo: + if wrapper.device_type == "cuda": + if ( + unittest.expectedFailure in wrapper.decorators + and (name, wrapper.test_name) in _cuda_xfail_xpu_pass + ): + pass + else: + wrapper.device_type = "xpu" + replaced = True + wrapper_xpu.append(wrapper) + elif self.only_cuda_fn == wrapper: + wrapper_xpu.append(common_device_type.onlyCUDA) replaced = True - if replaced: - info.decorators = tuple(decorator_xpu) - skip_xpu = [] - replaced = False + return replaced, wrapper_xpu + + for info in db: + if hasattr(info, "decorators"): + replaced, decorator_xpu = gen_xpu_wrappers(info.name, info.decorators) + if replaced: + info.decorators = tuple(decorator_xpu) if hasattr(info, "skips"): - for skip in info.skips: - if type(skip) == DecorateInfo: - if skip.device_type == "cuda": - skip_xpu.append(decorator) - 
skip.device_type = "xpu" - replaced = True - else: - skip_xpu.append(skip) - elif self.only_cuda_fn == skip: - skip_xpu.append(common_device_type.onlyCUDA) - replaced = True - if replaced: - info.skips = tuple(skip_xpu) + replaced, skip_xpu = gen_xpu_wrappers(info.name, info.skips) + if replaced: + info.skips = tuple(skip_xpu) def align_supported_dtypes(self, db): for opinfo in db: