From 7b266fce58eadb4164f4631f303ad6ec764cc128 Mon Sep 17 00:00:00 2001
From: Daisy Deng
Date: Tue, 3 Sep 2024 20:43:17 +0800
Subject: [PATCH] refined align_supported_dtypes and test_ops.py skip list (#779)

1. Updated align_supported_dtypes() (a readable restatement of the new logic follows the change list below):
   a) If forward does not support bfloat16, do not add bfloat16 to the backward dtypes.
   b) For _refs.xxx, if op xxx is supported by torch-xpu-ops, also align the backward dtypes with CUDA.
2. Added more ops to _xpu_computation_op_list to align their dtypes with CUDA.
3. Updated the skip lists:
   a) Added cases to the skip list: changes 1 and 2 enable more cases, and some of them are XFAIL on CUDA but pass on XPU.
   b) Removed cases from the skip list:

      # These cases no longer exist:
      # "test_python_ref__refs_linspace_tensor_overload_xpu_int16",
      # "test_python_ref__refs_linspace_tensor_overload_xpu_int32",
      # "test_python_ref__refs_linspace_tensor_overload_xpu_int64",
      # "test_python_ref__refs_linspace_tensor_overload_xpu_int8",
      # "test_python_ref__refs_linspace_tensor_overload_xpu_uint8",
      # "test_python_ref__refs_linspace_xpu_int16",
      # "test_python_ref__refs_linspace_xpu_int32",
      # "test_python_ref__refs_linspace_xpu_int64",
      # "test_python_ref__refs_linspace_xpu_int8",
      # "test_python_ref__refs_linspace_xpu_uint8",
      # "test_python_ref__refs_logaddexp_xpu_complex128",
      # "test_python_ref__refs_logaddexp_xpu_complex64",
      # "test_python_ref__refs_native_layer_norm_xpu_bfloat16",
      # "test_python_ref__refs_native_layer_norm_xpu_float16",
      # "test_python_ref__refs_native_layer_norm_xpu_float32",
      # "test_python_ref__refs_nn_functional_hinge_embedding_loss_xpu_bfloat16",
      # "test_python_ref__refs_nn_functional_hinge_embedding_loss_xpu_float16",
      # "test_python_ref__refs_nn_functional_margin_ranking_loss_xpu_bfloat16",
      # "test_python_ref__refs_nn_functional_margin_ranking_loss_xpu_float16",
      # "test_python_ref__refs_nn_functional_triplet_margin_loss_xpu_uint8",
      # "test_python_ref__refs_square_xpu_bool",
      # "test_python_ref__refs_trunc_xpu_float64", # skipped
      # "test_python_ref_executor__refs_geometric_executor_aten_xpu_bfloat16",
      # "test_python_ref_executor__refs_geometric_executor_aten_xpu_float16",
      # "test_python_ref_executor__refs_geometric_executor_aten_xpu_float32",
      # "test_python_ref_executor__refs_geometric_executor_aten_xpu_float64",
      # "test_python_ref_executor__refs_geometric_executor_aten_xpu_int16",
      # "test_python_ref_executor__refs_geometric_executor_aten_xpu_int32",
      # "test_python_ref_executor__refs_geometric_executor_aten_xpu_int64",
      # "test_python_ref_executor__refs_geometric_executor_aten_xpu_int8",
      # "test_python_ref_executor__refs_geometric_executor_aten_xpu_uint8",
      # "test_python_ref_executor__refs_linspace_executor_aten_xpu_int16",
      # "test_python_ref_executor__refs_linspace_executor_aten_xpu_int32",
      # "test_python_ref_executor__refs_linspace_executor_aten_xpu_int64",
      # "test_python_ref_executor__refs_linspace_executor_aten_xpu_int8",
      # "test_python_ref_executor__refs_linspace_executor_aten_xpu_uint8",
      # "test_python_ref_executor__refs_linspace_tensor_overload_executor_aten_xpu_int16",
      # "test_python_ref_executor__refs_linspace_tensor_overload_executor_aten_xpu_int32",
      # "test_python_ref_executor__refs_linspace_tensor_overload_executor_aten_xpu_int64",
      # "test_python_ref_executor__refs_linspace_tensor_overload_executor_aten_xpu_int8",
      # "test_python_ref_executor__refs_linspace_tensor_overload_executor_aten_xpu_uint8",
      # "test_python_ref_executor__refs_log_normal_executor_aten_xpu_bfloat16",
      # "test_python_ref_executor__refs_log_normal_executor_aten_xpu_float16",
      # "test_python_ref_executor__refs_log_normal_executor_aten_xpu_float32",
      # "test_python_ref_executor__refs_log_normal_executor_aten_xpu_float64", # skipped
      # "test_python_ref_executor__refs_native_layer_norm_executor_aten_xpu_bfloat16",
      # "test_python_ref_executor__refs_native_layer_norm_executor_aten_xpu_float16", # need to add native_layer_norm to the op list
      # "test_python_ref_executor__refs_native_layer_norm_executor_aten_xpu_float32", # skipped
      # "test_python_ref_executor__refs_nn_functional_alpha_dropout_executor_aten_xpu_bfloat16",
      # "test_python_ref_executor__refs_nn_functional_alpha_dropout_executor_aten_xpu_float16",
      # "test_python_ref_executor__refs_nn_functional_alpha_dropout_executor_aten_xpu_float32",
      # "test_python_ref_executor__refs_nn_functional_alpha_dropout_executor_aten_xpu_float64", # skipped
      # "test_python_ref_executor__refs_nn_functional_hinge_embedding_loss_executor_aten_xpu_bfloat16",
      # "test_python_ref_executor__refs_nn_functional_hinge_embedding_loss_executor_aten_xpu_float16",
      # "test_python_ref_executor__refs_nn_functional_margin_ranking_loss_executor_aten_xpu_bfloat16",
      # "test_python_ref_executor__refs_nn_functional_margin_ranking_loss_executor_aten_xpu_float16",
      # "test_python_ref_executor__refs_nn_functional_triplet_margin_loss_executor_aten_xpu_uint8", # skipped
      # "test_python_ref_executor__refs_vdot_executor_aten_xpu_complex128",
      # "test_python_ref_executor__refs_vdot_executor_aten_xpu_complex64", # skipped
      # "test_python_ref_torch_fallback__refs_linspace_tensor_overload_xpu_int16",
      # "test_python_ref_torch_fallback__refs_linspace_tensor_overload_xpu_int32",
      # "test_python_ref_torch_fallback__refs_linspace_tensor_overload_xpu_int64",
      # "test_python_ref_torch_fallback__refs_linspace_tensor_overload_xpu_int8",
      # "test_python_ref_torch_fallback__refs_linspace_tensor_overload_xpu_uint8",
      # "test_python_ref_torch_fallback__refs_linspace_xpu_int16",
      # "test_python_ref_torch_fallback__refs_linspace_xpu_int32",
      # "test_python_ref_torch_fallback__refs_linspace_xpu_int64",
      # "test_python_ref_torch_fallback__refs_linspace_xpu_int8",
      # "test_python_ref_torch_fallback__refs_linspace_xpu_uint8",
      # "test_python_ref_torch_fallback__refs_logaddexp_xpu_complex128",
      # "test_python_ref_torch_fallback__refs_logaddexp_xpu_complex64",
      # "test_python_ref_torch_fallback__refs_native_layer_norm_xpu_bfloat16",
      # "test_python_ref_torch_fallback__refs_native_layer_norm_xpu_float16", # skipped? added native_layer_norm to the op list
      # "test_python_ref_torch_fallback__refs_native_layer_norm_xpu_float32", # skipped
      # "test_python_ref_torch_fallback__refs_nn_functional_hinge_embedding_loss_xpu_bfloat16",
      # "test_python_ref_torch_fallback__refs_nn_functional_hinge_embedding_loss_xpu_float16",
      # "test_python_ref_torch_fallback__refs_nn_functional_margin_ranking_loss_xpu_bfloat16",
      # "test_python_ref_torch_fallback__refs_nn_functional_margin_ranking_loss_xpu_float16",
      # "test_python_ref_torch_fallback__refs_sinh_xpu_complex128",
      # "test_python_ref_torch_fallback__refs_special_multigammaln_mvlgamma_p_5_xpu_int32", # skipped?
      # "test_python_ref_torch_fallback__refs_square_xpu_bool", # skipped
      # "test_python_ref_torch_fallback__refs_vdot_xpu_complex128",
      # "test_python_ref_torch_fallback__refs_vdot_xpu_complex64", # skipped
      # "test_compare_cpu__refs_special_zeta_xpu_float32", # skipped?
      # "test_python_ref__refs_heaviside_xpu_int64", # skipped
      # "test_python_ref__refs_special_bessel_j0_xpu_int64",
      # "test_python_ref_errors__refs_dstack_xpu",
      # "test_python_ref_errors__refs_hstack_xpu",
      # "test_python_ref_errors__refs_linalg_cross_xpu",
      # "test_python_ref_errors__refs_vstack_xpu",
      # "test_python_ref_executor__refs_mul_executor_aten_xpu_complex32",
      # "test_python_ref__refs_special_multigammaln_mvlgamma_p_5_xpu_float64",
      # "test_python_ref_executor__refs_special_multigammaln_mvlgamma_p_3_executor_aten_xpu_float64",
      # "test_python_ref__refs_square_xpu_complex128",
      # "test_python_ref__refs_square_xpu_complex64",
      # "test_python_ref_executor__refs_istft_executor_aten_xpu_complex128",
      # "test_python_ref_executor__refs_square_executor_aten_xpu_complex128",
      # "test_python_ref_torch_fallback__refs_square_xpu_complex128",
      # "test_python_ref_torch_fallback__refs_square_xpu_complex64",

      # Fixed in xpu_test_utils.py by not adding bfloat16 to backward if bfloat16 is not enabled in forward:
      # "test_dtypes_view_as_complex_xpu", # Didn't align with CUDA, the following dtypes did not work in backward but are listed by the OpInfo: {torch.bfloat16}
      # "test_dtypes_view_as_real_xpu", # Didn't align with CUDA, the following dtypes did not work in backward but are listed by the OpInfo: {torch.bfloat16}

      # These tests no longer exist:
      # "test_noncontiguous_samples_native_dropout_backward_xpu_int64", # The implementation aligns with CUDA, RuntimeError: "masked_scale" not implemented for 'Long'.
      # "test_non_standard_bool_values_native_dropout_backward_xpu_bool", # The implementation aligns with CUDA, RuntimeError: "masked_scale" not implemented for 'Bool'.
      # "test_non_standard_bool_values_scatter_reduce_amax_xpu_bool", # Align with CUDA dtypes - "scatter_gather_base_kernel_func" not implemented for 'Bool'
      # "test_non_standard_bool_values_scatter_reduce_amin_xpu_bool", # Align with CUDA dtypes - "scatter_gather_base_kernel_func" not implemented for 'Bool'
      # "test_non_standard_bool_values_scatter_reduce_prod_xpu_bool", # Align with CUDA dtypes - "scatter_gather_base_kernel_func" not implemented for 'Bool'

      # Skipped by adding argsort and sort to the op list:
      # "test_non_standard_bool_values_argsort_xpu_bool", # The implementation aligns with CUDA, RuntimeError: "argsort" not implemented for 'Bool'.

      # Skipped:
      # "test_out_triangular_solve_xpu_float32",

      # Added square to the op list; this op is defined in ATen, so no backend implementation is needed:
      # "test_python_ref_executor__refs_square_executor_aten_xpu_bool",

      # Need to add native_layer_norm to the op list:
      # "test_python_ref_executor__refs_native_layer_norm_executor_aten_xpu_float32",

---------

Co-authored-by: Huaiyu, Zheng
---
"test_noncontiguous_samples_linalg_det_xpu_float32", "test_noncontiguous_samples_linalg_slogdet_xpu_float32", "test_noncontiguous_samples_linalg_solve_ex_xpu_float32", "test_noncontiguous_samples_linalg_solve_xpu_float32", "test_noncontiguous_samples_linalg_tensorsolve_xpu_float32", "test_noncontiguous_samples_logdet_xpu_float32", - "test_noncontiguous_samples_nn_functional_conv1d_xpu_int64", - "test_noncontiguous_samples_nn_functional_conv2d_xpu_int64", - "test_noncontiguous_samples_nn_functional_conv3d_xpu_int64", + "test_noncontiguous_samples_nn_functional_rrelu_xpu_float32", "test_noncontiguous_samples_nn_functional_conv3d_xpu_complex64", - "test_noncontiguous_samples_nn_functional_conv_transpose1d_xpu_int64", + "test_variant_consistency_eager_nn_functional_rrelu_xpu_float32", + + # RuntimeError: device type of values (xpu) must be CPU or CUDA or Meta + # https://github.com/intel/torch-xpu-ops/issues/357 + "test_compare_cpu_sparse_sampled_addmm_xpu_float32", + "test_errors_sparse_mul_layout0_xpu", + "test_errors_sparse_mul_layout1_xpu", + "test_errors_sparse_mul_layout2_xpu", + "test_errors_sparse_mul_layout3_xpu", + "test_out_requires_grad_error_sparse_sampled_addmm_xpu_complex64", + "test_out_requires_grad_error_sparse_sampled_addmm_xpu_float32", + + # NotImplementedError: Could not run 'aten::_to_dense' with arguments from the 'SparseXPU' backend. + # https://github.com/intel/torch-xpu-ops/issues/357 + "test_compare_cpu_to_sparse_xpu_float32", + "test_variant_consistency_eager_to_sparse_xpu_float32", + + # RuntimeError: sparse_dim expected sparse or strided tensor layout but got Sparse + # Issue https://github.com/intel/torch-xpu-ops/issues/357 + "test_variant_consistency_eager_to_sparse_xpu_complex64", + "test_non_standard_bool_values_to_sparse_xpu_bool", + + # OneDNN issues, https://github.com/intel/torch-xpu-ops/issues/253 + # RuntimeError: Long is not supported in oneDNN! + # RuntimeError: could not create a primitive descriptor for a deconvolution forward propagation primitive + # RuntimeError: Double and complex datatype matmul is not supported in oneDNN + "test_noncontiguous_samples_nn_functional_conv3d_xpu_int64", + "test_noncontiguous_samples_nn_functional_conv_transpose1d_xpu_int64", # "test_noncontiguous_samples_nn_functional_conv_transpose2d_xpu_complex64", "test_noncontiguous_samples_nn_functional_conv_transpose2d_xpu_float32", "test_noncontiguous_samples_nn_functional_conv_transpose2d_xpu_int64", "test_noncontiguous_samples_nn_functional_conv_transpose3d_xpu_complex64", "test_noncontiguous_samples_nn_functional_conv_transpose3d_xpu_float32", "test_noncontiguous_samples_nn_functional_conv_transpose3d_xpu_int64", - "test_noncontiguous_samples_nn_functional_rrelu_xpu_float32", + "test_noncontiguous_samples_nn_functional_conv1d_xpu_int64", + "test_noncontiguous_samples_nn_functional_conv2d_xpu_int64", + + # RuntimeError: mode only supports CPU AND CUDA device type, got: xpu + # Issue https://github.com/intel/torch-xpu-ops/issues/327 "test_numpy_ref_linalg_tensorinv_xpu_float64", "test_out_mode_xpu_float32", + + # RuntimeError: false INTERNAL ASSERT FAILED at "/home/gta/daisyden/pytorch4/aten/src/ATen/native/DispatchStub.cpp":220, please report a bug to PyTorch. 
DispatchStub: missing kernel for xpu "test_out_nanmean_xpu_float32", - "test_out_requires_grad_error_sparse_sampled_addmm_xpu_complex64", - "test_out_requires_grad_error_sparse_sampled_addmm_xpu_float32", - "test_out_warning_nanmean_xpu", - "test_python_ref__refs_linspace_tensor_overload_xpu_int16", - "test_python_ref__refs_linspace_tensor_overload_xpu_int32", - "test_python_ref__refs_linspace_tensor_overload_xpu_int64", - "test_python_ref__refs_linspace_tensor_overload_xpu_int8", - "test_python_ref__refs_linspace_tensor_overload_xpu_uint8", - "test_python_ref__refs_linspace_xpu_int16", - "test_python_ref__refs_linspace_xpu_int32", - "test_python_ref__refs_linspace_xpu_int64", - "test_python_ref__refs_linspace_xpu_int8", - "test_python_ref__refs_linspace_xpu_uint8", - "test_python_ref__refs_logaddexp_xpu_complex128", - "test_python_ref__refs_logaddexp_xpu_complex64", - "test_python_ref__refs_native_layer_norm_xpu_bfloat16", - "test_python_ref__refs_native_layer_norm_xpu_float16", - "test_python_ref__refs_native_layer_norm_xpu_float32", - "test_python_ref__refs_nn_functional_hinge_embedding_loss_xpu_bfloat16", - "test_python_ref__refs_nn_functional_hinge_embedding_loss_xpu_float16", - "test_python_ref__refs_nn_functional_margin_ranking_loss_xpu_bfloat16", - "test_python_ref__refs_nn_functional_margin_ranking_loss_xpu_float16", - "test_python_ref__refs_nn_functional_triplet_margin_loss_xpu_uint8", - "test_python_ref__refs_square_xpu_bool", - "test_python_ref__refs_trunc_xpu_float64", - "test_python_ref_executor__refs_geometric_executor_aten_xpu_bfloat16", - "test_python_ref_executor__refs_geometric_executor_aten_xpu_float16", - "test_python_ref_executor__refs_geometric_executor_aten_xpu_float32", - "test_python_ref_executor__refs_geometric_executor_aten_xpu_float64", - "test_python_ref_executor__refs_geometric_executor_aten_xpu_int16", - "test_python_ref_executor__refs_geometric_executor_aten_xpu_int32", - "test_python_ref_executor__refs_geometric_executor_aten_xpu_int64", - "test_python_ref_executor__refs_geometric_executor_aten_xpu_int8", - "test_python_ref_executor__refs_geometric_executor_aten_xpu_uint8", - "test_python_ref_executor__refs_linspace_executor_aten_xpu_int16", - "test_python_ref_executor__refs_linspace_executor_aten_xpu_int32", - "test_python_ref_executor__refs_linspace_executor_aten_xpu_int64", - "test_python_ref_executor__refs_linspace_executor_aten_xpu_int8", - "test_python_ref_executor__refs_linspace_executor_aten_xpu_uint8", - "test_python_ref_executor__refs_linspace_tensor_overload_executor_aten_xpu_int16", - "test_python_ref_executor__refs_linspace_tensor_overload_executor_aten_xpu_int32", - "test_python_ref_executor__refs_linspace_tensor_overload_executor_aten_xpu_int64", - "test_python_ref_executor__refs_linspace_tensor_overload_executor_aten_xpu_int8", - "test_python_ref_executor__refs_linspace_tensor_overload_executor_aten_xpu_uint8", - "test_python_ref_executor__refs_log_normal_executor_aten_xpu_bfloat16", - "test_python_ref_executor__refs_log_normal_executor_aten_xpu_float16", - "test_python_ref_executor__refs_log_normal_executor_aten_xpu_float32", - "test_python_ref_executor__refs_log_normal_executor_aten_xpu_float64", + "test_out_warning_nanmean_xpu", + + # NameError: name 'nanj' is not defined. Did you mean: 'nan'? 
+ # https://github.com/intel/torch-xpu-ops/issues/768 "test_python_ref_executor__refs_logaddexp_executor_aten_xpu_complex128", "test_python_ref_executor__refs_logaddexp_executor_aten_xpu_complex64", - "test_python_ref_executor__refs_native_layer_norm_executor_aten_xpu_bfloat16", - "test_python_ref_executor__refs_native_layer_norm_executor_aten_xpu_float16", - "test_python_ref_executor__refs_native_layer_norm_executor_aten_xpu_float32", - "test_python_ref_executor__refs_nn_functional_alpha_dropout_executor_aten_xpu_bfloat16", - "test_python_ref_executor__refs_nn_functional_alpha_dropout_executor_aten_xpu_float16", - "test_python_ref_executor__refs_nn_functional_alpha_dropout_executor_aten_xpu_float32", - "test_python_ref_executor__refs_nn_functional_alpha_dropout_executor_aten_xpu_float64", - "test_python_ref_executor__refs_nn_functional_hinge_embedding_loss_executor_aten_xpu_bfloat16", - "test_python_ref_executor__refs_nn_functional_hinge_embedding_loss_executor_aten_xpu_float16", - "test_python_ref_executor__refs_nn_functional_margin_ranking_loss_executor_aten_xpu_bfloat16", - "test_python_ref_executor__refs_nn_functional_margin_ranking_loss_executor_aten_xpu_float16", - "test_python_ref_executor__refs_nn_functional_triplet_margin_loss_executor_aten_xpu_uint8", - "test_python_ref_executor__refs_square_executor_aten_xpu_bool", - "test_python_ref_executor__refs_vdot_executor_aten_xpu_complex128", - "test_python_ref_executor__refs_vdot_executor_aten_xpu_complex64", - "test_python_ref_torch_fallback__refs_linspace_tensor_overload_xpu_int16", - "test_python_ref_torch_fallback__refs_linspace_tensor_overload_xpu_int32", - "test_python_ref_torch_fallback__refs_linspace_tensor_overload_xpu_int64", - "test_python_ref_torch_fallback__refs_linspace_tensor_overload_xpu_int8", - "test_python_ref_torch_fallback__refs_linspace_tensor_overload_xpu_uint8", - "test_python_ref_torch_fallback__refs_linspace_xpu_int16", - "test_python_ref_torch_fallback__refs_linspace_xpu_int32", - "test_python_ref_torch_fallback__refs_linspace_xpu_int64", - "test_python_ref_torch_fallback__refs_linspace_xpu_int8", - "test_python_ref_torch_fallback__refs_linspace_xpu_uint8", - "test_python_ref_torch_fallback__refs_logaddexp_xpu_complex128", - "test_python_ref_torch_fallback__refs_logaddexp_xpu_complex64", - "test_python_ref_torch_fallback__refs_native_layer_norm_xpu_bfloat16", - "test_python_ref_torch_fallback__refs_native_layer_norm_xpu_float16", - "test_python_ref_torch_fallback__refs_native_layer_norm_xpu_float32", - "test_python_ref_torch_fallback__refs_nn_functional_hinge_embedding_loss_xpu_bfloat16", - "test_python_ref_torch_fallback__refs_nn_functional_hinge_embedding_loss_xpu_float16", - "test_python_ref_torch_fallback__refs_nn_functional_margin_ranking_loss_xpu_bfloat16", - "test_python_ref_torch_fallback__refs_nn_functional_margin_ranking_loss_xpu_float16", - "test_python_ref_torch_fallback__refs_sinh_xpu_complex128", - "test_python_ref_torch_fallback__refs_special_multigammaln_mvlgamma_p_5_xpu_int32", - "test_python_ref_torch_fallback__refs_square_xpu_bool", - "test_python_ref_torch_fallback__refs_vdot_xpu_complex128", - "test_python_ref_torch_fallback__refs_vdot_xpu_complex64", + + # RuntimeError: could not create a primitive descriptor for a deconvolution + # https://github.com/intel/torch-xpu-ops/issues/253 "test_variant_consistency_eager_nn_functional_conv_transpose2d_xpu_complex64", "test_variant_consistency_eager_nn_functional_conv_transpose2d_xpu_float32", 
"test_variant_consistency_eager_nn_functional_conv_transpose3d_xpu_complex64", "test_variant_consistency_eager_nn_functional_conv_transpose3d_xpu_float32", - "test_variant_consistency_eager_nn_functional_rrelu_xpu_float32", - "test_variant_consistency_eager_to_sparse_xpu_complex64", - "test_variant_consistency_eager_to_sparse_xpu_float32", - "test_compare_cpu__refs_special_zeta_xpu_float32", + + # Need revisit when the op is enabled + # Unexpected success, xpu passed because it compares to cpu "test_compare_cpu_linalg_lu_factor_ex_xpu_float32", "test_compare_cpu_linalg_lu_factor_xpu_float32", "test_compare_cpu_linalg_lu_xpu_float32", "test_compare_cpu_special_hermite_polynomial_h_xpu_float32", - "test_compare_cpu_special_zeta_xpu_float32", + "test_compare_cpu_special_zeta_xpu_float32", + + # XFAIL of CUDA and XPU, unexpected success in fallback "test_out_cholesky_inverse_xpu_float32", "test_out_geqrf_xpu_float32", "test_out_narrow_copy_xpu_float32", "test_out_ormqr_xpu_float32", - "test_out_triangular_solve_xpu_float32", - "test_python_ref__refs_heaviside_xpu_int64", - "test_python_ref__refs_special_bessel_j0_xpu_int64", - "test_python_ref_errors__refs_dstack_xpu", - "test_python_ref_errors__refs_hstack_xpu", - "test_python_ref_errors__refs_linalg_cross_xpu", - "test_python_ref_errors__refs_vstack_xpu", + + # XFAIL of CUDA, XPU got unexpected success + "test_python_ref__refs_div_no_rounding_mode_xpu_complex32", + "test_python_ref__refs_pow_xpu_complex32", "test_python_ref_executor__refs_mul_executor_aten_xpu_complex32", - "test_python_ref__refs_special_multigammaln_mvlgamma_p_5_xpu_float64", - "test_python_ref_executor__refs_special_multigammaln_mvlgamma_p_3_executor_aten_xpu_float64", - "test_python_ref__refs_square_xpu_complex128", - "test_python_ref__refs_square_xpu_complex64", - "test_python_ref_executor__refs_istft_executor_aten_xpu_complex128", - "test_python_ref_executor__refs_square_executor_aten_xpu_complex128", - "test_python_ref_torch_fallback__refs_square_xpu_complex128", - "test_python_ref_torch_fallback__refs_square_xpu_complex64", - # Skip list of new added when porting XPU operators. - # See: https://github.com/intel/torch-xpu-ops/issues/128 - "test_dtypes_view_as_complex_xpu", # Didn't align with CUDA, The following dtypes did not work in backward but are listed by the OpInfo: {torch.bfloat16} - "test_dtypes_view_as_real_xpu", # Didn't align with CUDA, The following dtypes did not work in backward but are listed by the OpInfo: {torch.bfloat16} - "test_noncontiguous_samples_native_dropout_backward_xpu_int64", # The implementation aligns with CUDA, RuntimeError: "masked_scale" not implemented for 'Long'. - "test_non_standard_bool_values_native_dropout_backward_xpu_bool", # The implementation aligns with CUDA, RuntimeError: "masked_scale" not implemented for 'Bool'. - "test_non_standard_bool_values_scatter_reduce_amax_xpu_bool", # Align with CUDA dtypes - "scatter_gather_base_kernel_func" not implemented for 'Bool' - "test_non_standard_bool_values_scatter_reduce_amin_xpu_bool", # Align with CUDA dtypes - "scatter_gather_base_kernel_func" not implemented for 'Bool' - "test_non_standard_bool_values_scatter_reduce_prod_xpu_bool", # Align with CUDA dtypes - "scatter_gather_base_kernel_func" not implemented for 'Bool' - "test_non_standard_bool_values_argsort_xpu_bool", # The implementation aligns with CUDA, RuntimeError: "argsort" not implemented for 'Bool'. 
- "test_non_standard_bool_values_msort_xpu_bool", # The implementation aligns with CUDA, RuntimeError: "msort" not implemented for 'Bool'. + "test_python_ref_torch_fallback__refs_div_no_rounding_mode_xpu_complex32", + "test_python_ref__refs_pow_xpu_complex32", + "test_python_ref_executor__refs_mul_executor_aten_xpu_complex32", + "test_python_ref_torch_fallback__refs_div_no_rounding_mode_xpu_complex32", + "test_python_ref_torch_fallback__refs_pow_xpu_complex32", + + + # unexpected success because of cpu fallback + "test_out_triangular_solve_xpu_float32", + + # Newly added: + + # Cuda skipped it "test_non_standard_bool_values_sort_xpu_bool", # The implementation aligns with CUDA, RuntimeError: "sort" not implemented for 'Bool'. + + # Cuda skipped it + "test_non_standard_bool_values_msort_xpu_bool", # The implementation aligns with CUDA, RuntimeError: "msort" not implemented for 'Bool'. + + + # Unexpected success "test_python_ref_executor__refs_pow_executor_aten_xpu_complex32", # Didn't align with CUDA, Unexpected success + # Unexpected success - "test_errors_histogramdd_xpu", + # "test_errors_histogramdd_xpu", #XFAIL now # Jiterator is only supported on CUDA and ROCm GPUs, none are available. + # https://github.com/intel/torch-xpu-ops/issues/584 "_jiterator_", # https://github.com/intel/torch-xpu-ops/issues/157 # Segfault: @@ -473,9 +422,11 @@ "test_variant_consistency_eager_triangular_solve_xpu_complex64", # oneDNN issues # RuntimeError: value cannot be converted to type float without overflow + # https://github.com/intel/torch-xpu-ops/issues/683 "test_conj_view_addbmm_xpu_complex64", "test_neg_conj_view_addbmm_xpu_complex128", # CPU fallback error: AssertionError: Tensor-likes are not close! + # https://github.com/intel/torch-xpu-ops/issues/271 "test_neg_view_nn_functional_rrelu_xpu_float64", ### Error #0 in TestMathBitsXPU , RuntimeError: Double and complex datatype matmul is not supported in oneDNN # https://github.com/intel/torch-xpu-ops/issues/254 @@ -693,7 +644,7 @@ "test_neg_view_nn_functional_conv_transpose2d_xpu_float64", "test_neg_view_nn_functional_conv_transpose3d_xpu_float64", ### Error #2 in TestMathBitsXPU , NotImplementedError: Could not run 'aten::_sparse_coo_tensor_with_dims_and_tensors' with arguments from the 'SparseXPU' backend. - # https://github.com/intel/torch-xpu-ops/issues/242 + # https://github.com/intel/torch-xpu-ops/issues/242 and https://github.com/intel/torch-xpu-ops/issues/240 "test_conj_view_to_sparse_xpu_complex64", "test_neg_conj_view_to_sparse_xpu_complex128", "test_neg_view_to_sparse_xpu_float64", @@ -703,13 +654,14 @@ # in XPU supported operators. Then the case will work. "test_noncontiguous_samples_nn_functional_avg_pool1d_xpu_int64", "test_noncontiguous_samples_nn_functional_local_response_norm_xpu_int64", + #AssertionError: The supported dtypes for unique_consecutive on device type xpu are incorrect! #The following dtypes worked in forward but are not listed by the OpInfo: {torch.bfloat16}. - #XPU supports bfloat16, CUDA doesn't support it. + # XPU supports bfloat16, CUDA doesn't support it. "test_dtypes_unique_xpu", # RuntimeError: Expected both inputs to be Half, Float or Double tensors but got BFloat16 and BFloat16. # Polar's backward is calculated using complex(), which does not support bfloat16. CUDA fails with same error. 
- "test_dtypes_polar_xpu", + #"test_dtypes_polar_xpu", # implemented aten::histogram to align MPS operators coverage, CUDA doesn't support # but test_dtypes infrastructure leverage CUDA supported datatypes "test_dtypes_histogram_xpu", @@ -718,7 +670,15 @@ # 1. most cases of nextafter require Half dtype. # 2. Half dtype is a common dtype in workloads. # So far CUDA doesn't support Half, so that XPU fails as we aligned claimed dtypes with CUDA in test infra. + # https://github.com/intel/torch-xpu-ops/issues/623 "test_dtypes_nextafter_xpu", + + # AssertionError: The supported dtypes for argsort on device type xpu are incorrect! + # The following dtypes worked in forward but are not listed by the OpInfo: {torch.bool}. + # CUDA does not have torch.bool support on argsort. + "test_dtypes_argsort_xpu", + # Unexpected success, CUDA got XFAIL because CUDA does not have historgramadd supported" + "test_errors_histogramdd_xpu", ), "test_binary_ufuncs_xpu.py": ( @@ -1160,9 +1120,9 @@ # It is kernel assert on XPU implementation not exception on host. # We are same as CUDA implementation. And CUDA skips these cases. "test_trivial_fancy_out_of_bounds_xpu", - "test_advancedindex", - # CUDA bias case - "test_index_put_accumulate_with_optional_tensors_xpu", + # index boundary should be checked. + # https://github.com/intel/torch-xpu-ops/issues/783 + "test_advancedindex_xpu_float64", # XPU implementation doesn't claimn FP8 now # https://github.com/intel/torch-xpu-ops/issues/461 "test_index_put_src_datatype_xpu_float8_e5m2", @@ -1305,10 +1265,18 @@ "test_reference_numerics_normal_polygamma_polygamma_n_2_xpu_float16", "test_reference_numerics_normal_polygamma_polygamma_n_3_xpu_float16", "test_reference_numerics_normal_polygamma_polygamma_n_4_xpu_float16", + + # CUDA XFAIL + "test_reference_numerics_large__refs_rsqrt_xpu_complex32", + + # Compiler issue in handling tanh with real or imag inf. + # https://github.com/intel/torch-xpu-ops/issues/184, https://jira.devtools.intel.com/browse/CMPLRLIBS-34974 + "test_reference_numerics_large__refs_tanh_xpu_complex32", ), "test_masked_xpu.py": ( # RuntimeError: is_coalesced expected sparse coordinate tensor layout but got Sparse. + # https://github.com/intel/torch-xpu-ops/issues/357 "test_mask_layout_sparse_coo_masked_amax_xpu_bfloat16", "test_mask_layout_sparse_coo_masked_amax_xpu_float16", "test_mask_layout_sparse_coo_masked_amax_xpu_float32", @@ -1963,6 +1931,7 @@ "test_maskedtensor_xpu.py": ( # RuntimeError: is_coalesced expected sparse coordinate tensor layout but got Sparse + # https://github.com/intel/torch-xpu-ops/issues/357 "test_contiguous_xpu", "test_invalid_sparse_coo_values_xpu", "test_to_dense_and_sparse_coo_xpu", @@ -2682,6 +2651,7 @@ "test_autodiff__foreach_sigmoid_outplace_xpu_complex128", "test_binary_op_with_scalar_self_support__foreach_pow_is_fastpath_True_xpu_bool", # AssertionError: RuntimeError not raised + # https://github.com/intel/torch-xpu-ops/issues/784 "test_0dim_tensor_overload_exception_xpu", # RuntimeError: Tried to instantiate dummy base class CUDAGraph "test_big_num_tensors__foreach_max_use_cuda_graph_True_xpu_float32", diff --git a/test/xpu/test_indexing_xpu.py b/test/xpu/test_indexing_xpu.py index c3637b0fe..d57567318 100644 --- a/test/xpu/test_indexing_xpu.py +++ b/test/xpu/test_indexing_xpu.py @@ -11,6 +11,37 @@ with XPUPatchForImport(False): from test_indexing import NumpyTests,TestIndexing + import torch + + + def __test_index_put_accumulate_with_optional_tensors(self, device): + # TODO: replace with a better solution. 
+ # Currently, here using torchscript to put None into indices. + # on C++ it gives indices as a list of 2 optional tensors: first is null and + # the second is a valid tensor. + @torch.jit.script + def func(x, i, v): + idx = [None, i] + x.index_put_(idx, v, accumulate=True) + return x + + n = 4 + t = torch.arange(n * 2, dtype=torch.float32).reshape(n, 2) + t_dev = t.to(device) + indices = torch.tensor([1, 0]) + indices_dev = indices.to(device) + value0d = torch.tensor(10.0) + value1d = torch.tensor([1.0, 2.0]) + + out_cuda = func(t_dev, indices_dev, value0d.xpu()) + out_cpu = func(t, indices, value0d) + self.assertEqual(out_cuda.cpu(), out_cpu) + + out_cuda = func(t_dev, indices_dev, value1d.xpu()) + out_cpu = func(t, indices, value1d) + self.assertEqual(out_cuda.cpu(), out_cpu) + + TestIndexing.test_index_put_accumulate_with_optional_tensors = __test_index_put_accumulate_with_optional_tensors instantiate_device_type_tests(NumpyTests, globals(), only_for=("xpu"), allow_xpu=True) diff --git a/test/xpu/xpu_test_utils.py b/test/xpu/xpu_test_utils.py index 7672c0bb4..01d140858 100644 --- a/test/xpu/xpu_test_utils.py +++ b/test/xpu/xpu_test_utils.py @@ -237,6 +237,11 @@ "nan_to_num", "scatter_reduce", "nanmean", + "native_layer_norm", + "native_layer_norm_backward", + "square", + "heaviside", + "argsort", ] _ops_without_cuda_support = [ @@ -415,6 +420,7 @@ def ModuleTest_test_xpu(self, test_case): xpu_gradInput = test_case._backward( xpu_module, xpu_input_tuple, xpu_output, xpu_gradOutput ) + test_case.assertEqual( cpu_gradInput, xpu_gradInput, @@ -757,14 +763,13 @@ def gen_xpu_wrappers(op_name, wrappers): def align_supported_dtypes(self, db): for opinfo in db: - if ( - opinfo.name not in _xpu_computation_op_list - or opinfo.name in _ops_without_cuda_support - ): + if ( opinfo.name not in _xpu_computation_op_list and (opinfo.torch_opinfo.name not in _xpu_computation_op_list + if db == common_methods_invocations.python_ref_db else True)) or opinfo.name in _ops_without_cuda_support: opinfo.dtypesIfXPU = opinfo.dtypes else: backward_dtypes = set(opinfo.backward_dtypesIfCUDA) - backward_dtypes.add(bfloat16) + if bfloat16 in opinfo.dtypesIfXPU: + backward_dtypes.add(bfloat16) opinfo.backward_dtypes = tuple(backward_dtypes) if "has_fp64=0" in str(torch.xpu.get_device_properties(0)): @@ -990,4 +995,4 @@ def launch_test(test_case, skip_list=None, exe_list=None): "pytest -v " + test_case ) - return os.system(test_command) \ No newline at end of file + return os.system(test_command)
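
Usage note (hypothetical, for illustration only): the refreshed skip lists are plain
Python data, so a runner can feed them straight into launch_test() from
xpu_test_utils.py. The snippet below assumes it is executed from test/xpu/ so that
both modules are importable; the real CI entry point may differ.

    # Hypothetical driver that consumes skip_list_common.py via launch_test().
    from skip_list_common import skip_dict
    from xpu_test_utils import launch_test

    status = 0
    for test_file, skip_cases in skip_dict.items():
        # launch_test() builds a "pytest -v ..." command that excludes the skipped
        # cases and returns the exit status from os.system().
        status |= launch_test(test_file, skip_list=skip_cases)

    raise SystemExit(1 if status else 0)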