diff --git a/.github/scripts/env.sh b/.github/scripts/env.sh
index ab7d7812d..56d8e3930 100644
--- a/.github/scripts/env.sh
+++ b/.github/scripts/env.sh
@@ -1,3 +1,4 @@
 #!/bin/bash
-source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh
+source /opt/intel/oneapi/compiler/latest/env/vars.sh
+source /opt/intel/oneapi/umf/latest/env/vars.sh
 source /opt/intel/oneapi/pti/latest/env/vars.sh
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 45fdef513..fd3e25bb0 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -24,6 +24,7 @@ jobs:
     if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) }}
     uses: ./.github/workflows/_linux_ut.yml
     with:
+      pytorch: release/2.5
       ut: op_example,op_extended,op_ut
       runner: linux.idc.xpu
@@ -32,6 +33,7 @@ jobs:
     if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) }}
     uses: ./.github/workflows/_linux_ut.yml
     with:
+      pytorch: release/2.5
       abi: 0
       ut: op_extended
       runner: linux.idc.xpu
@@ -57,7 +59,7 @@ jobs:
         pwd
         cd ../ && rm -rf pytorch
         source activate e2e_ci
-        git clone -b main https://github.com/pytorch/pytorch pytorch
+        git clone -b release/2.5 https://github.com/pytorch/pytorch pytorch
         cd pytorch
         # apply PRs for stock pytorch
         pip install requests
diff --git a/cmake/BuildFlags.cmake b/cmake/BuildFlags.cmake
index f85598b07..c5d3e5d46 100644
--- a/cmake/BuildFlags.cmake
+++ b/cmake/BuildFlags.cmake
@@ -40,6 +40,8 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC"
   list(APPEND SYCL_HOST_FLAGS -Wno-deprecated)
   list(APPEND SYCL_HOST_FLAGS -Wno-attributes)
   list(APPEND SYCL_HOST_FLAGS -Wno-sign-compare)
+  list(APPEND SYCL_HOST_FLAGS -Wno-error=comment)
+  list(APPEND SYCL_HOST_FLAGS -Wno-error=terminate)
 endif()

 if(CMAKE_BUILD_TYPE MATCHES Debug)
@@ -47,6 +49,7 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC"
     list(APPEND SYCL_HOST_FLAGS -O0)
   endif(CMAKE_BUILD_TYPE MATCHES Debug)

+  list(APPEND SYCL_HOST_FLAGS -D__INTEL_LLVM_COMPILER_VERSION=${__INTEL_LLVM_COMPILER})
   # -- Kernel flags (SYCL_KERNEL_OPTIONS)
   # The fast-math will be enabled by default in SYCL compiler.
   # Refer to [https://clang.llvm.org/docs/UsersManual.html#cmdoption-fno-fast-math]
@@ -81,10 +84,13 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC"
   set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fno-approx-func)
   set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -Wno-absolute-value)
   set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -no-ftz)
+  set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -Wno-error=comment)
+  set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -Wno-error=terminate)
   # Equivalent to build option -fpreview-breaking-changes for SYCL compiler.
   set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -D__INTEL_PREVIEW_BREAKING_CHANGES)
   set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI})
 endif()
+set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -D__INTEL_LLVM_COMPILER_VERSION=${__INTEL_LLVM_COMPILER})

 CHECK_SYCL_FLAG("-fsycl-fp64-conv-emu" SUPPORTS_FP64_CONV_EMU)
 if(SUPPORTS_FP64_CONV_EMU)
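// Context for the BuildFlags.cmake hunks above: besides demoting the
// "comment" and "terminate" diagnostics from errors back to warnings, CMake
// now forwards the detected Intel LLVM compiler version into every host and
// kernel translation unit as __INTEL_LLVM_COMPILER_VERSION. A minimal
// hypothetical sketch of how source code could branch on that define; the
// 20240100 threshold is an illustrative value, not taken from this patch.
#if defined(__INTEL_LLVM_COMPILER_VERSION) && \
    __INTEL_LLVM_COMPILER_VERSION >= 20240100
// Newer toolchain: use behavior available from this compiler version on.
#else
// Older toolchain: keep the legacy code path.
#endif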
diff --git a/cmake/Modules/FindSYCLToolkit.cmake b/cmake/Modules/FindSYCLToolkit.cmake
index 46e34c7f8..88e5768c4 100644
--- a/cmake/Modules/FindSYCLToolkit.cmake
+++ b/cmake/Modules/FindSYCLToolkit.cmake
@@ -77,7 +77,7 @@ endif()

 # Function to write a test case to verify SYCL features.

-function(SYCL_CMPLR_TEST_WRITE src)
+function(SYCL_CMPLR_TEST_WRITE src macro_name)

   set(cpp_macro_if "#if")
   set(cpp_macro_endif "#endif")
@@ -88,8 +88,8 @@ function(SYCL_CMPLR_TEST_WRITE src)

   # Feature tests goes here

-  string(APPEND SYCL_CMPLR_TEST_CONTENT "${cpp_macro_if} defined(SYCL_LANGUAGE_VERSION)\n")
-  string(APPEND SYCL_CMPLR_TEST_CONTENT "cout << \"SYCL_LANGUAGE_VERSION=\"<<SYCL_LANGUAGE_VERSION<<endl;\n")
+  string(APPEND SYCL_CMPLR_TEST_CONTENT "${cpp_macro_if} defined(${macro_name})\n")
+  string(APPEND SYCL_CMPLR_TEST_CONTENT "cout << \"${macro_name}=\"<<${macro_name}<<endl;\n")
diff --git a/src/comm/SYCLHelpers.h b/src/comm/SYCLHelpers.h
--- a/src/comm/SYCLHelpers.h
+++ b/src/comm/SYCLHelpers.h
@@ ... @@ sycl_kernel_submit(
     ::sycl::range<dim> range,
     ::sycl::queue q,
     ker_t ker) {
+#if SYCL_EXT_ONEAPI_ENQUEUE_FUNCTIONS == 1
   auto cgf = [&](::sycl::handler& cgh) {
     ::sycl::ext::oneapi::experimental::parallel_for(cgh, range, ker);
   };
   ::sycl::ext::oneapi::experimental::submit(q, cgf);
+#else
+  auto cgf = [&](::sycl::handler& cgh) { cgh.parallel_for(range, ker); };
+  q.submit(cgf);
+#endif
 }

 // Additional convention of SYCL kernel configuration. Besides construct kernel
@@ -80,12 +85,21 @@ sycl_kernel_submit(
     ::sycl::range<dim> local_range,
     ::sycl::queue q,
     ker_t ker) {
+#if SYCL_EXT_ONEAPI_ENQUEUE_FUNCTIONS == 1
   auto cgf = [&](::sycl::handler& cgh) {
     ker.sycl_ker_config_convention(cgh);
     ::sycl::ext::oneapi::experimental::nd_launch(
         cgh, ::sycl::nd_range<dim>(global_range, local_range), ker);
   };
   ::sycl::ext::oneapi::experimental::submit(q, cgf);
+#else
+  auto cgf = [&](::sycl::handler& cgh) {
+    ker.sycl_ker_config_convention(cgh);
+    cgh.parallel_for(
+        ::sycl::nd_range<dim>(global_range, local_range), ker);
+  };
+  q.submit(cgf);
+#endif
 }

 template <typename ker_t, int dim>
@@ -97,11 +111,19 @@ sycl_kernel_submit(
     ::sycl::range<dim> local_range,
     ::sycl::queue q,
     ker_t ker) {
+#if SYCL_EXT_ONEAPI_ENQUEUE_FUNCTIONS == 1
   auto cgf = [&](::sycl::handler& cgh) {
     ::sycl::ext::oneapi::experimental::nd_launch(
         cgh, ::sycl::nd_range<dim>(global_range, local_range), ker);
   };
   ::sycl::ext::oneapi::experimental::submit(q, cgf);
+#else
+  auto cgf = [&](::sycl::handler& cgh) {
+    cgh.parallel_for(
+        ::sycl::nd_range<dim>(global_range, local_range), ker);
+  };
+  q.submit(cgf);
+#endif
 }

 template <typename ker_t>
@@ -113,6 +135,7 @@ sycl_kernel_submit(
     int64_t local_range,
     ::sycl::queue q,
     ker_t ker) {
+#if SYCL_EXT_ONEAPI_ENQUEUE_FUNCTIONS == 1
   auto cgf = [&](::sycl::handler& cgh) {
     ker.sycl_ker_config_convention(cgh);
     ::sycl::ext::oneapi::experimental::nd_launch(
@@ -122,6 +145,16 @@ sycl_kernel_submit(
         ker);
   };
   ::sycl::ext::oneapi::experimental::submit(q, cgf);
+#else
+  auto cgf = [&](::sycl::handler& cgh) {
+    ker.sycl_ker_config_convention(cgh);
+    cgh.parallel_for(
+        ::sycl::nd_range<1>(
+            ::sycl::range<1>(global_range), ::sycl::range<1>(local_range)),
+        ker);
+  };
+  q.submit(cgf);
+#endif
 }

 template <typename ker_t>
@@ -133,6 +166,7 @@ sycl_kernel_submit(
     int64_t local_range,
     ::sycl::queue q,
     ker_t ker) {
+#if SYCL_EXT_ONEAPI_ENQUEUE_FUNCTIONS == 1
   auto cgf = [&](::sycl::handler& cgh) {
     ::sycl::ext::oneapi::experimental::nd_launch(
         cgh,
@@ -141,4 +175,13 @@ sycl_kernel_submit(
         ker);
   };
   ::sycl::ext::oneapi::experimental::submit(q, cgf);
+#else
+  auto cgf = [&](::sycl::handler& cgh) {
+    cgh.parallel_for(
+        ::sycl::nd_range<1>(
+            ::sycl::range<1>(global_range), ::sycl::range<1>(local_range)),
+        ker);
+  };
+  q.submit(cgf);
+#endif
 }
diff --git a/test/xpu/extended/run_test_with_skip.py b/test/xpu/extended/run_test_with_skip.py
index 01649588f..eba71b17f 100644
--- a/test/xpu/extended/run_test_with_skip.py
+++ b/test/xpu/extended/run_test_with_skip.py
@@ -148,6 +148,9 @@
     "test_compare_cpu_nanmedian_xpu_int64",
     "test_compare_cpu_nanmedian_xpu_int8",
     "test_compare_cpu_nanmedian_xpu_uint8",
+    "test_compare_cpu_nn_functional_unfold_xpu_bool",
+    "test_non_standard_bool_values_nn_functional_unfold_xpu_bool",
+    "test_non_standard_bool_values_index_put_xpu_bool",
 )
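// The sycl_kernel_submit hunks above all apply one pattern: when the
// toolchain advertises the enqueue-functions extension
// (SYCL_EXT_ONEAPI_ENQUEUE_FUNCTIONS == 1), submit work through the
// experimental free functions; otherwise fall back to the standard SYCL 2020
// queue::submit() / handler::parallel_for() path. Below is a self-contained
// sketch of the same dispatch; submit_kernel and the fill kernel are
// hypothetical names, not part of the patch.
#include <sycl/sycl.hpp>

template <typename ker_t, int dim>
void submit_kernel(::sycl::range<dim> range, ::sycl::queue q, ker_t ker) {
#if SYCL_EXT_ONEAPI_ENQUEUE_FUNCTIONS == 1
  // Newer oneAPI toolchains: free-function submission from the extension.
  auto cgf = [&](::sycl::handler& cgh) {
    ::sycl::ext::oneapi::experimental::parallel_for(cgh, range, ker);
  };
  ::sycl::ext::oneapi::experimental::submit(q, cgf);
#else
  // Portable fallback: the identical kernel via the classic submission API.
  q.submit([&](::sycl::handler& cgh) { cgh.parallel_for(range, ker); });
#endif
}

int main() {
  ::sycl::queue q;
  int* data = ::sycl::malloc_shared<int>(64, q);
  // The same kernel functor runs on either submission path.
  submit_kernel(::sycl::range<1>(64), q, [=](::sycl::item<1> it) {
    data[it.get_linear_id()] = static_cast<int>(it.get_linear_id());
  });
  q.wait();
  ::sycl::free(data, q);
  return 0;
}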
diff --git a/test/xpu/skip_list_common.py b/test/xpu/skip_list_common.py
index 0d0f18a86..2b9235efc 100644
--- a/test/xpu/skip_list_common.py
+++ b/test/xpu/skip_list_common.py
@@ -1,5 +1,7 @@
 skip_dict = {
     "test_ops_xpu.py": (
+        "test_noncontiguous_samples_histogram_xpu_float32",
+        "test_non_standard_bool_values_index_put_xpu_bool",
         # Skip list of base line
         "test_dtypes___rmod___xpu",
         "test_dtypes_nn_functional_conv1d_xpu",
@@ -1250,6 +1252,12 @@
     ),
     "test_unary_ufuncs_xpu.py": (
+        "test_reference_numerics_extremal__refs_exp2_xpu_complex64",
+        "test_exp_xpu_complex64",
+        "test_reference_numerics_extremal__refs_exp_xpu_complex64",
+        "test_reference_numerics_extremal_exp2_xpu_complex64",
+        "test_reference_numerics_extremal_exp_xpu_complex64",
+        "test_reference_numerics_large_exp_xpu_complex32",
         # AssertionError: Jiterator is only supported on CUDA and ROCm GPUs, none are available.
         "_jiterator_",
         # CPU Fallback fails: Tensor-likes are not close!
@@ -2153,314 +2161,6 @@
         "test_to and not test_to_memory and not test_total",
     ),
-    "test_ops_gradients_xpu.py": (
-        ### Error #0 in TestBwdGradientsXPU , totally 271 , RuntimeError: Double and complex datatype matmul is not supported in oneDNN
-        "test_fn_grad___rmatmul___xpu_complex128",
-        "test_fn_grad___rmatmul___xpu_float64",
-        "test_fn_grad_addbmm_xpu_float64",
-        "test_fn_grad_addmm_decomposed_xpu_complex128",
-        "test_fn_grad_addmm_decomposed_xpu_float64",
-        "test_fn_grad_addmm_xpu_complex128",
-        "test_fn_grad_addmm_xpu_float64",
-        "test_fn_grad_addmv_xpu_complex128",
-        "test_fn_grad_addmv_xpu_float64",
-        "test_fn_grad_addr_xpu_complex128",
-        "test_fn_grad_addr_xpu_float64",
-        "test_fn_grad_baddbmm_xpu_complex128",
-        "test_fn_grad_baddbmm_xpu_float64",
-        "test_fn_grad_bmm_xpu_complex128",
-        "test_fn_grad_bmm_xpu_float64",
-        "test_fn_grad_cdist_xpu_float64",
-        "test_fn_grad_cholesky_inverse_xpu_complex128",
-        "test_fn_grad_cholesky_inverse_xpu_float64",
-        "test_fn_grad_cholesky_solve_xpu_complex128",
-        "test_fn_grad_cholesky_solve_xpu_float64",
-        "test_fn_grad_cholesky_xpu_complex128",
-        "test_fn_grad_cholesky_xpu_float64",
-        "test_fn_grad_corrcoef_xpu_complex128",
-        "test_fn_grad_corrcoef_xpu_float64",
-        "test_fn_grad_einsum_xpu_complex128",
-        "test_fn_grad_einsum_xpu_float64",
-        "test_fn_grad_inner_xpu_complex128",
-        "test_fn_grad_inner_xpu_float64",
-        "test_fn_grad_linalg_cholesky_ex_xpu_complex128",
-        "test_fn_grad_linalg_cholesky_ex_xpu_float64",
-        "test_fn_grad_linalg_cholesky_xpu_complex128",
-        "test_fn_grad_linalg_cholesky_xpu_float64",
-        "test_fn_grad_linalg_cond_xpu_complex128",
-        "test_fn_grad_linalg_cond_xpu_float64",
-        "test_fn_grad_linalg_det_singular_xpu_complex128",
-        "test_fn_grad_linalg_det_singular_xpu_float64",
-        "test_fn_grad_linalg_det_xpu_complex128",
-        "test_fn_grad_linalg_det_xpu_float64",
-        "test_fn_grad_linalg_eig_xpu_complex128",
-        "test_fn_grad_linalg_eig_xpu_float64",
-        "test_fn_grad_linalg_eigh_xpu_complex128",
-        "test_fn_grad_linalg_eigh_xpu_float64",
-        "test_fn_grad_linalg_eigvals_xpu_complex128",
-        "test_fn_grad_linalg_eigvals_xpu_float64",
-        "test_fn_grad_linalg_eigvalsh_xpu_complex128",
-        "test_fn_grad_linalg_eigvalsh_xpu_float64",
-        "test_fn_grad_linalg_householder_product_xpu_complex128",
-        "test_fn_grad_linalg_householder_product_xpu_float64",
-        "test_fn_grad_linalg_inv_ex_xpu_complex128",
-        "test_fn_grad_linalg_inv_ex_xpu_float64",
-        "test_fn_grad_linalg_inv_xpu_complex128",
-        "test_fn_grad_linalg_inv_xpu_float64",
-        "test_fn_grad_linalg_lstsq_grad_oriented_xpu_complex128",
"test_fn_grad_linalg_lstsq_grad_oriented_xpu_float64", - "test_fn_grad_linalg_lu_factor_ex_xpu_complex128", - "test_fn_grad_linalg_lu_factor_ex_xpu_float64", - "test_fn_grad_linalg_lu_factor_xpu_complex128", - "test_fn_grad_linalg_lu_factor_xpu_float64", - "test_fn_grad_linalg_lu_solve_xpu_complex128", - "test_fn_grad_linalg_lu_solve_xpu_float64", - "test_fn_grad_linalg_lu_xpu_complex128", - "test_fn_grad_linalg_lu_xpu_float64", - "test_fn_grad_linalg_matrix_norm_xpu_complex128", - "test_fn_grad_linalg_matrix_norm_xpu_float64", - "test_fn_grad_linalg_matrix_power_xpu_complex128", - "test_fn_grad_linalg_matrix_power_xpu_float64", - "test_fn_grad_linalg_multi_dot_xpu_complex128", - "test_fn_grad_linalg_multi_dot_xpu_float64", - "test_fn_grad_linalg_norm_xpu_float64", - "test_fn_grad_linalg_pinv_hermitian_xpu_complex128", - "test_fn_grad_linalg_pinv_hermitian_xpu_float64", - "test_fn_grad_linalg_pinv_singular_xpu_complex128", - "test_fn_grad_linalg_pinv_singular_xpu_float64", - "test_fn_grad_linalg_pinv_xpu_complex128", - "test_fn_grad_linalg_pinv_xpu_float64", - "test_fn_grad_linalg_qr_xpu_complex128", - "test_fn_grad_linalg_qr_xpu_float64", - "test_fn_grad_linalg_slogdet_xpu_complex128", - "test_fn_grad_linalg_slogdet_xpu_float64", - "test_fn_grad_linalg_solve_ex_xpu_complex128", - "test_fn_grad_linalg_solve_ex_xpu_float64", - "test_fn_grad_linalg_solve_triangular_xpu_complex128", - "test_fn_grad_linalg_solve_triangular_xpu_float64", - "test_fn_grad_linalg_solve_xpu_complex128", - "test_fn_grad_linalg_solve_xpu_float64", - "test_fn_grad_linalg_svd_xpu_complex128", - "test_fn_grad_linalg_svd_xpu_float64", - "test_fn_grad_linalg_svdvals_xpu_complex128", - "test_fn_grad_linalg_svdvals_xpu_float64", - "test_fn_grad_linalg_tensorinv_xpu_complex128", - "test_fn_grad_linalg_tensorinv_xpu_float64", - "test_fn_grad_linalg_tensorsolve_xpu_complex128", - "test_fn_grad_linalg_tensorsolve_xpu_float64", - "test_fn_grad_logdet_xpu_complex128", - "test_fn_grad_logdet_xpu_float64", - "test_fn_grad_lu_solve_xpu_complex128", - "test_fn_grad_lu_solve_xpu_float64", - "test_fn_grad_lu_xpu_complex128", - "test_fn_grad_lu_xpu_float64", - "test_fn_grad_matmul_xpu_complex128", - "test_fn_grad_matmul_xpu_float64", - "test_fn_grad_mm_xpu_complex128", - "test_fn_grad_mm_xpu_float64", - "test_fn_grad_mv_xpu_complex128", - "test_fn_grad_mv_xpu_float64", - "test_fn_grad_nn_functional_bilinear_xpu_float64", - "test_fn_grad_nn_functional_linear_xpu_complex128", - "test_fn_grad_nn_functional_linear_xpu_float64", - "test_fn_grad_nn_functional_multi_head_attention_forward_xpu_float64", - "test_fn_grad_nn_functional_scaled_dot_product_attention_xpu_float64", - "test_fn_grad_norm_nuc_xpu_complex128", - "test_fn_grad_norm_nuc_xpu_float64", - "test_fn_grad_ormqr_xpu_complex128", - "test_fn_grad_ormqr_xpu_float64", - "test_fn_grad_pca_lowrank_xpu_float64", - "test_fn_grad_pinverse_xpu_complex128", - "test_fn_grad_pinverse_xpu_float64", - "test_fn_grad_qr_xpu_complex128", - "test_fn_grad_qr_xpu_float64", - "test_fn_grad_svd_lowrank_xpu_float64", - "test_fn_grad_svd_xpu_complex128", - "test_fn_grad_svd_xpu_float64", - "test_fn_grad_tensordot_xpu_complex128", - "test_fn_grad_tensordot_xpu_float64", - "test_fn_grad_triangular_solve_xpu_complex128", - "test_fn_grad_triangular_solve_xpu_float64", - "test_fn_gradgrad___rmatmul___xpu_complex128", - "test_fn_gradgrad___rmatmul___xpu_float64", - "test_fn_gradgrad_addbmm_xpu_float64", - "test_fn_gradgrad_addmm_decomposed_xpu_complex128", - "test_fn_gradgrad_addmm_decomposed_xpu_float64", - 
"test_fn_gradgrad_addmm_xpu_complex128", - "test_fn_gradgrad_addmm_xpu_float64", - "test_fn_gradgrad_addmv_xpu_complex128", - "test_fn_gradgrad_addmv_xpu_float64", - "test_fn_gradgrad_addr_xpu_complex128", - "test_fn_gradgrad_addr_xpu_float64", - "test_fn_gradgrad_baddbmm_xpu_complex128", - "test_fn_gradgrad_baddbmm_xpu_float64", - "test_fn_gradgrad_bmm_xpu_complex128", - "test_fn_gradgrad_bmm_xpu_float64", - "test_fn_gradgrad_cholesky_inverse_xpu_complex128", - "test_fn_gradgrad_cholesky_inverse_xpu_float64", - "test_fn_gradgrad_cholesky_solve_xpu_complex128", - "test_fn_gradgrad_cholesky_solve_xpu_float64", - "test_fn_gradgrad_cholesky_xpu_complex128", - "test_fn_gradgrad_cholesky_xpu_float64", - "test_fn_gradgrad_corrcoef_xpu_complex128", - "test_fn_gradgrad_corrcoef_xpu_float64", - "test_fn_gradgrad_einsum_xpu_complex128", - "test_fn_gradgrad_einsum_xpu_float64", - "test_fn_gradgrad_inner_xpu_complex128", - "test_fn_gradgrad_inner_xpu_float64", - "test_fn_gradgrad_linalg_cholesky_ex_xpu_complex128", - "test_fn_gradgrad_linalg_cholesky_ex_xpu_float64", - "test_fn_gradgrad_linalg_cholesky_xpu_complex128", - "test_fn_gradgrad_linalg_cholesky_xpu_float64", - "test_fn_gradgrad_linalg_cond_xpu_complex128", - "test_fn_gradgrad_linalg_cond_xpu_float64", - "test_fn_gradgrad_linalg_det_xpu_complex128", - "test_fn_gradgrad_linalg_det_xpu_float64", - "test_fn_gradgrad_linalg_eig_xpu_complex128", - "test_fn_gradgrad_linalg_eig_xpu_float64", - "test_fn_gradgrad_linalg_eigh_xpu_complex128", - "test_fn_gradgrad_linalg_eigh_xpu_float64", - "test_fn_gradgrad_linalg_eigvals_xpu_complex128", - "test_fn_gradgrad_linalg_eigvals_xpu_float64", - "test_fn_gradgrad_linalg_eigvalsh_xpu_complex128", - "test_fn_gradgrad_linalg_eigvalsh_xpu_float64", - "test_fn_gradgrad_linalg_householder_product_xpu_complex128", - "test_fn_gradgrad_linalg_householder_product_xpu_float64", - "test_fn_gradgrad_linalg_inv_ex_xpu_complex128", - "test_fn_gradgrad_linalg_inv_ex_xpu_float64", - "test_fn_gradgrad_linalg_inv_xpu_complex128", - "test_fn_gradgrad_linalg_inv_xpu_float64", - "test_fn_gradgrad_linalg_lstsq_grad_oriented_xpu_complex128", - "test_fn_gradgrad_linalg_lstsq_grad_oriented_xpu_float64", - "test_fn_gradgrad_linalg_lu_factor_ex_xpu_complex128", - "test_fn_gradgrad_linalg_lu_factor_ex_xpu_float64", - "test_fn_gradgrad_linalg_lu_factor_xpu_complex128", - "test_fn_gradgrad_linalg_lu_factor_xpu_float64", - "test_fn_gradgrad_linalg_lu_solve_xpu_complex128", - "test_fn_gradgrad_linalg_lu_solve_xpu_float64", - "test_fn_gradgrad_linalg_lu_xpu_complex128", - "test_fn_gradgrad_linalg_lu_xpu_float64", - "test_fn_gradgrad_linalg_matrix_norm_xpu_complex128", - "test_fn_gradgrad_linalg_matrix_norm_xpu_float64", - "test_fn_gradgrad_linalg_matrix_power_xpu_complex128", - "test_fn_gradgrad_linalg_matrix_power_xpu_float64", - "test_fn_gradgrad_linalg_multi_dot_xpu_complex128", - "test_fn_gradgrad_linalg_multi_dot_xpu_float64", - "test_fn_gradgrad_linalg_pinv_hermitian_xpu_complex128", - "test_fn_gradgrad_linalg_pinv_hermitian_xpu_float64", - "test_fn_gradgrad_linalg_pinv_singular_xpu_float64", - "test_fn_gradgrad_linalg_pinv_xpu_complex128", - "test_fn_gradgrad_linalg_pinv_xpu_float64", - "test_fn_gradgrad_linalg_qr_xpu_complex128", - "test_fn_gradgrad_linalg_qr_xpu_float64", - "test_fn_gradgrad_linalg_slogdet_xpu_complex128", - "test_fn_gradgrad_linalg_slogdet_xpu_float64", - "test_fn_gradgrad_linalg_solve_ex_xpu_complex128", - "test_fn_gradgrad_linalg_solve_ex_xpu_float64", - "test_fn_gradgrad_linalg_solve_triangular_xpu_complex128", - 
"test_fn_gradgrad_linalg_solve_triangular_xpu_float64", - "test_fn_gradgrad_linalg_solve_xpu_complex128", - "test_fn_gradgrad_linalg_solve_xpu_float64", - "test_fn_gradgrad_linalg_svd_xpu_complex128", - "test_fn_gradgrad_linalg_svd_xpu_float64", - "test_fn_gradgrad_linalg_svdvals_xpu_complex128", - "test_fn_gradgrad_linalg_svdvals_xpu_float64", - "test_fn_gradgrad_linalg_tensorinv_xpu_complex128", - "test_fn_gradgrad_linalg_tensorinv_xpu_float64", - "test_fn_gradgrad_linalg_tensorsolve_xpu_complex128", - "test_fn_gradgrad_linalg_tensorsolve_xpu_float64", - "test_fn_gradgrad_logdet_xpu_complex128", - "test_fn_gradgrad_logdet_xpu_float64", - "test_fn_gradgrad_lu_solve_xpu_complex128", - "test_fn_gradgrad_lu_solve_xpu_float64", - "test_fn_gradgrad_lu_xpu_complex128", - "test_fn_gradgrad_lu_xpu_float64", - "test_fn_gradgrad_matmul_xpu_complex128", - "test_fn_gradgrad_matmul_xpu_float64", - "test_fn_gradgrad_mm_xpu_complex128", - "test_fn_gradgrad_mm_xpu_float64", - "test_fn_gradgrad_mv_xpu_complex128", - "test_fn_gradgrad_mv_xpu_float64", - "test_fn_gradgrad_nn_functional_bilinear_xpu_float64", - "test_fn_gradgrad_nn_functional_linear_xpu_complex128", - "test_fn_gradgrad_nn_functional_linear_xpu_float64", - "test_fn_gradgrad_nn_functional_multi_head_attention_forward_xpu_float64", - "test_fn_gradgrad_nn_functional_scaled_dot_product_attention_xpu_float64", - "test_fn_gradgrad_norm_nuc_xpu_complex128", - "test_fn_gradgrad_norm_nuc_xpu_float64", - "test_fn_gradgrad_ormqr_xpu_complex128", - "test_fn_gradgrad_ormqr_xpu_float64", - "test_fn_gradgrad_pca_lowrank_xpu_float64", - "test_fn_gradgrad_pinverse_xpu_complex128", - "test_fn_gradgrad_pinverse_xpu_float64", - "test_fn_gradgrad_qr_xpu_complex128", - "test_fn_gradgrad_qr_xpu_float64", - "test_fn_gradgrad_svd_lowrank_xpu_float64", - "test_fn_gradgrad_svd_xpu_complex128", - "test_fn_gradgrad_svd_xpu_float64", - "test_fn_gradgrad_tensordot_xpu_complex128", - "test_fn_gradgrad_tensordot_xpu_float64", - "test_fn_gradgrad_triangular_solve_xpu_complex128", - "test_fn_gradgrad_triangular_solve_xpu_float64", - "test_inplace_grad_addbmm_xpu_float64", - "test_inplace_grad_addmm_decomposed_xpu_complex128", - "test_inplace_grad_addmm_decomposed_xpu_float64", - "test_inplace_grad_addmm_xpu_complex128", - "test_inplace_grad_addmm_xpu_float64", - "test_inplace_grad_addmv_xpu_complex128", - "test_inplace_grad_addmv_xpu_float64", - "test_inplace_grad_addr_xpu_complex128", - "test_inplace_grad_addr_xpu_float64", - "test_inplace_grad_baddbmm_xpu_complex128", - "test_inplace_grad_baddbmm_xpu_float64", - "test_inplace_gradgrad_addbmm_xpu_float64", - "test_inplace_gradgrad_addmm_decomposed_xpu_complex128", - "test_inplace_gradgrad_addmm_decomposed_xpu_float64", - "test_inplace_gradgrad_addmm_xpu_complex128", - "test_inplace_gradgrad_addmm_xpu_float64", - "test_inplace_gradgrad_addmv_xpu_complex128", - "test_inplace_gradgrad_addmv_xpu_float64", - "test_inplace_gradgrad_addr_xpu_complex128", - "test_inplace_gradgrad_addr_xpu_float64", - "test_inplace_gradgrad_baddbmm_xpu_complex128", - "test_inplace_gradgrad_baddbmm_xpu_float64", - "test_fn_grad_pca_lowrank_xpu_complex128", - "test_fn_grad_svd_lowrank_xpu_complex128", - "test_fn_gradgrad_pca_lowrank_xpu_complex128", - "test_fn_gradgrad_svd_lowrank_xpu_complex128", - "test_fn_grad_linalg_norm_xpu_complex128", - ### Error #1 in TestBwdGradientsXPU , totally 4 , RuntimeError: value cannot be converted to type float without overflow - "test_fn_grad_addbmm_xpu_complex128", - "test_fn_gradgrad_addbmm_xpu_complex128", - 
"test_inplace_grad_addbmm_xpu_complex128", - "test_inplace_gradgrad_addbmm_xpu_complex128", - ### rrelu_xpu op is not implemented,try these cases after implementing rrelu. - "test_fn_grad_nn_functional_rrelu_xpu_float64", - "test_fn_gradgrad_nn_functional_rrelu_xpu_float64", - "test_inplace_grad_nn_functional_rrelu_xpu_float64", - "test_inplace_gradgrad_nn_functional_rrelu_xpu_float64", - ### Error #4 in TestBwdGradientsXPU , totally 8 , RuntimeError: could not create a primitive descriptor for a deconvolution forward propagation primitive - "test_fn_grad_nn_functional_conv_transpose2d_xpu_complex128", - "test_fn_grad_nn_functional_conv_transpose2d_xpu_float64", - "test_fn_grad_nn_functional_conv_transpose3d_xpu_complex128", - "test_fn_grad_nn_functional_conv_transpose3d_xpu_float64", - "test_fn_gradgrad_nn_functional_conv_transpose2d_xpu_complex128", - "test_fn_gradgrad_nn_functional_conv_transpose2d_xpu_float64", - "test_fn_gradgrad_nn_functional_conv_transpose3d_xpu_complex128", - "test_fn_gradgrad_nn_functional_conv_transpose3d_xpu_float64", - "test_fn_gradgrad_index_reduce_mean_xpu_float64", - "test_fn_gradgrad_index_reduce_prod_xpu_float64", - "test_inplace_gradgrad_index_reduce_mean_xpu_float64", - "test_inplace_gradgrad_index_reduce_prod_xpu_float64", - ### Error #7 in TestBwdGradientsXPU , totally 2 , NotImplementedError: Could not run 'aten::_sparse_coo_tensor_with_dims_and_tensors' with arguments from the 'SparseXPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'aten::_sparse_coo_tensor_with_dims_and_tensors' is only available for these backends: [XPU, Meta, SparseCPU, SparseMeta, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradHIP, AutogradXLA, AutogradMPS, AutogradIPU, AutogradXPU, AutogradHPU, AutogradVE, AutogradLazy, AutogradMTIA, AutogradPrivateUse1, AutogradPrivateUse2, AutogradPrivateUse3, AutogradMeta, AutogradNestedTensor, Tracer, AutocastCPU, AutocastXPU, AutocastCUDA, FuncTorchBatched, BatchedNestedTensor, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PreDispatch, PythonDispatcher]. - "test_fn_grad_to_sparse_xpu_float64", - "test_fn_gradgrad_to_sparse_xpu_float64", - - # issue: https://github.com/intel/torch-xpu-ops/issues/809 - "test_fn_gradgrad_nn_functional_conv3d_xpu_complex128", - "test_fn_gradgrad_nn_functional_conv3d_xpu_float64", - ), - "test_torch_xpu.py": ( # issue 302 ### Error #0 in TestTorchDeviceTypeXPU , totally 11 , RuntimeError: expected scalar type Long but found Int @@ -2627,79 +2327,6 @@ "nn/test_pruning_xpu.py": None, - "test_foreach_xpu.py": ( - # CPU fallback fails. Implementation difference between CPU and CUDA. Expect success on CPU and expect fail on CUDA. When we use CPU fallback and align expected fail list with CUDA, these cases fail. 
-        # Unexpected success
-        "test_parity__foreach_ceil_fastpath_inplace_xpu_complex128",
-        "test_parity__foreach_ceil_fastpath_inplace_xpu_complex64",
-        "test_parity__foreach_ceil_fastpath_outplace_xpu_complex128",
-        "test_parity__foreach_ceil_fastpath_outplace_xpu_complex64",
-        "test_parity__foreach_clamp_max_fastpath_inplace_xpu_complex128",
-        "test_parity__foreach_clamp_max_fastpath_inplace_xpu_complex64",
-        "test_parity__foreach_clamp_max_fastpath_outplace_xpu_complex128",
-        "test_parity__foreach_clamp_max_fastpath_outplace_xpu_complex64",
-        "test_parity__foreach_clamp_min_fastpath_inplace_xpu_complex128",
-        "test_parity__foreach_clamp_min_fastpath_inplace_xpu_complex64",
-        "test_parity__foreach_clamp_min_fastpath_outplace_xpu_complex128",
-        "test_parity__foreach_clamp_min_fastpath_outplace_xpu_complex64",
-        "test_parity__foreach_erf_fastpath_inplace_xpu_complex128",
-        "test_parity__foreach_erf_fastpath_inplace_xpu_complex64",
-        "test_parity__foreach_erf_fastpath_outplace_xpu_complex128",
-        "test_parity__foreach_erf_fastpath_outplace_xpu_complex64",
-        "test_parity__foreach_erfc_fastpath_inplace_xpu_complex128",
-        "test_parity__foreach_erfc_fastpath_inplace_xpu_complex64",
-        "test_parity__foreach_erfc_fastpath_outplace_xpu_complex128",
-        "test_parity__foreach_erfc_fastpath_outplace_xpu_complex64",
-        "test_parity__foreach_floor_fastpath_inplace_xpu_complex128",
-        "test_parity__foreach_floor_fastpath_inplace_xpu_complex64",
-        "test_parity__foreach_floor_fastpath_outplace_xpu_complex128",
-        "test_parity__foreach_floor_fastpath_outplace_xpu_complex64",
-        "test_parity__foreach_frac_fastpath_inplace_xpu_complex128",
-        "test_parity__foreach_frac_fastpath_inplace_xpu_complex64",
-        "test_parity__foreach_frac_fastpath_outplace_xpu_complex128",
-        "test_parity__foreach_frac_fastpath_outplace_xpu_complex64",
-        "test_parity__foreach_lgamma_fastpath_inplace_xpu_bfloat16",
-        "test_parity__foreach_lgamma_fastpath_inplace_xpu_complex128",
-        "test_parity__foreach_lgamma_fastpath_inplace_xpu_complex64",
-        "test_parity__foreach_lgamma_fastpath_outplace_xpu_bfloat16",
-        "test_parity__foreach_lgamma_fastpath_outplace_xpu_complex128",
-        "test_parity__foreach_lgamma_fastpath_outplace_xpu_complex64",
-        "test_parity__foreach_maximum_fastpath_inplace_xpu_complex128",
-        "test_parity__foreach_maximum_fastpath_inplace_xpu_complex64",
-        "test_parity__foreach_maximum_fastpath_outplace_xpu_complex128",
-        "test_parity__foreach_maximum_fastpath_outplace_xpu_complex64",
-        "test_parity__foreach_minimum_fastpath_inplace_xpu_complex128",
-        "test_parity__foreach_minimum_fastpath_inplace_xpu_complex64",
-        "test_parity__foreach_minimum_fastpath_outplace_xpu_complex128",
-        "test_parity__foreach_minimum_fastpath_outplace_xpu_complex64",
-        "test_parity__foreach_round_fastpath_inplace_xpu_complex128",
-        "test_parity__foreach_round_fastpath_inplace_xpu_complex64",
-        "test_parity__foreach_round_fastpath_outplace_xpu_complex128",
-        "test_parity__foreach_round_fastpath_outplace_xpu_complex64",
-        "test_parity__foreach_sigmoid_fastpath_inplace_xpu_complex128",
-        "test_parity__foreach_sigmoid_fastpath_inplace_xpu_complex64",
-        "test_parity__foreach_sigmoid_fastpath_outplace_xpu_complex128",
-        "test_parity__foreach_sigmoid_fastpath_outplace_xpu_complex64",
-        "test_parity__foreach_sign_fastpath_inplace_xpu_complex128",
-        "test_parity__foreach_sign_fastpath_inplace_xpu_complex64",
-        "test_parity__foreach_sign_fastpath_outplace_xpu_complex128",
-        "test_parity__foreach_sign_fastpath_outplace_xpu_complex64",
"test_parity__foreach_trunc_fastpath_inplace_xpu_complex128", - "test_parity__foreach_trunc_fastpath_inplace_xpu_complex64", - "test_parity__foreach_trunc_fastpath_outplace_xpu_complex128", - "test_parity__foreach_trunc_fastpath_outplace_xpu_complex64", - "test_autodiff__foreach_sigmoid_inplace_xpu_complex128", - "test_autodiff__foreach_sigmoid_outplace_xpu_complex128", - "test_binary_op_with_scalar_self_support__foreach_pow_is_fastpath_True_xpu_bool", - # AssertionError: RuntimeError not raised - "test_0dim_tensor_overload_exception_xpu", - # RuntimeError: Tried to instantiate dummy base class CUDAGraph - "test_big_num_tensors__foreach_max_use_cuda_graph_True_xpu_float32", - "test_big_num_tensors__foreach_max_use_cuda_graph_True_xpu_float64", - "test_big_num_tensors__foreach_norm_use_cuda_graph_True_xpu_float32", - "test_big_num_tensors__foreach_norm_use_cuda_graph_True_xpu_float64", - ), - "nn/test_convolution_xpu.py": ( # XPU unsupport ops, skip. # https://github.com/intel/torch-xpu-ops/issues/348