From c295ac72fb6f6dd17087cf5587f82b467999fa24 Mon Sep 17 00:00:00 2001 From: Feng Yuan Date: Fri, 20 Dec 2024 02:46:23 -0800 Subject: [PATCH 01/13] Event free: Make the event free APIs compatible with different compiler versions Signed-off-by: Feng Yuan --- cmake/BuildFlags.cmake | 2 + cmake/Modules/FindSYCLToolkit.cmake | 57 +++++++++++++++++++++++------ src/comm/SYCLHelpers.h | 48 ++++++++++++++++++++++++ 3 files changed, 96 insertions(+), 11 deletions(-) diff --git a/cmake/BuildFlags.cmake b/cmake/BuildFlags.cmake index f85598b07..f6927bd58 100644 --- a/cmake/BuildFlags.cmake +++ b/cmake/BuildFlags.cmake @@ -47,6 +47,7 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" list(APPEND SYCL_HOST_FLAGS -O0) endif(CMAKE_BUILD_TYPE MATCHES Debug) + list(APPEND SYCL_HOST_FLAGS -D__INTEL_LLVM_COMPILER_VERSION=${__INTEL_LLVM_COMPILER}) # -- Kernel flags (SYCL_KERNEL_OPTIONS) # The fast-math will be enabled by default in SYCL compiler. # Refer to [https://clang.llvm.org/docs/UsersManual.html#cmdoption-fno-fast-math] @@ -85,6 +86,7 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -D__INTEL_PREVIEW_BREAKING_CHANGES) set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}) endif() + set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -D__INTEL_LLVM_COMPILER_VERSION=${__INTEL_LLVM_COMPILER}) CHECK_SYCL_FLAG("-fsycl-fp64-conv-emu" SUPPORTS_FP64_CONV_EMU) if(SUPPORTS_FP64_CONV_EMU) diff --git a/cmake/Modules/FindSYCLToolkit.cmake b/cmake/Modules/FindSYCLToolkit.cmake index 46e34c7f8..88edd34a7 100644 --- a/cmake/Modules/FindSYCLToolkit.cmake +++ b/cmake/Modules/FindSYCLToolkit.cmake @@ -35,6 +35,7 @@ endif() if(SYCLTOOLKIT_FOUND) return() endif() + set(SYCLTOOLKIT_FOUND TRUE) include(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake) @@ -77,7 +78,7 @@ endif() # Function to write a test case to verify SYCL features. -function(SYCL_CMPLR_TEST_WRITE src) +function(SYCL_CMPLR_TEST_WRITE src macro_name) set(cpp_macro_if "#if") set(cpp_macro_endif "#endif") @@ -88,8 +89,8 @@ function(SYCL_CMPLR_TEST_WRITE src) # Feature tests goes here - string(APPEND SYCL_CMPLR_TEST_CONTENT "${cpp_macro_if} defined(SYCL_LANGUAGE_VERSION)\n") - string(APPEND SYCL_CMPLR_TEST_CONTENT "cout << \"SYCL_LANGUAGE_VERSION=\"< range, ::sycl::queue q, ker_t ker) { +#if defined(__INTEL_LLVM_COMPILER_VERSION) && \ + __INTEL_LLVM_COMPILER_VERSION >= 20250000 auto cgf = [&](::sycl::handler& cgh) { ::sycl::ext::oneapi::experimental::parallel_for(cgh, range, ker); }; ::sycl::ext::oneapi::experimental::submit(q, cgf); +#else + auto cgf = [&](::sycl::handler& cgh) { cgh.parallel_for(range, ker); }; + q.submit(cgf); +#endif } // Additional convention of SYCL kernel configuration. Besides construct kernel @@ -80,12 +86,22 @@ sycl_kernel_submit( ::sycl::range local_range, ::sycl::queue q, ker_t ker) { +#if defined(__INTEL_LLVM_COMPILER_VERSION) && \ + __INTEL_LLVM_COMPILER_VERSION >= 20250000 auto cgf = [&](::sycl::handler& cgh) { ker.sycl_ker_config_convention(cgh); ::sycl::ext::oneapi::experimental::nd_launch( cgh, ::sycl::nd_range(global_range, local_range), ker); }; ::sycl::ext::oneapi::experimental::submit(q, cgf); +#else + auto cgf = [&](::sycl::handler& cgh) { + ker.sycl_ker_config_convention(cgh); + cgh.parallel_for( + ::sycl::nd_range(global_range, local_range), ker); + }; + q.submit(cgf); +#endif } template @@ -97,11 +113,20 @@ sycl_kernel_submit( ::sycl::range local_range, ::sycl::queue q, ker_t ker) { +#if defined(__INTEL_LLVM_COMPILER_VERSION) && \ + __INTEL_LLVM_COMPILER_VERSION >= 20250000 auto cgf = [&](::sycl::handler& cgh) { ::sycl::ext::oneapi::experimental::nd_launch( cgh, ::sycl::nd_range(global_range, local_range), ker); }; ::sycl::ext::oneapi::experimental::submit(q, cgf); +#else + auto cgf = [&](::sycl::handler& cgh) { + cgh.parallel_for( + ::sycl::nd_range(global_range, local_range), ker); + }; + q.submit(cgf); +#endif } template @@ -113,6 +138,8 @@ sycl_kernel_submit( int64_t local_range, ::sycl::queue q, ker_t ker) { +#if defined(__INTEL_LLVM_COMPILER_VERSION) && \ + __INTEL_LLVM_COMPILER_VERSION >= 20250000 auto cgf = [&](::sycl::handler& cgh) { ker.sycl_ker_config_convention(cgh); ::sycl::ext::oneapi::experimental::nd_launch( @@ -122,6 +149,16 @@ sycl_kernel_submit( ker); }; ::sycl::ext::oneapi::experimental::submit(q, cgf); +#else + auto cgf = [&](::sycl::handler& cgh) { + ker.sycl_ker_config_convention(cgh); + cgh.parallel_for( + ::sycl::nd_range<1>( + ::sycl::range<1>(global_range), ::sycl::range<1>(local_range)), + ker); + }; + q.submit(cgf); +#endif } template @@ -133,6 +170,8 @@ sycl_kernel_submit( int64_t local_range, ::sycl::queue q, ker_t ker) { +#if defined(__INTEL_LLVM_COMPILER_VERSION) && \ + __INTEL_LLVM_COMPILER_VERSION >= 20250000 auto cgf = [&](::sycl::handler& cgh) { ::sycl::ext::oneapi::experimental::nd_launch( cgh, @@ -141,4 +180,13 @@ sycl_kernel_submit( ker); }; ::sycl::ext::oneapi::experimental::submit(q, cgf); +#else + auto cgf = [&](::sycl::handler& cgh) { + cgh.parallel_for( + ::sycl::nd_range<1>( + ::sycl::range<1>(global_range), ::sycl::range<1>(local_range)), + ker); + }; + q.submit(cgf); +#endif } From a35cbbcb5088f63fce73328e1d07abd6535a2ede Mon Sep 17 00:00:00 2001 From: Feng Yuan Date: Fri, 20 Dec 2024 21:11:50 +0800 Subject: [PATCH 02/13] Remove additional line --- cmake/Modules/FindSYCLToolkit.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/cmake/Modules/FindSYCLToolkit.cmake b/cmake/Modules/FindSYCLToolkit.cmake index 88edd34a7..88e5768c4 100644 --- a/cmake/Modules/FindSYCLToolkit.cmake +++ b/cmake/Modules/FindSYCLToolkit.cmake @@ -35,7 +35,6 @@ endif() if(SYCLTOOLKIT_FOUND) return() endif() - set(SYCLTOOLKIT_FOUND TRUE) include(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake) From ed8439aa156d1f8f5e70a0b391742febd494d946 Mon Sep 17 00:00:00 2001 From: Feng Yuan Date: Sun, 22 Dec 2024 17:48:53 -0800 Subject: [PATCH 03/13] Rebase PyTorch PR for PRECI --- .github/scripts/apply_torch_pr.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/scripts/apply_torch_pr.py b/.github/scripts/apply_torch_pr.py index 9ef238abb..9023ceeea 100644 --- a/.github/scripts/apply_torch_pr.py +++ b/.github/scripts/apply_torch_pr.py @@ -16,7 +16,8 @@ # [Inductor][Intel GPU] Support reduction split. "https://github.com/pytorch/pytorch/pull/129120", # Modify the tolerance level in TIMM benchmark - "https://github.com/pytorch/pytorch/pull/129735", + # "https://github.com/pytorch/pytorch/pull/129735", + "https://github.com/mengfei25/pytorch/pull/21", ] ) parser.add_argument('--extra-pr-list', '-e', nargs='+',default=[]) From 3fc2c62f485d5be29e7f32d98d113d241841c406 Mon Sep 17 00:00:00 2001 From: Feng Yuan Date: Sun, 22 Dec 2024 18:02:44 -0800 Subject: [PATCH 04/13] Make PRECI work for PyTorch 2.5 --- .github/scripts/env.sh | 3 ++- .github/workflows/pull.yml | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/scripts/env.sh b/.github/scripts/env.sh index ab7d7812d..56d8e3930 100644 --- a/.github/scripts/env.sh +++ b/.github/scripts/env.sh @@ -1,3 +1,4 @@ #!/bin/bash -source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh +source /opt/intel/oneapi/compiler/latest/env/vars.sh +source /opt/intel/oneapi/umf/latest/env/vars.sh source /opt/intel/oneapi/pti/latest/env/vars.sh diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 45fdef513..fd3e25bb0 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -24,6 +24,7 @@ jobs: if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) }} uses: ./.github/workflows/_linux_ut.yml with: + pytorch: release/2.5 ut: op_example,op_extended,op_ut runner: linux.idc.xpu @@ -32,6 +33,7 @@ jobs: if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) }} uses: ./.github/workflows/_linux_ut.yml with: + pytorch: release/2.5 abi: 0 ut: op_extended runner: linux.idc.xpu @@ -57,7 +59,7 @@ jobs: pwd cd ../ && rm -rf pytorch source activate e2e_ci - git clone -b main https://github.com/pytorch/pytorch pytorch + git clone -b release/2.5 https://github.com/pytorch/pytorch pytorch cd pytorch # apply PRs for stock pytorch pip install requests From 5158cd08021ef89ef6985ec9b64de00e666ce5b4 Mon Sep 17 00:00:00 2001 From: Feng Yuan Date: Sun, 22 Dec 2024 18:08:46 -0800 Subject: [PATCH 05/13] Revert private branch of PyTorch patches --- .github/scripts/apply_torch_pr.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/scripts/apply_torch_pr.py b/.github/scripts/apply_torch_pr.py index 9023ceeea..9ef238abb 100644 --- a/.github/scripts/apply_torch_pr.py +++ b/.github/scripts/apply_torch_pr.py @@ -16,8 +16,7 @@ # [Inductor][Intel GPU] Support reduction split. "https://github.com/pytorch/pytorch/pull/129120", # Modify the tolerance level in TIMM benchmark - # "https://github.com/pytorch/pytorch/pull/129735", - "https://github.com/mengfei25/pytorch/pull/21", + "https://github.com/pytorch/pytorch/pull/129735", ] ) parser.add_argument('--extra-pr-list', '-e', nargs='+',default=[]) From 70a762abd2615c4785a72b8574f458f5f8b8a56a Mon Sep 17 00:00:00 2001 From: Feng Yuan Date: Sun, 22 Dec 2024 21:17:17 -0800 Subject: [PATCH 06/13] Mute error --- cmake/BuildFlags.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/BuildFlags.cmake b/cmake/BuildFlags.cmake index f6927bd58..f1af7af70 100644 --- a/cmake/BuildFlags.cmake +++ b/cmake/BuildFlags.cmake @@ -40,6 +40,7 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" list(APPEND SYCL_HOST_FLAGS -Wno-deprecated) list(APPEND SYCL_HOST_FLAGS -Wno-attributes) list(APPEND SYCL_HOST_FLAGS -Wno-sign-compare) + list(APPEND SYCL_HOST_FLAGS -Wno-error=comment) endif() if(CMAKE_BUILD_TYPE MATCHES Debug) @@ -82,6 +83,7 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fno-approx-func) set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -Wno-absolute-value) set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -no-ftz) + set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -Wno-error=comment) # Equivalent to build option -fpreview-breaking-changes for SYCL compiler. set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -D__INTEL_PREVIEW_BREAKING_CHANGES) set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}) From df7f7f624e3be6b4d4505331cedc4c479709202c Mon Sep 17 00:00:00 2001 From: Feng Yuan Date: Sun, 22 Dec 2024 22:18:26 -0800 Subject: [PATCH 07/13] Mute error --- cmake/BuildFlags.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/BuildFlags.cmake b/cmake/BuildFlags.cmake index f1af7af70..c5d3e5d46 100644 --- a/cmake/BuildFlags.cmake +++ b/cmake/BuildFlags.cmake @@ -41,6 +41,7 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" list(APPEND SYCL_HOST_FLAGS -Wno-attributes) list(APPEND SYCL_HOST_FLAGS -Wno-sign-compare) list(APPEND SYCL_HOST_FLAGS -Wno-error=comment) + list(APPEND SYCL_HOST_FLAGS -Wno-error=terminate) endif() if(CMAKE_BUILD_TYPE MATCHES Debug) @@ -84,6 +85,7 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -Wno-absolute-value) set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -no-ftz) set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -Wno-error=comment) + set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -Wno-error=terminate) # Equivalent to build option -fpreview-breaking-changes for SYCL compiler. set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -D__INTEL_PREVIEW_BREAKING_CHANGES) set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}) From a48cadc98489f644e9d5d85305d6c6f557d96a3f Mon Sep 17 00:00:00 2001 From: Feng Yuan Date: Mon, 23 Dec 2024 17:42:08 -0800 Subject: [PATCH 08/13] Remove failed cases due to PyTorch uplift --- test/xpu/skip_list_common.py | 308 ----------------------------------- 1 file changed, 308 deletions(-) diff --git a/test/xpu/skip_list_common.py b/test/xpu/skip_list_common.py index 0d0f18a86..5909d52c7 100644 --- a/test/xpu/skip_list_common.py +++ b/test/xpu/skip_list_common.py @@ -2153,314 +2153,6 @@ "test_to and not test_to_memory and not test_total", ), - "test_ops_gradients_xpu.py": ( - ### Error #0 in TestBwdGradientsXPU , totally 271 , RuntimeError: Double and complex datatype matmul is not supported in oneDNN - "test_fn_grad___rmatmul___xpu_complex128", - "test_fn_grad___rmatmul___xpu_float64", - "test_fn_grad_addbmm_xpu_float64", - "test_fn_grad_addmm_decomposed_xpu_complex128", - "test_fn_grad_addmm_decomposed_xpu_float64", - "test_fn_grad_addmm_xpu_complex128", - "test_fn_grad_addmm_xpu_float64", - "test_fn_grad_addmv_xpu_complex128", - "test_fn_grad_addmv_xpu_float64", - "test_fn_grad_addr_xpu_complex128", - "test_fn_grad_addr_xpu_float64", - "test_fn_grad_baddbmm_xpu_complex128", - "test_fn_grad_baddbmm_xpu_float64", - "test_fn_grad_bmm_xpu_complex128", - "test_fn_grad_bmm_xpu_float64", - "test_fn_grad_cdist_xpu_float64", - "test_fn_grad_cholesky_inverse_xpu_complex128", - "test_fn_grad_cholesky_inverse_xpu_float64", - "test_fn_grad_cholesky_solve_xpu_complex128", - "test_fn_grad_cholesky_solve_xpu_float64", - "test_fn_grad_cholesky_xpu_complex128", - "test_fn_grad_cholesky_xpu_float64", - "test_fn_grad_corrcoef_xpu_complex128", - "test_fn_grad_corrcoef_xpu_float64", - "test_fn_grad_einsum_xpu_complex128", - "test_fn_grad_einsum_xpu_float64", - "test_fn_grad_inner_xpu_complex128", - "test_fn_grad_inner_xpu_float64", - "test_fn_grad_linalg_cholesky_ex_xpu_complex128", - "test_fn_grad_linalg_cholesky_ex_xpu_float64", - "test_fn_grad_linalg_cholesky_xpu_complex128", - "test_fn_grad_linalg_cholesky_xpu_float64", - "test_fn_grad_linalg_cond_xpu_complex128", - "test_fn_grad_linalg_cond_xpu_float64", - "test_fn_grad_linalg_det_singular_xpu_complex128", - "test_fn_grad_linalg_det_singular_xpu_float64", - "test_fn_grad_linalg_det_xpu_complex128", - "test_fn_grad_linalg_det_xpu_float64", - "test_fn_grad_linalg_eig_xpu_complex128", - "test_fn_grad_linalg_eig_xpu_float64", - "test_fn_grad_linalg_eigh_xpu_complex128", - "test_fn_grad_linalg_eigh_xpu_float64", - "test_fn_grad_linalg_eigvals_xpu_complex128", - "test_fn_grad_linalg_eigvals_xpu_float64", - "test_fn_grad_linalg_eigvalsh_xpu_complex128", - "test_fn_grad_linalg_eigvalsh_xpu_float64", - "test_fn_grad_linalg_householder_product_xpu_complex128", - "test_fn_grad_linalg_householder_product_xpu_float64", - "test_fn_grad_linalg_inv_ex_xpu_complex128", - "test_fn_grad_linalg_inv_ex_xpu_float64", - "test_fn_grad_linalg_inv_xpu_complex128", - "test_fn_grad_linalg_inv_xpu_float64", - "test_fn_grad_linalg_lstsq_grad_oriented_xpu_complex128", - "test_fn_grad_linalg_lstsq_grad_oriented_xpu_float64", - "test_fn_grad_linalg_lu_factor_ex_xpu_complex128", - "test_fn_grad_linalg_lu_factor_ex_xpu_float64", - "test_fn_grad_linalg_lu_factor_xpu_complex128", - "test_fn_grad_linalg_lu_factor_xpu_float64", - "test_fn_grad_linalg_lu_solve_xpu_complex128", - "test_fn_grad_linalg_lu_solve_xpu_float64", - "test_fn_grad_linalg_lu_xpu_complex128", - "test_fn_grad_linalg_lu_xpu_float64", - "test_fn_grad_linalg_matrix_norm_xpu_complex128", - "test_fn_grad_linalg_matrix_norm_xpu_float64", - "test_fn_grad_linalg_matrix_power_xpu_complex128", - "test_fn_grad_linalg_matrix_power_xpu_float64", - "test_fn_grad_linalg_multi_dot_xpu_complex128", - "test_fn_grad_linalg_multi_dot_xpu_float64", - "test_fn_grad_linalg_norm_xpu_float64", - "test_fn_grad_linalg_pinv_hermitian_xpu_complex128", - "test_fn_grad_linalg_pinv_hermitian_xpu_float64", - "test_fn_grad_linalg_pinv_singular_xpu_complex128", - "test_fn_grad_linalg_pinv_singular_xpu_float64", - "test_fn_grad_linalg_pinv_xpu_complex128", - "test_fn_grad_linalg_pinv_xpu_float64", - "test_fn_grad_linalg_qr_xpu_complex128", - "test_fn_grad_linalg_qr_xpu_float64", - "test_fn_grad_linalg_slogdet_xpu_complex128", - "test_fn_grad_linalg_slogdet_xpu_float64", - "test_fn_grad_linalg_solve_ex_xpu_complex128", - "test_fn_grad_linalg_solve_ex_xpu_float64", - "test_fn_grad_linalg_solve_triangular_xpu_complex128", - "test_fn_grad_linalg_solve_triangular_xpu_float64", - "test_fn_grad_linalg_solve_xpu_complex128", - "test_fn_grad_linalg_solve_xpu_float64", - "test_fn_grad_linalg_svd_xpu_complex128", - "test_fn_grad_linalg_svd_xpu_float64", - "test_fn_grad_linalg_svdvals_xpu_complex128", - "test_fn_grad_linalg_svdvals_xpu_float64", - "test_fn_grad_linalg_tensorinv_xpu_complex128", - "test_fn_grad_linalg_tensorinv_xpu_float64", - "test_fn_grad_linalg_tensorsolve_xpu_complex128", - "test_fn_grad_linalg_tensorsolve_xpu_float64", - "test_fn_grad_logdet_xpu_complex128", - "test_fn_grad_logdet_xpu_float64", - "test_fn_grad_lu_solve_xpu_complex128", - "test_fn_grad_lu_solve_xpu_float64", - "test_fn_grad_lu_xpu_complex128", - "test_fn_grad_lu_xpu_float64", - "test_fn_grad_matmul_xpu_complex128", - "test_fn_grad_matmul_xpu_float64", - "test_fn_grad_mm_xpu_complex128", - "test_fn_grad_mm_xpu_float64", - "test_fn_grad_mv_xpu_complex128", - "test_fn_grad_mv_xpu_float64", - "test_fn_grad_nn_functional_bilinear_xpu_float64", - "test_fn_grad_nn_functional_linear_xpu_complex128", - "test_fn_grad_nn_functional_linear_xpu_float64", - "test_fn_grad_nn_functional_multi_head_attention_forward_xpu_float64", - "test_fn_grad_nn_functional_scaled_dot_product_attention_xpu_float64", - "test_fn_grad_norm_nuc_xpu_complex128", - "test_fn_grad_norm_nuc_xpu_float64", - "test_fn_grad_ormqr_xpu_complex128", - "test_fn_grad_ormqr_xpu_float64", - "test_fn_grad_pca_lowrank_xpu_float64", - "test_fn_grad_pinverse_xpu_complex128", - "test_fn_grad_pinverse_xpu_float64", - "test_fn_grad_qr_xpu_complex128", - "test_fn_grad_qr_xpu_float64", - "test_fn_grad_svd_lowrank_xpu_float64", - "test_fn_grad_svd_xpu_complex128", - "test_fn_grad_svd_xpu_float64", - "test_fn_grad_tensordot_xpu_complex128", - "test_fn_grad_tensordot_xpu_float64", - "test_fn_grad_triangular_solve_xpu_complex128", - "test_fn_grad_triangular_solve_xpu_float64", - "test_fn_gradgrad___rmatmul___xpu_complex128", - "test_fn_gradgrad___rmatmul___xpu_float64", - "test_fn_gradgrad_addbmm_xpu_float64", - "test_fn_gradgrad_addmm_decomposed_xpu_complex128", - "test_fn_gradgrad_addmm_decomposed_xpu_float64", - "test_fn_gradgrad_addmm_xpu_complex128", - "test_fn_gradgrad_addmm_xpu_float64", - "test_fn_gradgrad_addmv_xpu_complex128", - "test_fn_gradgrad_addmv_xpu_float64", - "test_fn_gradgrad_addr_xpu_complex128", - "test_fn_gradgrad_addr_xpu_float64", - "test_fn_gradgrad_baddbmm_xpu_complex128", - "test_fn_gradgrad_baddbmm_xpu_float64", - "test_fn_gradgrad_bmm_xpu_complex128", - "test_fn_gradgrad_bmm_xpu_float64", - "test_fn_gradgrad_cholesky_inverse_xpu_complex128", - "test_fn_gradgrad_cholesky_inverse_xpu_float64", - "test_fn_gradgrad_cholesky_solve_xpu_complex128", - "test_fn_gradgrad_cholesky_solve_xpu_float64", - "test_fn_gradgrad_cholesky_xpu_complex128", - "test_fn_gradgrad_cholesky_xpu_float64", - "test_fn_gradgrad_corrcoef_xpu_complex128", - "test_fn_gradgrad_corrcoef_xpu_float64", - "test_fn_gradgrad_einsum_xpu_complex128", - "test_fn_gradgrad_einsum_xpu_float64", - "test_fn_gradgrad_inner_xpu_complex128", - "test_fn_gradgrad_inner_xpu_float64", - "test_fn_gradgrad_linalg_cholesky_ex_xpu_complex128", - "test_fn_gradgrad_linalg_cholesky_ex_xpu_float64", - "test_fn_gradgrad_linalg_cholesky_xpu_complex128", - "test_fn_gradgrad_linalg_cholesky_xpu_float64", - "test_fn_gradgrad_linalg_cond_xpu_complex128", - "test_fn_gradgrad_linalg_cond_xpu_float64", - "test_fn_gradgrad_linalg_det_xpu_complex128", - "test_fn_gradgrad_linalg_det_xpu_float64", - "test_fn_gradgrad_linalg_eig_xpu_complex128", - "test_fn_gradgrad_linalg_eig_xpu_float64", - "test_fn_gradgrad_linalg_eigh_xpu_complex128", - "test_fn_gradgrad_linalg_eigh_xpu_float64", - "test_fn_gradgrad_linalg_eigvals_xpu_complex128", - "test_fn_gradgrad_linalg_eigvals_xpu_float64", - "test_fn_gradgrad_linalg_eigvalsh_xpu_complex128", - "test_fn_gradgrad_linalg_eigvalsh_xpu_float64", - "test_fn_gradgrad_linalg_householder_product_xpu_complex128", - "test_fn_gradgrad_linalg_householder_product_xpu_float64", - "test_fn_gradgrad_linalg_inv_ex_xpu_complex128", - "test_fn_gradgrad_linalg_inv_ex_xpu_float64", - "test_fn_gradgrad_linalg_inv_xpu_complex128", - "test_fn_gradgrad_linalg_inv_xpu_float64", - "test_fn_gradgrad_linalg_lstsq_grad_oriented_xpu_complex128", - "test_fn_gradgrad_linalg_lstsq_grad_oriented_xpu_float64", - "test_fn_gradgrad_linalg_lu_factor_ex_xpu_complex128", - "test_fn_gradgrad_linalg_lu_factor_ex_xpu_float64", - "test_fn_gradgrad_linalg_lu_factor_xpu_complex128", - "test_fn_gradgrad_linalg_lu_factor_xpu_float64", - "test_fn_gradgrad_linalg_lu_solve_xpu_complex128", - "test_fn_gradgrad_linalg_lu_solve_xpu_float64", - "test_fn_gradgrad_linalg_lu_xpu_complex128", - "test_fn_gradgrad_linalg_lu_xpu_float64", - "test_fn_gradgrad_linalg_matrix_norm_xpu_complex128", - "test_fn_gradgrad_linalg_matrix_norm_xpu_float64", - "test_fn_gradgrad_linalg_matrix_power_xpu_complex128", - "test_fn_gradgrad_linalg_matrix_power_xpu_float64", - "test_fn_gradgrad_linalg_multi_dot_xpu_complex128", - "test_fn_gradgrad_linalg_multi_dot_xpu_float64", - "test_fn_gradgrad_linalg_pinv_hermitian_xpu_complex128", - "test_fn_gradgrad_linalg_pinv_hermitian_xpu_float64", - "test_fn_gradgrad_linalg_pinv_singular_xpu_float64", - "test_fn_gradgrad_linalg_pinv_xpu_complex128", - "test_fn_gradgrad_linalg_pinv_xpu_float64", - "test_fn_gradgrad_linalg_qr_xpu_complex128", - "test_fn_gradgrad_linalg_qr_xpu_float64", - "test_fn_gradgrad_linalg_slogdet_xpu_complex128", - "test_fn_gradgrad_linalg_slogdet_xpu_float64", - "test_fn_gradgrad_linalg_solve_ex_xpu_complex128", - "test_fn_gradgrad_linalg_solve_ex_xpu_float64", - "test_fn_gradgrad_linalg_solve_triangular_xpu_complex128", - "test_fn_gradgrad_linalg_solve_triangular_xpu_float64", - "test_fn_gradgrad_linalg_solve_xpu_complex128", - "test_fn_gradgrad_linalg_solve_xpu_float64", - "test_fn_gradgrad_linalg_svd_xpu_complex128", - "test_fn_gradgrad_linalg_svd_xpu_float64", - "test_fn_gradgrad_linalg_svdvals_xpu_complex128", - "test_fn_gradgrad_linalg_svdvals_xpu_float64", - "test_fn_gradgrad_linalg_tensorinv_xpu_complex128", - "test_fn_gradgrad_linalg_tensorinv_xpu_float64", - "test_fn_gradgrad_linalg_tensorsolve_xpu_complex128", - "test_fn_gradgrad_linalg_tensorsolve_xpu_float64", - "test_fn_gradgrad_logdet_xpu_complex128", - "test_fn_gradgrad_logdet_xpu_float64", - "test_fn_gradgrad_lu_solve_xpu_complex128", - "test_fn_gradgrad_lu_solve_xpu_float64", - "test_fn_gradgrad_lu_xpu_complex128", - "test_fn_gradgrad_lu_xpu_float64", - "test_fn_gradgrad_matmul_xpu_complex128", - "test_fn_gradgrad_matmul_xpu_float64", - "test_fn_gradgrad_mm_xpu_complex128", - "test_fn_gradgrad_mm_xpu_float64", - "test_fn_gradgrad_mv_xpu_complex128", - "test_fn_gradgrad_mv_xpu_float64", - "test_fn_gradgrad_nn_functional_bilinear_xpu_float64", - "test_fn_gradgrad_nn_functional_linear_xpu_complex128", - "test_fn_gradgrad_nn_functional_linear_xpu_float64", - "test_fn_gradgrad_nn_functional_multi_head_attention_forward_xpu_float64", - "test_fn_gradgrad_nn_functional_scaled_dot_product_attention_xpu_float64", - "test_fn_gradgrad_norm_nuc_xpu_complex128", - "test_fn_gradgrad_norm_nuc_xpu_float64", - "test_fn_gradgrad_ormqr_xpu_complex128", - "test_fn_gradgrad_ormqr_xpu_float64", - "test_fn_gradgrad_pca_lowrank_xpu_float64", - "test_fn_gradgrad_pinverse_xpu_complex128", - "test_fn_gradgrad_pinverse_xpu_float64", - "test_fn_gradgrad_qr_xpu_complex128", - "test_fn_gradgrad_qr_xpu_float64", - "test_fn_gradgrad_svd_lowrank_xpu_float64", - "test_fn_gradgrad_svd_xpu_complex128", - "test_fn_gradgrad_svd_xpu_float64", - "test_fn_gradgrad_tensordot_xpu_complex128", - "test_fn_gradgrad_tensordot_xpu_float64", - "test_fn_gradgrad_triangular_solve_xpu_complex128", - "test_fn_gradgrad_triangular_solve_xpu_float64", - "test_inplace_grad_addbmm_xpu_float64", - "test_inplace_grad_addmm_decomposed_xpu_complex128", - "test_inplace_grad_addmm_decomposed_xpu_float64", - "test_inplace_grad_addmm_xpu_complex128", - "test_inplace_grad_addmm_xpu_float64", - "test_inplace_grad_addmv_xpu_complex128", - "test_inplace_grad_addmv_xpu_float64", - "test_inplace_grad_addr_xpu_complex128", - "test_inplace_grad_addr_xpu_float64", - "test_inplace_grad_baddbmm_xpu_complex128", - "test_inplace_grad_baddbmm_xpu_float64", - "test_inplace_gradgrad_addbmm_xpu_float64", - "test_inplace_gradgrad_addmm_decomposed_xpu_complex128", - "test_inplace_gradgrad_addmm_decomposed_xpu_float64", - "test_inplace_gradgrad_addmm_xpu_complex128", - "test_inplace_gradgrad_addmm_xpu_float64", - "test_inplace_gradgrad_addmv_xpu_complex128", - "test_inplace_gradgrad_addmv_xpu_float64", - "test_inplace_gradgrad_addr_xpu_complex128", - "test_inplace_gradgrad_addr_xpu_float64", - "test_inplace_gradgrad_baddbmm_xpu_complex128", - "test_inplace_gradgrad_baddbmm_xpu_float64", - "test_fn_grad_pca_lowrank_xpu_complex128", - "test_fn_grad_svd_lowrank_xpu_complex128", - "test_fn_gradgrad_pca_lowrank_xpu_complex128", - "test_fn_gradgrad_svd_lowrank_xpu_complex128", - "test_fn_grad_linalg_norm_xpu_complex128", - ### Error #1 in TestBwdGradientsXPU , totally 4 , RuntimeError: value cannot be converted to type float without overflow - "test_fn_grad_addbmm_xpu_complex128", - "test_fn_gradgrad_addbmm_xpu_complex128", - "test_inplace_grad_addbmm_xpu_complex128", - "test_inplace_gradgrad_addbmm_xpu_complex128", - ### rrelu_xpu op is not implemented,try these cases after implementing rrelu. - "test_fn_grad_nn_functional_rrelu_xpu_float64", - "test_fn_gradgrad_nn_functional_rrelu_xpu_float64", - "test_inplace_grad_nn_functional_rrelu_xpu_float64", - "test_inplace_gradgrad_nn_functional_rrelu_xpu_float64", - ### Error #4 in TestBwdGradientsXPU , totally 8 , RuntimeError: could not create a primitive descriptor for a deconvolution forward propagation primitive - "test_fn_grad_nn_functional_conv_transpose2d_xpu_complex128", - "test_fn_grad_nn_functional_conv_transpose2d_xpu_float64", - "test_fn_grad_nn_functional_conv_transpose3d_xpu_complex128", - "test_fn_grad_nn_functional_conv_transpose3d_xpu_float64", - "test_fn_gradgrad_nn_functional_conv_transpose2d_xpu_complex128", - "test_fn_gradgrad_nn_functional_conv_transpose2d_xpu_float64", - "test_fn_gradgrad_nn_functional_conv_transpose3d_xpu_complex128", - "test_fn_gradgrad_nn_functional_conv_transpose3d_xpu_float64", - "test_fn_gradgrad_index_reduce_mean_xpu_float64", - "test_fn_gradgrad_index_reduce_prod_xpu_float64", - "test_inplace_gradgrad_index_reduce_mean_xpu_float64", - "test_inplace_gradgrad_index_reduce_prod_xpu_float64", - ### Error #7 in TestBwdGradientsXPU , totally 2 , NotImplementedError: Could not run 'aten::_sparse_coo_tensor_with_dims_and_tensors' with arguments from the 'SparseXPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'aten::_sparse_coo_tensor_with_dims_and_tensors' is only available for these backends: [XPU, Meta, SparseCPU, SparseMeta, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradHIP, AutogradXLA, AutogradMPS, AutogradIPU, AutogradXPU, AutogradHPU, AutogradVE, AutogradLazy, AutogradMTIA, AutogradPrivateUse1, AutogradPrivateUse2, AutogradPrivateUse3, AutogradMeta, AutogradNestedTensor, Tracer, AutocastCPU, AutocastXPU, AutocastCUDA, FuncTorchBatched, BatchedNestedTensor, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PreDispatch, PythonDispatcher]. - "test_fn_grad_to_sparse_xpu_float64", - "test_fn_gradgrad_to_sparse_xpu_float64", - - # issue: https://github.com/intel/torch-xpu-ops/issues/809 - "test_fn_gradgrad_nn_functional_conv3d_xpu_complex128", - "test_fn_gradgrad_nn_functional_conv3d_xpu_float64", - ), - "test_torch_xpu.py": ( # issue 302 ### Error #0 in TestTorchDeviceTypeXPU , totally 11 , RuntimeError: expected scalar type Long but found Int From d9d9be8a6e3cc0c3dfb743c26a5ed372d822e2ec Mon Sep 17 00:00:00 2001 From: Feng Yuan Date: Mon, 23 Dec 2024 21:22:02 -0800 Subject: [PATCH 09/13] Skip cases due to PyTorch uplift --- test/xpu/extended/run_test_with_skip.py | 3 +++ test/xpu/skip_list_common.py | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/test/xpu/extended/run_test_with_skip.py b/test/xpu/extended/run_test_with_skip.py index 01649588f..eba71b17f 100644 --- a/test/xpu/extended/run_test_with_skip.py +++ b/test/xpu/extended/run_test_with_skip.py @@ -148,6 +148,9 @@ "test_compare_cpu_nanmedian_xpu_int64", "test_compare_cpu_nanmedian_xpu_int8", "test_compare_cpu_nanmedian_xpu_uint8", + "test_compare_cpu_nn_functional_unfold_xpu_bool", + "test_non_standard_bool_values_nn_functional_unfold_xpu_bool", + "test_non_standard_bool_values_index_put_xpu_bool", ) diff --git a/test/xpu/skip_list_common.py b/test/xpu/skip_list_common.py index 5909d52c7..7a0c5b503 100644 --- a/test/xpu/skip_list_common.py +++ b/test/xpu/skip_list_common.py @@ -1,5 +1,6 @@ skip_dict = { "test_ops_xpu.py": ( + "test_non_standard_bool_values_index_put_xpu_bool", # Skip list of base line "test_dtypes___rmod___xpu", "test_dtypes_nn_functional_conv1d_xpu", @@ -1250,6 +1251,12 @@ ), "test_unary_ufuncs_xpu.py": ( + "test_reference_numerics_extremal__refs_exp2_xpu_complex64", + "test_exp_xpu_complex64", + "test_reference_numerics_extremal__refs_exp_xpu_complex64", + "test_reference_numerics_extremal_exp2_xpu_complex64", + "test_reference_numerics_extremal_exp_xpu_complex64", + "test_reference_numerics_large_exp_xpu_complex32", # AssertionError: Jiterator is only supported on CUDA and ROCm GPUs, none are available. "_jiterator_", # CPU Fallback fails: Tensor-likes are not close! From 9abc0625525c4640ce4406da9b769b8205b37253 Mon Sep 17 00:00:00 2001 From: Feng Yuan Date: Mon, 23 Dec 2024 23:26:08 -0800 Subject: [PATCH 10/13] Skip unstable case --- test/xpu/skip_list_common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/xpu/skip_list_common.py b/test/xpu/skip_list_common.py index 7a0c5b503..e81f57552 100644 --- a/test/xpu/skip_list_common.py +++ b/test/xpu/skip_list_common.py @@ -1,5 +1,6 @@ skip_dict = { "test_ops_xpu.py": ( + "test_noncontiguous_samples_histogram_xpu_float32", "test_non_standard_bool_values_index_put_xpu_bool", # Skip list of base line "test_dtypes___rmod___xpu", From 5e10e5f0c0f78e15cbe43b2463d668370ba0b25b Mon Sep 17 00:00:00 2001 From: Feng Yuan Date: Tue, 24 Dec 2024 17:25:48 -0800 Subject: [PATCH 11/13] Skip cases due to host seg fault --- test/xpu/skip_list_common.py | 73 ------------------------------------ 1 file changed, 73 deletions(-) diff --git a/test/xpu/skip_list_common.py b/test/xpu/skip_list_common.py index e81f57552..2b9235efc 100644 --- a/test/xpu/skip_list_common.py +++ b/test/xpu/skip_list_common.py @@ -2327,79 +2327,6 @@ "nn/test_pruning_xpu.py": None, - "test_foreach_xpu.py": ( - # CPU fallback fails. Implementation difference between CPU and CUDA. Expect success on CPU and expect fail on CUDA. When we use CPU fallback and align expected fail list with CUDA, these cases fail. - # Unexpected success - "test_parity__foreach_ceil_fastpath_inplace_xpu_complex128", - "test_parity__foreach_ceil_fastpath_inplace_xpu_complex64", - "test_parity__foreach_ceil_fastpath_outplace_xpu_complex128", - "test_parity__foreach_ceil_fastpath_outplace_xpu_complex64", - "test_parity__foreach_clamp_max_fastpath_inplace_xpu_complex128", - "test_parity__foreach_clamp_max_fastpath_inplace_xpu_complex64", - "test_parity__foreach_clamp_max_fastpath_outplace_xpu_complex128", - "test_parity__foreach_clamp_max_fastpath_outplace_xpu_complex64", - "test_parity__foreach_clamp_min_fastpath_inplace_xpu_complex128", - "test_parity__foreach_clamp_min_fastpath_inplace_xpu_complex64", - "test_parity__foreach_clamp_min_fastpath_outplace_xpu_complex128", - "test_parity__foreach_clamp_min_fastpath_outplace_xpu_complex64", - "test_parity__foreach_erf_fastpath_inplace_xpu_complex128", - "test_parity__foreach_erf_fastpath_inplace_xpu_complex64", - "test_parity__foreach_erf_fastpath_outplace_xpu_complex128", - "test_parity__foreach_erf_fastpath_outplace_xpu_complex64", - "test_parity__foreach_erfc_fastpath_inplace_xpu_complex128", - "test_parity__foreach_erfc_fastpath_inplace_xpu_complex64", - "test_parity__foreach_erfc_fastpath_outplace_xpu_complex128", - "test_parity__foreach_erfc_fastpath_outplace_xpu_complex64", - "test_parity__foreach_floor_fastpath_inplace_xpu_complex128", - "test_parity__foreach_floor_fastpath_inplace_xpu_complex64", - "test_parity__foreach_floor_fastpath_outplace_xpu_complex128", - "test_parity__foreach_floor_fastpath_outplace_xpu_complex64", - "test_parity__foreach_frac_fastpath_inplace_xpu_complex128", - "test_parity__foreach_frac_fastpath_inplace_xpu_complex64", - "test_parity__foreach_frac_fastpath_outplace_xpu_complex128", - "test_parity__foreach_frac_fastpath_outplace_xpu_complex64", - "test_parity__foreach_lgamma_fastpath_inplace_xpu_bfloat16", - "test_parity__foreach_lgamma_fastpath_inplace_xpu_complex128", - "test_parity__foreach_lgamma_fastpath_inplace_xpu_complex64", - "test_parity__foreach_lgamma_fastpath_outplace_xpu_bfloat16", - "test_parity__foreach_lgamma_fastpath_outplace_xpu_complex128", - "test_parity__foreach_lgamma_fastpath_outplace_xpu_complex64", - "test_parity__foreach_maximum_fastpath_inplace_xpu_complex128", - "test_parity__foreach_maximum_fastpath_inplace_xpu_complex64", - "test_parity__foreach_maximum_fastpath_outplace_xpu_complex128", - "test_parity__foreach_maximum_fastpath_outplace_xpu_complex64", - "test_parity__foreach_minimum_fastpath_inplace_xpu_complex128", - "test_parity__foreach_minimum_fastpath_inplace_xpu_complex64", - "test_parity__foreach_minimum_fastpath_outplace_xpu_complex128", - "test_parity__foreach_minimum_fastpath_outplace_xpu_complex64", - "test_parity__foreach_round_fastpath_inplace_xpu_complex128", - "test_parity__foreach_round_fastpath_inplace_xpu_complex64", - "test_parity__foreach_round_fastpath_outplace_xpu_complex128", - "test_parity__foreach_round_fastpath_outplace_xpu_complex64", - "test_parity__foreach_sigmoid_fastpath_inplace_xpu_complex128", - "test_parity__foreach_sigmoid_fastpath_inplace_xpu_complex64", - "test_parity__foreach_sigmoid_fastpath_outplace_xpu_complex128", - "test_parity__foreach_sigmoid_fastpath_outplace_xpu_complex64", - "test_parity__foreach_sign_fastpath_inplace_xpu_complex128", - "test_parity__foreach_sign_fastpath_inplace_xpu_complex64", - "test_parity__foreach_sign_fastpath_outplace_xpu_complex128", - "test_parity__foreach_sign_fastpath_outplace_xpu_complex64", - "test_parity__foreach_trunc_fastpath_inplace_xpu_complex128", - "test_parity__foreach_trunc_fastpath_inplace_xpu_complex64", - "test_parity__foreach_trunc_fastpath_outplace_xpu_complex128", - "test_parity__foreach_trunc_fastpath_outplace_xpu_complex64", - "test_autodiff__foreach_sigmoid_inplace_xpu_complex128", - "test_autodiff__foreach_sigmoid_outplace_xpu_complex128", - "test_binary_op_with_scalar_self_support__foreach_pow_is_fastpath_True_xpu_bool", - # AssertionError: RuntimeError not raised - "test_0dim_tensor_overload_exception_xpu", - # RuntimeError: Tried to instantiate dummy base class CUDAGraph - "test_big_num_tensors__foreach_max_use_cuda_graph_True_xpu_float32", - "test_big_num_tensors__foreach_max_use_cuda_graph_True_xpu_float64", - "test_big_num_tensors__foreach_norm_use_cuda_graph_True_xpu_float32", - "test_big_num_tensors__foreach_norm_use_cuda_graph_True_xpu_float64", - ), - "nn/test_convolution_xpu.py": ( # XPU unsupport ops, skip. # https://github.com/intel/torch-xpu-ops/issues/348 From a593c76fd08ab93beb9fbd8f36fcf9c867fffa35 Mon Sep 17 00:00:00 2001 From: Feng Yuan Date: Wed, 25 Dec 2024 23:26:35 -0800 Subject: [PATCH 12/13] Using correct feature-test macro for "Extended enqueue functions" --- src/comm/SYCLHelpers.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/comm/SYCLHelpers.h b/src/comm/SYCLHelpers.h index 48df65221..05a4cdc85 100644 --- a/src/comm/SYCLHelpers.h +++ b/src/comm/SYCLHelpers.h @@ -86,8 +86,7 @@ sycl_kernel_submit( ::sycl::range local_range, ::sycl::queue q, ker_t ker) { -#if defined(__INTEL_LLVM_COMPILER_VERSION) && \ - __INTEL_LLVM_COMPILER_VERSION >= 20250000 +#if SYCL_EXT_ONEAPI_ENQUEUE_FUNCTIONS == 1 auto cgf = [&](::sycl::handler& cgh) { ker.sycl_ker_config_convention(cgh); ::sycl::ext::oneapi::experimental::nd_launch( @@ -113,8 +112,7 @@ sycl_kernel_submit( ::sycl::range local_range, ::sycl::queue q, ker_t ker) { -#if defined(__INTEL_LLVM_COMPILER_VERSION) && \ - __INTEL_LLVM_COMPILER_VERSION >= 20250000 +#if SYCL_EXT_ONEAPI_ENQUEUE_FUNCTIONS == 1 auto cgf = [&](::sycl::handler& cgh) { ::sycl::ext::oneapi::experimental::nd_launch( cgh, ::sycl::nd_range(global_range, local_range), ker); @@ -138,8 +136,7 @@ sycl_kernel_submit( int64_t local_range, ::sycl::queue q, ker_t ker) { -#if defined(__INTEL_LLVM_COMPILER_VERSION) && \ - __INTEL_LLVM_COMPILER_VERSION >= 20250000 +#if SYCL_EXT_ONEAPI_ENQUEUE_FUNCTIONS == 1 auto cgf = [&](::sycl::handler& cgh) { ker.sycl_ker_config_convention(cgh); ::sycl::ext::oneapi::experimental::nd_launch( @@ -170,8 +167,7 @@ sycl_kernel_submit( int64_t local_range, ::sycl::queue q, ker_t ker) { -#if defined(__INTEL_LLVM_COMPILER_VERSION) && \ - __INTEL_LLVM_COMPILER_VERSION >= 20250000 +#if SYCL_EXT_ONEAPI_ENQUEUE_FUNCTIONS == 1 auto cgf = [&](::sycl::handler& cgh) { ::sycl::ext::oneapi::experimental::nd_launch( cgh, From 8d657d1a340a55af672f160f9fd981700dd0c7e8 Mon Sep 17 00:00:00 2001 From: Feng Yuan Date: Wed, 1 Jan 2025 17:19:51 -0800 Subject: [PATCH 13/13] Fixing a missing change --- src/comm/SYCLHelpers.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/comm/SYCLHelpers.h b/src/comm/SYCLHelpers.h index 05a4cdc85..e517e3cbc 100644 --- a/src/comm/SYCLHelpers.h +++ b/src/comm/SYCLHelpers.h @@ -50,8 +50,7 @@ static inline void sycl_kernel_submit( ::sycl::range range, ::sycl::queue q, ker_t ker) { -#if defined(__INTEL_LLVM_COMPILER_VERSION) && \ - __INTEL_LLVM_COMPILER_VERSION >= 20250000 +#if SYCL_EXT_ONEAPI_ENQUEUE_FUNCTIONS == 1 auto cgf = [&](::sycl::handler& cgh) { ::sycl::ext::oneapi::experimental::parallel_for(cgh, range, ker); };