Skip to content

Commit

Permalink
[Preview] Integrated GPU / Intel GPU support through SYCL. (#7114)
Browse files Browse the repository at this point in the history
- SYCL support for Tensor ops.
- SYCL support for linear algebra.
- Update C++ and Python unit tests. These can be run locally, but do not run on GIthub since no integrated or discrete GPU is available. SYCL CPU device is not as well supported by OneAPI and gives errors in some tests.
- Build preview sycl wheel [Python 3.10 only]
- Reduce tensor indexer MAX_DIMS from 10 to 5.

TODO:
- Fix SYCL [BUILD_SHARED_LIBS=OFF] CI out of storage github issue.
- Build wheels for other Python versions. 
- Optimize SYCL kernels (especially reduction).
- SYCL support for nearest nbr search.
- SYCL support for hash grids.
- Custom kernels with ParallelFor for supporting geometry operations.

Other fixes:
* Only run ProjectImagesToAlbedo on CPU on x86_64 due to IPP dependency.
  • Loading branch information
ssheorey authored Jan 3, 2025
1 parent 9149c9c commit a5d7326
Show file tree
Hide file tree
Showing 113 changed files with 2,695 additions and 419 deletions.
20 changes: 20 additions & 0 deletions .github/workflows/ubuntu-sycl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,26 @@ jobs:
docker/docker_test.sh sycl-static
fi
- name: Upload Python wheel and C++ binary package to GitHub artifacts
if: ${{ matrix.BUILD_SHARED_LIBS == 'ON' }}
uses: actions/upload-artifact@v4
with:
name: open3d-sycl-linux-wheel-and-binary
path: |
open3d-*.whl
open3d-*.tar.xz
if-no-files-found: error
- name: Update devel release
if: ${{ github.ref == 'refs/heads/main' && matrix.BUILD_SHARED_LIBS == 'ON' }}
env:
GH_TOKEN: ${{ github.token }}
run: |
if [ ${{ matrix.BUILD_SHARED_LIBS }} == 'ON' ] ; then
gh release upload main-devel open3d-*.whl --clobber
gh release upload main-devel open3d-*.tar.xz --clobber
fi
gh release view main-devel
- name: GCloud CLI auth
if: ${{ github.ref == 'refs/heads/main' }}
uses: 'google-github-actions/auth@v2'
Expand Down
13 changes: 11 additions & 2 deletions 3rdparty/find_dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1534,12 +1534,17 @@ open3d_import_3rdparty_library(3rdparty_uvatlas
list(APPEND Open3D_3RDPARTY_PRIVATE_TARGETS_FROM_CUSTOM Open3D::3rdparty_uvatlas)


# SYCL link options are specified here. Compile options are only applied to SYCL source files and are specified in cmake/Open3DSYCLTargetSources.cmake
if(BUILD_SYCL_MODULE)
add_library(3rdparty_sycl INTERFACE)
target_link_libraries(3rdparty_sycl INTERFACE
$<$<AND:$<CXX_COMPILER_ID:IntelLLVM>,$<NOT:$<LINK_LANGUAGE:ISPC>>>:sycl>)
target_link_options(3rdparty_sycl INTERFACE
$<$<AND:$<CXX_COMPILER_ID:IntelLLVM>,$<NOT:$<LINK_LANGUAGE:ISPC>>>:-fsycl -fsycl-targets=spir64_x86_64>)
$<$<AND:$<CXX_COMPILER_ID:IntelLLVM>,$<NOT:$<LINK_LANGUAGE:ISPC>>>:-fsycl -fsycl-targets=${OPEN3D_SYCL_TARGETS}>)
if (OPEN3D_SYCL_TARGET_BACKEND_OPTIONS)
target_link_options(3rdparty_sycl INTERFACE
$<$<AND:$<CXX_COMPILER_ID:IntelLLVM>,$<NOT:$<LINK_LANGUAGE:ISPC>>>:-Xs ${OPEN3D_SYCL_TARGET_BACKEND_OPTIONS}>)
endif()
if(NOT BUILD_SHARED_LIBS OR arg_PUBLIC)
install(TARGETS 3rdparty_sycl EXPORT Open3DTargets)
endif()
Expand All @@ -1565,8 +1570,12 @@ if(OPEN3D_USE_ONEAPI_PACKAGES)
GROUPED
INCLUDE_DIRS ${MKL_INCLUDE}/
LIB_DIR ${MKL_ROOT}/lib/intel64
LIBRARIES mkl_intel_ilp64 mkl_tbb_thread mkl_core
LIBRARIES $<$<BOOL:${BUILD_SYCL_MODULE}>:mkl_sycl> mkl_intel_ilp64 mkl_tbb_thread mkl_core
)
if (BUILD_SYCL_MODULE)
# target_link_options(3rdparty_mkl INTERFACE "-Wl,-export-dynamic")
target_link_libraries(3rdparty_mkl INTERFACE OpenCL)
endif()
# MKL definitions
target_compile_options(3rdparty_mkl INTERFACE "$<$<PLATFORM_ID:Linux,Darwin>:$<$<COMPILE_LANGUAGE:CXX>:-m64>>")
target_compile_definitions(3rdparty_mkl INTERFACE "$<$<COMPILE_LANGUAGE:CXX>:MKL_ILP64>")
Expand Down
16 changes: 10 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,16 @@ else()
option(STATIC_WINDOWS_RUNTIME "Use static (MT/MTd) Windows runtime" ON )
endif()
option(BUILD_SYCL_MODULE "Build SYCL module with Intel oneAPI" OFF)
if(BUILD_SYCL_MODULE)
set(OPEN3D_SYCL_TARGETS "spir64" CACHE STRING
"SYCL targets: spir64 for JIT, or another for AOT compilation. See https://github.com/intel/llvm/blob/sycl/sycl/doc/UsersManual.md."
)
set(OPEN3D_SYCL_TARGET_BACKEND_OPTIONS "" CACHE STRING
"SYCL target backend options, e.g. to compile for a specific device. See https://github.com/intel/llvm/blob/sycl/sycl/doc/UsersManual.md."
)
set(BUILD_ISPC_MODULE OFF CACHE BOOL "Build the ISPC module" FORCE)
set(BUILD_CUDA_MODULE OFF CACHE BOOL "Build the CUDA module" FORCE)
endif()
option(GLIBCXX_USE_CXX11_ABI "Set -D_GLIBCXX_USE_CXX11_ABI=1" ON )
option(ENABLE_SYCL_UNIFIED_SHARED_MEMORY "Enable SYCL unified shared memory" OFF)
if(BUILD_GUI AND (WIN32 OR UNIX AND NOT LINUX_AARCH64 AND NOT APPLE_AARCH64))
Expand Down Expand Up @@ -282,12 +292,6 @@ endif()
if(BUILD_SYCL_MODULE AND NOT GLIBCXX_USE_CXX11_ABI)
message(FATAL_ERROR "BUILD_SYCL_MODULE=ON requires GLIBCXX_USE_CXX11_ABI=ON")
endif()
if(BUILD_SYCL_MODULE AND BUILD_TENSORFLOW_OPS)
message(FATAL_ERROR "BUILD_SYCL_MODULE=ON requires BUILD_TENSORFLOW_OPS=OFF")
endif()
if(BUILD_SYCL_MODULE AND BUILD_PYTORCH_OPS)
message(FATAL_ERROR "BUILD_SYCL_MODULE=ON requires BUILD_PYTORCH_OPS=OFF")
endif()
if(BUILD_SYCL_MODULE AND BUILD_CUDA_MODULE)
message(FATAL_ERROR "BUILD_SYCL_MODULE and BUILD_SYCL_MODULE cannot be on at the same time for now.")
endif()
Expand Down
5 changes: 3 additions & 2 deletions cmake/Open3DSYCLTargetSources.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
#
# When BUILD_SYCL_MODULE=ON, set SYCL-specific compile flags for the listed
# source files and call target_sources(). If BUILD_SYCL_MODULE=OFF, this
# function directly calls target_sources().
# function directly calls target_sources(). For SYCL link options, see
# 3rdparty/find_dependencies.cmake
#
# Note: this is not a perfect forwarding to target_sources(), as it only support
# limited set of arguments. See the example usage below.
Expand Down Expand Up @@ -31,7 +32,7 @@ function(open3d_sycl_target_sources target)
if(BUILD_SYCL_MODULE)
foreach(sycl_file IN LISTS arg_UNPARSED_ARGUMENTS)
set_source_files_properties(${sycl_file} PROPERTIES
COMPILE_OPTIONS -fsycl -fsycl-unnamed-lambda -fsycl-targets=spir64_x86_64)
COMPILE_OPTIONS "-fsycl;-fsycl-targets=${OPEN3D_SYCL_TARGETS}")
if(arg_VERBOSE)
message(STATUS "open3d_sycl_target_sources(${target}): marked ${sycl_file} as SYCL code")
endif()
Expand Down
9 changes: 5 additions & 4 deletions cmake/Open3DSetGlobalProperties.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,16 @@ function(open3d_enable_strip target)
endif()
endfunction()

# RPATH handling (for TBB DSO). Check current folder, one folder above and the lib sibling folder
# RPATH handling (for TBB DSO). Check current folder, one folder above and the lib sibling folder.
# Also check the Python virtual env /lib folder for 3rd party dependency libraries installed with `pip install`
set(CMAKE_BUILD_RPATH_USE_ORIGIN ON)
if (APPLE)
# Add options to cover the various ways in which open3d shaed lib or apps can be installed wrt TBB DSO
set(CMAKE_INSTALL_RPATH "@loader_path;@loader_path/../;@loader_path/../lib/")
# Add options to cover the various ways in which open3d shared lib or apps can be installed wrt TBB DSO
set(CMAKE_INSTALL_RPATH "@loader_path;@loader_path/../;@loader_path/../lib/;@loader_path/../../../../")
# pybind with open3d shared lib is copied, not cmake-installed, so we need to add .. to build rpath
set(CMAKE_BUILD_RPATH "@loader_path/../")
elseif(UNIX)
set(CMAKE_INSTALL_RPATH "$ORIGIN;$ORIGIN/../;$ORIGIN/../lib/")
set(CMAKE_INSTALL_RPATH "$ORIGIN;$ORIGIN/../;$ORIGIN/../lib/;$ORIGIN/../../../../")
set(CMAKE_BUILD_RPATH "$ORIGIN/../")
endif()

Expand Down
5 changes: 5 additions & 0 deletions cpp/apps/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,11 @@ macro(open3d_add_app_common SRC_DIR APP_NAME TARGET_NAME)
target_link_libraries(${TARGET_NAME} PRIVATE Open3D::Open3D TBB::tbb ${ARGN})
set_target_properties(${TARGET_NAME} PROPERTIES FOLDER "apps")

if (BUILD_SYCL_MODULE)
find_package(IntelSYCL REQUIRED) # requires cmake>=3.25 on Windows
add_sycl_to_target(TARGET ${TARGET_NAME})
endif()

open3d_link_3rdparty_libraries(${TARGET_NAME})
open3d_show_and_abort_on_warning(${TARGET_NAME})
open3d_set_global_properties(${TARGET_NAME})
Expand Down
4 changes: 4 additions & 0 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ if (BUILD_CUDA_MODULE)
find_package(CUDAToolkit REQUIRED)
target_link_libraries(benchmarks PRIVATE CUDA::cudart)
endif()
if (BUILD_SYCL_MODULE)
find_package(IntelSYCL REQUIRED) # requires cmake>=3.25 on Windows
add_sycl_to_target(TARGET benchmarks)
endif()

open3d_show_and_abort_on_warning(benchmarks)
open3d_set_global_properties(benchmarks)
Expand Down
49 changes: 34 additions & 15 deletions cpp/open3d/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ target_sources(core PRIVATE
CUDAUtils.cpp
Device.cpp
Dtype.cpp
EigenConverter.cpp
Indexer.cpp
MemoryManager.cpp
MemoryManagerCached.cpp
Expand All @@ -23,6 +22,7 @@ target_sources(core PRIVATE

# Compile regardless BUILD_SYCL_MODULE == ON or OFF.
open3d_sycl_target_sources(core PRIVATE
EigenConverter.cpp
SYCLUtils.cpp
)

Expand All @@ -43,22 +43,14 @@ target_sources(core PRIVATE
hashmap/HashSet.cpp
kernel/Kernel.cpp
linalg/AddMM.cpp
linalg/AddMMCPU.cpp
linalg/Det.cpp
linalg/Inverse.cpp
linalg/InverseCPU.cpp
linalg/LeastSquares.cpp
linalg/LeastSquaresCPU.cpp
linalg/LU.cpp
linalg/LUCPU.cpp
linalg/Matmul.cpp
linalg/MatmulCPU.cpp
linalg/Solve.cpp
linalg/SolveCPU.cpp
linalg/SVD.cpp
linalg/SVDCPU.cpp
linalg/Tri.cpp
linalg/TriCPU.cpp
nns/FixedRadiusIndex.cpp
nns/FixedRadiusSearchOps.cpp
nns/KnnIndex.cpp
Expand All @@ -73,21 +65,48 @@ set_target_properties(core_impl PROPERTIES CXX_VISIBILITY_PRESET "hidden")

target_sources(core_impl PRIVATE
kernel/Arange.cpp
kernel/ArangeCPU.cpp
kernel/BinaryEW.cpp
kernel/BinaryEWCPU.cpp
kernel/IndexGetSet.cpp
kernel/IndexGetSetCPU.cpp
kernel/IndexReduction.cpp
kernel/IndexReductionCPU.cpp
kernel/NonZero.cpp
kernel/NonZeroCPU.cpp
kernel/Reduction.cpp
kernel/ReductionCPU.cpp
kernel/UnaryEW.cpp
kernel/ArangeCPU.cpp
kernel/BinaryEWCPU.cpp
kernel/IndexGetSetCPU.cpp
kernel/IndexReductionCPU.cpp
kernel/NonZeroCPU.cpp
kernel/ReductionCPU.cpp
kernel/UnaryEWCPU.cpp
linalg/AddMMCPU.cpp
linalg/InverseCPU.cpp
linalg/LeastSquaresCPU.cpp
linalg/LUCPU.cpp
linalg/MatmulCPU.cpp
linalg/SolveCPU.cpp
linalg/SVDCPU.cpp
linalg/TriCPU.cpp
)

if (BUILD_SYCL_MODULE)
open3d_sycl_target_sources(core_impl PRIVATE
kernel/UnaryEWSYCL.cpp
kernel/BinaryEWSYCL.cpp
kernel/ArangeSYCL.cpp
kernel/IndexGetSetSYCL.cpp
kernel/NonZeroSYCL.cpp
kernel/IndexReductionSYCL.cpp
kernel/ReductionSYCL.cpp
linalg/AddMMSYCL.cpp
linalg/InverseSYCL.cpp
linalg/LeastSquaresSYCL.cpp
linalg/LUSYCL.cpp
linalg/MatmulSYCL.cpp
linalg/SolveSYCL.cpp
linalg/SVDSYCL.cpp
linalg/TriSYCL.cpp
)
endif()

if (BUILD_CUDA_MODULE)
target_sources(core PRIVATE
Expand Down
8 changes: 8 additions & 0 deletions cpp/open3d/core/Device.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,12 @@ struct hash<open3d::core::Device> {
return std::hash<std::string>{}(device.ToString());
}
};

template <>
struct less<open3d::core::Device> {
bool operator()(const open3d::core::Device& lhs,
const open3d::core::Device& rhs) const {
return lhs.ToString() < rhs.ToString();
}
};
} // namespace std
6 changes: 3 additions & 3 deletions cpp/open3d/core/Indexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ class Indexer;
class IndexerIterator;

// Maximum number of dimensions of TensorRef.
static constexpr int64_t MAX_DIMS = 10;
static constexpr int64_t MAX_DIMS = 5;

// Maximum number of inputs of an op.
// MAX_INPUTS shall be >= MAX_DIMS to support advanced indexing.
static constexpr int64_t MAX_INPUTS = 10;
static constexpr int64_t MAX_INPUTS = 5;

// Maximum number of outputs of an op. This number can be increased when
// necessary.
Expand Down Expand Up @@ -110,7 +110,7 @@ struct TensorRef {

TensorRef(const Tensor& t) {
if (t.NumDims() > MAX_DIMS) {
utility::LogError("Tenor has too many dimensions {} > {}.",
utility::LogError("Tensor has too many dimensions {} > {}.",
t.NumDims(), MAX_DIMS);
}
data_ptr_ = const_cast<void*>(t.GetDataPtr());
Expand Down
4 changes: 2 additions & 2 deletions cpp/open3d/core/Indexer.isph
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
#include "open3d/utility/Helper.isph"

// Maximum number of dimensions of TensorRef.
enum { MAX_DIMS = 10 };
enum { MAX_DIMS = 4 };

// Maximum number of inputs of an op.
// MAX_INPUTS shall be >= MAX_DIMS to support advanced indexing.
enum { MAX_INPUTS = 10 };
enum { MAX_INPUTS = 4 };

// Maximum number of outputs of an op. This number can be increased when
// necessary.
Expand Down
5 changes: 5 additions & 0 deletions cpp/open3d/core/ParallelFor.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ void ParallelForCPU_(const Device& device, int64_t n, const func_t& func) {
/// \note If you use a lambda function, capture only the required variables
/// instead of all to prevent accidental race conditions. If you want the
/// kernel to be used on both CPU and CUDA, capture the variables by value.
/// \note This does not dispatch to SYCL, since SYCL has extra constraints:
/// - Lambdas may capture by value only.
/// - No function pointers / virtual functions.
/// Auto dispatch to SYCL will enforce these conditions even on CPU devices. Use
/// ParallelForSYCL instead.
template <typename func_t>
void ParallelFor(const Device& device, int64_t n, const func_t& func) {
#ifdef __CUDACC__
Expand Down
63 changes: 63 additions & 0 deletions cpp/open3d/core/ParallelForSYCL.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// ----------------------------------------------------------------------------
// - Open3D: www.open3d.org -
// ----------------------------------------------------------------------------
// Copyright (c) 2018-2024 www.open3d.org
// SPDX-License-Identifier: MIT
// ----------------------------------------------------------------------------

#pragma once

#include <cstdint>
#include <type_traits>

#include "open3d/core/Device.h"
#include "open3d/core/Indexer.h"
#include "open3d/core/SYCLContext.h"
#include "open3d/utility/Logging.h"

namespace open3d {
namespace core {

/// Run a function in parallel with SYCL.
template <typename Functor, typename... FuncArgs>
void ParallelForSYCL(const Device& device,
Indexer indexer,
FuncArgs... func_args) {
if (!device.IsSYCL()) {
utility::LogError("ParallelFor for SYCL cannot run on device {}.",
device.ToString());
}
int64_t n = indexer.NumWorkloads();
if (n == 0) {
return;
}
auto queue = sy::SYCLContext::GetInstance().GetDefaultQueue(device);
/// TODO: Specify grid size based on device properties
queue.parallel_for<Functor>(n, [indexer, func_args...](int64_t i) {
Functor ef(indexer, func_args...);
ef(i);
}).wait_and_throw();
}

/// Run a function in parallel with SYCL.
template <typename Functor, typename... FuncArgs>
void ParallelForSYCL(const Device& device,
int64_t num_workloads,
FuncArgs... func_args) {
if (!device.IsSYCL()) {
utility::LogError("ParallelFor for SYCL cannot run on device {}.",
device.ToString());
}
if (num_workloads == 0) {
return;
}
auto queue = sy::SYCLContext::GetInstance().GetDefaultQueue(device);
/// TODO: Specify grid size based on device properties
queue.parallel_for<Functor>(num_workloads, [func_args...](int64_t i) {
Functor ef(func_args...);
ef(i);
}).wait_and_throw();
}

} // namespace core
} // namespace open3d
Loading

0 comments on commit a5d7326

Please sign in to comment.