
Commit 32e1b4e

Add --target-cuda argument for selecting CUDA architecture (#2478)
This PR adds a `--target-cuda` argument to `scripts/build_locally.py`, allowing users to enable CUDA support and optionally specify the target architecture (e.g. `sm_80`). If no architecture is specified, `sm_50` is used by default.

```bash
$ python scripts/build_locally.py --target-cuda
# or
$ python scripts/build_locally.py --target-cuda=<arch>
```
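As a rough illustration of the behaviour described above (not code from this PR), the value of `--target-cuda`, or a bare `ON` when the flag is passed without a value, ends up forwarded to CMake as a `-DDPNP_TARGET_CUDA=<value>` define. The helper below is a hypothetical sketch of that forwarding step; the real wiring lives in `scripts/build_locally.py` (see the diff further down).

```python
# Hypothetical sketch only; mirrors the forwarding described in the PR summary.
def cuda_cmake_arg(target_cuda):
    """Translate the parsed --target-cuda value into a CMake define."""
    if target_cuda is None:
        # Flag not given at all: CUDA support stays disabled.
        return []
    if not target_cuda.strip():
        raise ValueError("--target-cuda can not be an empty string")
    # Bare --target-cuda parses as "ON" (default sm_50); --target-cuda=sm_80 passes through.
    return [f"-DDPNP_TARGET_CUDA={target_cuda}"]

print(cuda_cmake_arg("ON"))     # ['-DDPNP_TARGET_CUDA=ON']
print(cuda_cmake_arg("sm_80"))  # ['-DDPNP_TARGET_CUDA=sm_80']
```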
1 parent e9e84fc commit 32e1b4e

File tree

4 files changed: +76, -25 lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
@@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+* Added `--target-cuda[=ARCH]` option to replace the deprecated `--target=cuda`, allowing users to build for CUDA devices with optional architecture selection using [CodePlay oneAPI plug-in](https://developer.codeplay.com/products/oneapi/nvidia/home/) [#2478](https://github.com/IntelPython/dpnp/pull/2478)
+
 ### Changed
 
 * Adjusted the `pre-commit` configuration to run autoupdate weekly [#2479](https://github.com/IntelPython/dpnp/pull/2479)

CMakeLists.txt

Lines changed: 21 additions & 7 deletions
@@ -68,14 +68,17 @@ find_package(Dpctl REQUIRED)
 message(STATUS "Dpctl_INCLUDE_DIR=" ${Dpctl_INCLUDE_DIR})
 message(STATUS "Dpctl_TENSOR_INCLUDE_DIR=" ${Dpctl_TENSOR_INCLUDE_DIR})
 
-option(DPNP_TARGET_CUDA
-    "Build DPNP to target CUDA devices"
-    OFF
-)
 option(DPNP_USE_ONEMKL_INTERFACES
     "Build DPNP with oneMKL Interfaces"
     OFF
 )
+set(DPNP_TARGET_CUDA
+    ""
+    CACHE STRING
+    "Build DPNP to target CUDA device. \
+    Set to a truthy value (e.g., ON, TRUE) to use default architecture (sm_50), \
+    or to a specific architecture like sm_80."
+)
 set(HIP_TARGETS "" CACHE STRING "HIP architecture for target")
 
 set(_dpnp_sycl_targets)
@@ -87,8 +90,19 @@ set(_dpnp_sycl_target_compile_options)
 set(_dpnp_sycl_target_link_options)
 
 if ("x${DPNP_SYCL_TARGETS}" STREQUAL "x")
-    if(DPNP_TARGET_CUDA)
-        set(_dpnp_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
+    if (DPNP_TARGET_CUDA)
+        set(_dpnp_cuda_arch)
+        if(DPNP_TARGET_CUDA MATCHES "^sm_")
+            set(_dpnp_cuda_arch ${DPNP_TARGET_CUDA})
+        elseif(DPNP_TARGET_CUDA MATCHES "^(ON|TRUE|YES|Y|1)$")
+            set(_dpnp_cuda_arch "sm_50")
+        else()
+            message(FATAL_ERROR
+                "Invalid value for DPNP_TARGET_CUDA: \"${DPNP_TARGET_CUDA}\". "
+                "Expected 'ON', 'TRUE', 'YES', 'Y', '1', or a CUDA architecture like 'sm_80'."
+            )
+        endif()
+        set(_dpnp_sycl_targets "nvidia_gpu_${_dpnp_cuda_arch},spir64-unknown-unknown")
         set(_use_onemkl_interfaces_cuda ON)
     endif()
 
@@ -104,7 +118,7 @@ if ("x${DPNP_SYCL_TARGETS}" STREQUAL "x")
 else()
     set(_dpnp_sycl_targets ${DPNP_SYCL_TARGETS})
 
-    if ("${DPNP_SYCL_TARGETS}" MATCHES "nvptx64-nvidia-cuda")
+    if("${DPNP_SYCL_TARGETS}" MATCHES "(nvidia_gpu_sm_|nvptx64-nvidia-cuda)")
        set(_use_onemkl_interfaces_cuda ON)
     endif()
 
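To make the new CMake branch easier to follow, here is a small Python rendering of the same value handling. It is only an illustration of the logic in the diff above; the `sycl_targets_for` helper is invented for this sketch and is not part of the build.

```python
import re

def sycl_targets_for(target_cuda: str) -> str:
    """Mirror of the DPNP_TARGET_CUDA handling shown in the CMake diff above."""
    if re.match(r"^sm_", target_cuda):
        arch = target_cuda                  # explicit architecture, e.g. sm_80
    elif re.match(r"^(ON|TRUE|YES|Y|1)$", target_cuda):
        arch = "sm_50"                      # truthy value falls back to the default
    else:
        raise ValueError(f'Invalid value for DPNP_TARGET_CUDA: "{target_cuda}"')
    return f"nvidia_gpu_{arch},spir64-unknown-unknown"

assert sycl_targets_for("ON") == "nvidia_gpu_sm_50,spir64-unknown-unknown"
assert sycl_targets_for("sm_80") == "nvidia_gpu_sm_80,spir64-unknown-unknown"
```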

doc/quick_start_guide.rst

Lines changed: 37 additions & 6 deletions
@@ -144,13 +144,40 @@ installation layout of compatible version. The following plugins from CodePlay a
 Building ``dpnp`` also requires `building Data Parallel Control Library for custom SYCL targets.
 <https://intelpython.github.io/dpctl/latest/beginners_guides/installation.html#building-for-custom-sycl-targets>`_
 
-``dpnp`` can be built for CUDA devices as follows:
+Builds for CUDA and AMD devices internally use SYCL alias targets that are passed to the compiler.
+A full list of available SYCL alias targets is available in the
+`DPC++ Compiler User Manual <https://intel.github.io/llvm/UsersManual.html>`_.
+
+CUDA build
+~~~~~~~~~~
+
+To build for CUDA devices, use the ``--target-cuda`` argument.
+
+To target a specific architecture (e.g., ``sm_80``):
+
+.. code-block:: bash
+
+    python scripts/build_locally.py --target-cuda=sm_80
+
+To use the default architecture (``sm_50``), run:
 
 .. code-block:: bash
 
-    python scripts/build_locally.py --target=cuda
+    python scripts/build_locally.py --target-cuda
+
+Note that kernels are built for the default architecture (``sm_50``), allowing them to work on a
+wider range of architectures, but limiting the usage of more recent CUDA features.
+
+For reference, compute architecture strings like ``sm_80`` correspond to specific
+CUDA Compute Capabilities (e.g., Compute Capability 8.0 corresponds to ``sm_80``).
+A complete mapping between NVIDIA GPU models and their respective
+Compute Capabilities can be found in the official
+`CUDA GPU Compute Capability <https://developer.nvidia.com/cuda-gpus>`_ documentation.
+
+AMD build
+~~~~~~~~~
 
-And for AMD devices:
+To build for AMD devices, use the ``--target-hip=<arch>`` argument:
 
 .. code-block:: bash
 
@@ -173,13 +200,17 @@ For example:
 .. code-block:: bash
 
     python scripts/build_locally.py --target-hip=gfx90a
 
+Multi-target build
+~~~~~~~~~~~~~~~~~~
 
-It is, however, possible to build for Intel devices, CUDA devices, and an AMD device
-architecture all at once:
+The default ``dpnp`` build from the source enables support of Intel devices only.
+Extending the build with a custom SYCL target additionally enables support of CUDA or AMD
+device in ``dpnp``. Besides, the support can be also extended to enable both CUDA and AMD
+devices at the same time:
 
 .. code-block:: bash
 
-    python scripts/build_locally.py --target=cuda --target-hip=gfx90a
+    python scripts/build_locally.py --target-cuda --target-hip=gfx90a
 
 
 Testing
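As an optional sanity check after a ``--target-cuda`` build (my own suggestion, not something the documentation in this diff prescribes), the CUDA device should show up in dpctl's device list when the CodePlay plug-in is installed:

```python
# Assumes dpctl is installed alongside the freshly built dpnp.
import dpctl

for dev in dpctl.get_devices():
    print(dev.backend, dev.name)
# A device reported with the "cuda" backend indicates the NVIDIA GPU is visible to SYCL.
```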

scripts/build_locally.py

Lines changed: 16 additions & 12 deletions
@@ -38,7 +38,7 @@ def run(
     cmake_executable=None,
     verbose=False,
     cmake_opts="",
-    target="intel",
+    target_cuda=None,
     target_hip=None,
     onemkl_interfaces=False,
     onemkl_interfaces_dir=None,
@@ -98,12 +98,14 @@ def run(
     if "DPL_ROOT" in os.environ:
         os.environ["DPL_ROOT_HINT"] = os.environ["DPL_ROOT"]
 
-    if not target.strip():
-        target = "intel"
-
-    if target == "cuda":
+    if target_cuda is not None:
+        if not target_cuda.strip():
+            raise ValueError(
+                "--target-cuda can not be an empty string. "
+                "Use --target-cuda=<arch> or --target-cuda"
+            )
         cmake_args += [
-            "-DDPNP_TARGET_CUDA=ON",
+            f"-DDPNP_TARGET_CUDA={target_cuda}",
         ]
         # Always builds using oneMKL interfaces for the cuda target
         onemkl_interfaces = True
@@ -129,7 +131,7 @@ def run(
             f"-DDPNP_ONEMKL_INTERFACES_DIR={onemkl_interfaces_dir}",
         ]
     elif onemkl_interfaces_dir:
-        RuntimeError("--onemkl-interfaces-dir option is not supported")
+        raise RuntimeError("--onemkl-interfaces-dir option is not supported")
 
     subprocess.check_call(
         cmake_args, shell=False, cwd=setup_dir, env=os.environ
@@ -186,10 +188,12 @@ def run(
         type=str,
     )
     driver.add_argument(
-        "--target",
-        help="Target backend for build",
-        dest="target",
-        default="intel",
+        "--target-cuda",
+        nargs="?",
+        const="ON",
+        help="Enable CUDA target for build; "
+        "optionally specify architecture (e.g., --target-cuda=sm_80)",
+        default=None,
         type=str,
     )
     driver.add_argument(
@@ -265,7 +269,7 @@ def run(
         cmake_executable=args.cmake_executable,
         verbose=args.verbose,
         cmake_opts=args.cmake_opts,
-        target=args.target,
+        target_cuda=args.target_cuda,
         target_hip=args.target_hip,
         onemkl_interfaces=args.onemkl_interfaces,
         onemkl_interfaces_dir=args.onemkl_interfaces_dir,
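The `nargs="?"` / `const="ON"` combination in the diff above is what lets `--target-cuda` act both as a plain flag and as a value carrier. A minimal, standalone argparse sketch of that behaviour (the parser and prints here are illustrative only, not taken from the script):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--target-cuda",
    nargs="?",
    const="ON",    # value used when the flag is given without an argument
    default=None,  # value used when the flag is absent
    type=str,
)

print(parser.parse_args([]).target_cuda)                       # None  -> CUDA disabled
print(parser.parse_args(["--target-cuda"]).target_cuda)        # ON    -> default sm_50
print(parser.parse_args(["--target-cuda=sm_80"]).target_cuda)  # sm_80 -> explicit architecture
```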
