Skip to content

Commit c0db6b3

Browse files
committed
Merge branch 'main' into default_mr
2 parents f65c54c + 314060c commit c0db6b3

File tree

9 files changed

+1569
-0
lines changed

9 files changed

+1569
-0
lines changed

cuda_core/cuda/core/experimental/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@
55
from cuda.core.experimental import utils
66
from cuda.core.experimental._device import Device
77
from cuda.core.experimental._event import Event, EventOptions
8+
from cuda.core.experimental._graph import (
9+
Graph,
10+
GraphBuilder,
11+
GraphCompleteOptions,
12+
GraphDebugPrintOptions,
13+
)
814
from cuda.core.experimental._launch_config import LaunchConfig
915
from cuda.core.experimental._launcher import launch
1016
from cuda.core.experimental._linker import Linker, LinkerOptions

cuda_core/cuda/core/experimental/_device.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from cuda.core.experimental._context import Context, ContextOptions
99
from cuda.core.experimental._event import Event, EventOptions
10+
from cuda.core.experimental._graph import GraphBuilder
1011
from cuda.core.experimental._memory import Buffer, DeviceMemoryResource, MemoryResource, _SynchronousMemoryResource
1112
from cuda.core.experimental._stream import IsStreamT, Stream, StreamOptions, default_stream
1213
from cuda.core.experimental._utils.clear_error_support import assert_type
@@ -1298,3 +1299,15 @@ def sync(self):
12981299
12991300
"""
13001301
handle_return(runtime.cudaDeviceSynchronize())
1302+
1303+
@precondition(_check_context_initialized)
1304+
def create_graph_builder(self) -> GraphBuilder:
1305+
"""Create a new :obj:`~_graph.GraphBuilder` object.
1306+
1307+
Returns
1308+
-------
1309+
:obj:`~_graph.GraphBuilder`
1310+
Newly created graph builder object.
1311+
1312+
"""
1313+
return GraphBuilder._init(stream=self.create_stream(), is_stream_owner=True)

cuda_core/cuda/core/experimental/_graph.py

Lines changed: 774 additions & 0 deletions
Large diffs are not rendered by default.

cuda_core/cuda/core/experimental/_kernel_arg_handler.pyx

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import ctypes
1616
import numpy
1717

1818
from cuda.core.experimental._memory import Buffer
19+
from cuda.core.experimental._utils.cuda_utils import driver
1920

2021

2122
ctypedef cpp_complex.complex[float] cpp_single_complex
@@ -235,6 +236,10 @@ cdef class ParamHolder:
235236
if not_prepared:
236237
not_prepared = prepare_ctypes_arg(self.data, self.data_addresses, arg, i)
237238
if not_prepared:
239+
# TODO: revisit this treatment if we decide to cythonize cuda.core
240+
if isinstance(arg, driver.CUgraphConditionalHandle):
241+
prepare_arg[intptr_t](self.data, self.data_addresses, <intptr_t>int(arg), i)
242+
continue
238243
# TODO: support ctypes/numpy struct
239244
raise TypeError("the argument is of unsupported type: " + str(type(arg)))
240245

cuda_core/cuda/core/experimental/_stream.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from cuda.core.experimental._device import Device
1616
from cuda.core.experimental._context import Context
1717
from cuda.core.experimental._event import Event, EventOptions
18+
from cuda.core.experimental._graph import GraphBuilder
1819
from cuda.core.experimental._utils.clear_error_support import assert_type
1920
from cuda.core.experimental._utils.cuda_utils import (
2021
check_or_create_options,
@@ -355,6 +356,19 @@ def __cuda_stream__(self):
355356

356357
return Stream._init(obj=_stream_holder())
357358

359+
def create_graph_builder(self) -> GraphBuilder:
360+
"""Create a new :obj:`~_graph.GraphBuilder` object.
361+
362+
The new graph builder will be associated with this stream.
363+
364+
Returns
365+
-------
366+
:obj:`~_graph.GraphBuilder`
367+
Newly created graph builder object.
368+
369+
"""
370+
return GraphBuilder._init(stream=self, is_stream_owner=False)
371+
358372

359373
LEGACY_DEFAULT_STREAM = Stream._legacy_default()
360374
PER_THREAD_DEFAULT_STREAM = Stream._per_thread_default()

cuda_core/docs/source/api.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ CUDA runtime
1717
:toctree: generated/
1818

1919
Device
20+
Graph
21+
GraphBuilder
2022
launch
2123
Buffer
2224
MemoryResource
@@ -26,6 +28,8 @@ CUDA runtime
2628
:template: dataclass.rst
2729

2830
EventOptions
31+
GraphCompleteOptions
32+
GraphDebugPrintOptions
2933
StreamOptions
3034
LaunchConfig
3135

cuda_core/docs/source/release/0.3.0-notes.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ Highlights
1212
----------
1313

1414
- Starting this release ``cuda.core`` is licensed under Apache 2.0. The biggest implication of this change is that we are open to external contribution now! Please kindly follow the :ref:`Contributor Guide <contributor_guide>` for detailed instructions.
15+
- Initial support for CUDA graphs (phase 1).
16+
- In this release, we support building a CUDA graph that captures kernel launches. The captured graph can be replayed to reduce
17+
latency. Graph split/join and conditional nodes are supported.
1518

1619

1720
Breaking Changes

cuda_core/tests/test_event.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ def test_error_timing_incomplete():
172172
arr[0] = 1
173173
event3.sync()
174174
event3 - event1 # this should work
175+
b.close()
175176

176177

177178
def test_event_device(init_cuda):

0 commit comments

Comments
 (0)