From 391703c9708e0c873bb755dd0c17c9812eefe69b Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Tue, 17 Dec 2024 14:30:01 +0100 Subject: [PATCH 1/2] ptx: Add cp.async.mbarrier.arrive{.noinc} --- .../instructions/cp_async_mbarrier_arrive.rst | 10 +++++ .../instructions/cp_async_mbarrier_arrive.h | 38 +++++++++++++++++++ libcudacxx/include/cuda/ptx | 1 + ....cp.async.mbarrier.arrive.compile.pass.cpp | 23 +++++++++++ 4 files changed, 72 insertions(+) create mode 100644 docs/libcudacxx/ptx/instructions/cp_async_mbarrier_arrive.rst create mode 100644 libcudacxx/include/cuda/__ptx/instructions/cp_async_mbarrier_arrive.h create mode 100644 libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.mbarrier.arrive.compile.pass.cpp diff --git a/docs/libcudacxx/ptx/instructions/cp_async_mbarrier_arrive.rst b/docs/libcudacxx/ptx/instructions/cp_async_mbarrier_arrive.rst new file mode 100644 index 00000000000..f2ff2ff5ee7 --- /dev/null +++ b/docs/libcudacxx/ptx/instructions/cp_async_mbarrier_arrive.rst @@ -0,0 +1,10 @@ +.. _libcudacxx-ptx-instructions-cp-async-mbarrier-arrive: + +cp.async.mbarrier.arrive +======================== + +- PTX ISA: + `cp.async.mbarrier.arrive `__ + +.. include:: generated/cp_async_mbarrier_arrive.rst +.. include:: generated/cp_async_mbarrier_arrive_noinc.rst diff --git a/libcudacxx/include/cuda/__ptx/instructions/cp_async_mbarrier_arrive.h b/libcudacxx/include/cuda/__ptx/instructions/cp_async_mbarrier_arrive.h new file mode 100644 index 00000000000..c19a09e2922 --- /dev/null +++ b/libcudacxx/include/cuda/__ptx/instructions/cp_async_mbarrier_arrive.h @@ -0,0 +1,38 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA_PTX_CP_ASYNC_MBARRIER_ARRIVE_H_ +#define _CUDA_PTX_CP_ASYNC_MBARRIER_ARRIVE_H_ + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +#include // __CUDA_MINIMUM_ARCH__ and friends + +_LIBCUDACXX_BEGIN_NAMESPACE_CUDA_PTX + +#include +#include + +_LIBCUDACXX_END_NAMESPACE_CUDA_PTX + +#endif // _CUDA_PTX_CP_ASYNC_MBARRIER_ARRIVE_H_ diff --git a/libcudacxx/include/cuda/ptx b/libcudacxx/include/cuda/ptx index 4798973df77..b2181d1663a 100644 --- a/libcudacxx/include/cuda/ptx +++ b/libcudacxx/include/cuda/ptx @@ -74,6 +74,7 @@ #include #include #include +#include #include #include #include diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.mbarrier.arrive.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.mbarrier.arrive.compile.pass.cpp new file mode 100644 index 00000000000..97623078198 --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.mbarrier.arrive.compile.pass.cpp @@ -0,0 +1,23 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// +// UNSUPPORTED: libcpp-has-no-threads + +// + +#include +#include + +#include "generated/cp_async_mbarrier_arrive.h" +#include "generated/cp_async_mbarrier_arrive_noinc.h" + +int main(int, char**) +{ + return 0; +} From 0716a4d47f61518ea299313bce70673c520c6785 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Thu, 30 Jan 2025 11:23:05 +0100 Subject: [PATCH 2/2] fix docs --- docs/libcudacxx/ptx/instructions.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/libcudacxx/ptx/instructions.rst b/docs/libcudacxx/ptx/instructions.rst index f0776974eec..85bda32cf50 100644 --- a/docs/libcudacxx/ptx/instructions.rst +++ b/docs/libcudacxx/ptx/instructions.rst @@ -11,6 +11,7 @@ PTX Instructions instructions/cp_async_bulk_commit_group instructions/cp_async_bulk_wait_group instructions/cp_async_bulk_tensor + instructions/cp_async_mbarrier_arrive instructions/cp_reduce_async_bulk instructions/cp_reduce_async_bulk_tensor instructions/fence