diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index 070c39eb6e9a..971e5d5c93f2 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -461,6 +461,13 @@ option(FLANG_BUILD_TOOLS
 if (FLANG_BUILD_TOOLS)
   add_subdirectory(tools)
 endif()
+
+option(FLANG_CUF_RUNTIME
+  "Compile CUDA Fortran runtime sources" OFF)
+if (FLANG_CUF_RUNTIME)
+  find_package(CUDAToolkit REQUIRED)
+endif()
+
 add_subdirectory(runtime)
 
 if (LLVM_INCLUDE_EXAMPLES)
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h
new file mode 100644
index 000000000000..9f6fb55bea74
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -0,0 +1,54 @@
+//===-- include/flang/Runtime/CUDA/allocator.h ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+#define FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+
+#include "flang/Runtime/descriptor.h"
+
+// Positions at which CUFRegisterAllocator() registers the CUDA Fortran
+// allocators in the runtime allocator registry.
+static constexpr unsigned kPinnedAllocatorPos = 1;
+static constexpr unsigned kDeviceAllocatorPos = 2;
+static constexpr unsigned kManagedAllocatorPos = 3;
+
+// Evaluates `expr`, which must yield a CUresult. A zero result is ignored;
+// any other value is translated with cuGetErrorName() and reported through
+// Terminator::Crash() together with the text of `expr`.
+#define CUDA_REPORT_IF_ERROR(expr)                                             \
+  [](CUresult result) {                                                        \
+    if (!result)                                                               \
+      return;                                                                  \
+    const char *name = nullptr;                                                \
+    cuGetErrorName(result, &name);                                             \
+    if (!name)                                                                 \
+      name = "<unknown>";                                                      \
+    Terminator terminator{__FILE__, __LINE__};                                 \
+    terminator.Crash("'%s' failed with '%s'", #expr, name);                    \
+  }(expr)
+
+namespace Fortran::runtime::cuf {
+
+// Registers the pinned, device and managed allocator/deallocator pairs under
+// the k*AllocatorPos positions declared above.
+void CUFRegisterAllocator();
+
+// Pinned memory: cuMemAllocHost() / cuMemFreeHost().
+void *CUFAllocPinned(std::size_t);
+void CUFFreePinned(void *);
+
+// Device memory: cuMemAlloc() / cuMemFree().
+void *CUFAllocDevice(std::size_t);
+void CUFFreeDevice(void *);
+
+// Managed memory: cuMemAllocManaged(CU_MEM_ATTACH_GLOBAL) / cuMemFree().
+void *CUFAllocManaged(std::size_t);
+void CUFFreeManaged(void *);
+
+} // namespace Fortran::runtime::cuf
+#endif // FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt
index 1f3ae23dcbf1..4537b2d059d6 100644
--- a/flang/runtime/CMakeLists.txt
+++ b/flang/runtime/CMakeLists.txt
@@ -309,3 +309,6 @@ if (TARGET flang-new AND TARGET module_files)
   add_dependencies(FortranRuntime flang-new module_files)
 endif()
 
+if (FLANG_CUF_RUNTIME)
+  add_subdirectory(CUDA)
+endif()
diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt
new file mode 100644
index 000000000000..de1104f07ce6
--- /dev/null
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -0,0 +1,19 @@
+#===-- runtime/CUDA/CMakeLists.txt -----------------------------------------===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===------------------------------------------------------------------------===#
+
+include_directories(${CUDAToolkit_INCLUDE_DIRS})
+find_library(CUDA_RUNTIME_LIBRARY cuda HINTS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)
+
+add_flang_library(CufRuntime
+  allocator.cpp
+)
+target_link_libraries(CufRuntime
+  PRIVATE
+  FortranRuntime
+  ${CUDA_RUNTIME_LIBRARY}
+)
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
new file mode 100644
index 000000000000..02eaba563699
--- /dev/null
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -0,0 +1,60 @@
+//===-- runtime/CUDA/allocator.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Runtime/CUDA/allocator.h"
+#include "../allocator-registry.h"
+#include "../derived.h"
+#include "../stat.h"
+#include "../terminator.h"
+#include "../type-info.h"
+#include "flang/Common/Fortran.h"
+#include "flang/ISO_Fortran_binding_wrapper.h"
+
+#include "cuda.h"
+
+namespace Fortran::runtime::cuf {
+
+void CUFRegisterAllocator() {
+  allocatorRegistry.Register(
+      kPinnedAllocatorPos, {&CUFAllocPinned, CUFFreePinned});
+  allocatorRegistry.Register(
+      kDeviceAllocatorPos, {&CUFAllocDevice, CUFFreeDevice});
+  allocatorRegistry.Register(
+      kManagedAllocatorPos, {&CUFAllocManaged, CUFFreeManaged});
+}
+
+void *CUFAllocPinned(std::size_t sizeInBytes) {
+  void *p = nullptr;
+  CUDA_REPORT_IF_ERROR(cuMemAllocHost(&p, sizeInBytes));
+  return p;
+}
+
+void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cuMemFreeHost(p)); }
+
+void *CUFAllocDevice(std::size_t sizeInBytes) {
+  CUdeviceptr p = 0;
+  CUDA_REPORT_IF_ERROR(cuMemAlloc(&p, sizeInBytes));
+  return reinterpret_cast<void *>(p);
+}
+
+void CUFFreeDevice(void *p) {
+  CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
+}
+
+void *CUFAllocManaged(std::size_t sizeInBytes) {
+  CUdeviceptr p = 0;
+  CUDA_REPORT_IF_ERROR(
+      cuMemAllocManaged(&p, sizeInBytes, CU_MEM_ATTACH_GLOBAL));
+  return reinterpret_cast<void *>(p);
+}
+
+void CUFFreeManaged(void *p) {
+  CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
+}
+
+} // namespace Fortran::runtime::cuf
diff --git a/flang/unittests/Runtime/CMakeLists.txt b/flang/unittests/Runtime/CMakeLists.txt
index ed047b08ada3..2c3f8c1a9e9a 100644
--- a/flang/unittests/Runtime/CMakeLists.txt
+++ b/flang/unittests/Runtime/CMakeLists.txt
@@ -35,3 +35,5 @@ target_link_libraries(FlangRuntimeTests
   PRIVATE
   FortranRuntime
 )
+
+add_subdirectory(CUDA)
diff --git a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
new file mode 100644
index 000000000000..2a7c7fe25de8
--- /dev/null
+++ b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
@@ -0,0 +1,97 @@
+//===-- flang/unittests/Runtime/CUDA/AllocatorCUF.cpp ------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "../../../runtime/terminator.h"
+#include "flang/Common/Fortran.h"
+#include "flang/Runtime/CUDA/allocator.h"
+#include "flang/Runtime/allocatable.h"
+
+#include "cuda.h"
+
+using namespace Fortran::runtime;
+
+// Creates a descriptor carrying CFI_attribute_allocatable for the given type
+// category, kind and rank.
+static OwningPtr<Descriptor> createAllocatable(
+    Fortran::common::TypeCategory tc, int kind, int rank = 1) {
+  return Descriptor::Create(TypeCode{tc, kind}, kind, nullptr, rank, nullptr,
+      CFI_attribute_allocatable);
+}
+
+thread_local static int32_t defaultDevice = 0;
+
+// Returns the CUdevice handle for ordinal defaultDevice.
+CUdevice getDefaultCuDevice() {
+  CUdevice device;
+  CUDA_REPORT_IF_ERROR(cuDeviceGet(&device, /*ordinal=*/defaultDevice));
+  return device;
+}
+
+// Pushes the primary context of the default device on construction and pops
+// the current context on destruction.
+class ScopedContext {
+public:
+  ScopedContext() {
+    // Static reference to CUDA primary context for device ordinal
+    // defaultDevice.
+    static CUcontext context = [] {
+      CUDA_REPORT_IF_ERROR(cuInit(/*flags=*/0));
+      CUcontext ctx;
+      // Note: this does not affect the current context.
+      CUDA_REPORT_IF_ERROR(
+          cuDevicePrimaryCtxRetain(&ctx, getDefaultCuDevice()));
+      return ctx;
+    }();
+
+    CUDA_REPORT_IF_ERROR(cuCtxPushCurrent(context));
+  }
+
+  // Non-copyable: every instance pops a context in its destructor.
+  ScopedContext(const ScopedContext &) = delete;
+  ScopedContext &operator=(const ScopedContext &) = delete;
+
+  ~ScopedContext() { CUDA_REPORT_IF_ERROR(cuCtxPopCurrent(nullptr)); }
+};
+
+TEST(AllocatableCUFTest, SimpleDeviceAllocate) {
+  using Fortran::common::TypeCategory;
+  Fortran::runtime::cuf::CUFRegisterAllocator();
+  ScopedContext ctx;
+  // REAL(4), DEVICE, ALLOCATABLE :: a(:)
+  auto a{createAllocatable(TypeCategory::Real, 4)};
+  a->SetAllocIdx(kDeviceAllocatorPos);
+  EXPECT_EQ(static_cast<int>(kDeviceAllocatorPos), a->GetAllocIdx());
+  EXPECT_FALSE(a->HasAddendum());
+  RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
+  RTNAME(AllocatableAllocate)
+  (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+  EXPECT_TRUE(a->IsAllocated());
+  RTNAME(AllocatableDeallocate)
+  (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+  EXPECT_FALSE(a->IsAllocated());
+}
+
+TEST(AllocatableCUFTest, SimplePinnedAllocate) {
+  using Fortran::common::TypeCategory;
+  Fortran::runtime::cuf::CUFRegisterAllocator();
+  ScopedContext ctx;
+  // INTEGER(4), PINNED, ALLOCATABLE :: a(:)
+  auto a{createAllocatable(TypeCategory::Integer, 4)};
+  EXPECT_FALSE(a->HasAddendum());
+  a->SetAllocIdx(kPinnedAllocatorPos);
+  EXPECT_EQ(static_cast<int>(kPinnedAllocatorPos), a->GetAllocIdx());
+  EXPECT_FALSE(a->HasAddendum());
+  RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
+  RTNAME(AllocatableAllocate)
+  (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+  EXPECT_TRUE(a->IsAllocated());
+  RTNAME(AllocatableDeallocate)
+  (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+  EXPECT_FALSE(a->IsAllocated());
+}
diff --git a/flang/unittests/Runtime/CUDA/CMakeLists.txt b/flang/unittests/Runtime/CUDA/CMakeLists.txt
new file mode 100644
index 000000000000..14b5c788719b
--- /dev/null
+++ b/flang/unittests/Runtime/CUDA/CMakeLists.txt
@@ -0,0 +1,15 @@
+if (FLANG_CUF_RUNTIME)
+
+add_flang_unittest(FlangCufRuntimeTests
+  AllocatorCUF.cpp
+)
+
+target_link_libraries(FlangCufRuntimeTests
+  PRIVATE
+  CufRuntime
+  FortranRuntime
+)
+
+target_include_directories(FlangCufRuntimeTests PRIVATE ${CUDAToolkit_INCLUDE_DIRS})
+
+endif()