Skip to content

Commit

Permalink
[flang][cuda] Add CUF allocator (#101216)
Browse files Browse the repository at this point in the history
Add allocators for CUDA fortran allocation on the device. 3 allocators
are added for pinned, device and managed/unified memory allocation.
`CUFRegisterAllocator()` is called to register the allocators in the
allocator registry added in #100690.


Since this require CUDA, a cmake option `FLANG_CUF_RUNTIME` is added to
conditionally build these.
  • Loading branch information
clementval authored Aug 2, 2024
1 parent d6649f2 commit 1417633
Show file tree
Hide file tree
Showing 8 changed files with 238 additions and 0 deletions.
7 changes: 7 additions & 0 deletions flang/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,13 @@ option(FLANG_BUILD_TOOLS
if (FLANG_BUILD_TOOLS)
add_subdirectory(tools)
endif()

option(FLANG_CUF_RUNTIME
"Compile CUDA Fortran runtime sources" OFF)
if (FLANG_CUF_RUNTIME)
find_package(CUDAToolkit REQUIRED)
endif()

add_subdirectory(runtime)

if (LLVM_INCLUDE_EXAMPLES)
Expand Down
44 changes: 44 additions & 0 deletions flang/include/flang/Runtime/CUDA/allocator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
//===-- include/flang/Runtime/CUDA/allocator.h ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
#define FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_

#include "flang/Runtime/descriptor.h"

static constexpr unsigned kPinnedAllocatorPos = 1;
static constexpr unsigned kDeviceAllocatorPos = 2;
static constexpr unsigned kManagedAllocatorPos = 3;

#define CUDA_REPORT_IF_ERROR(expr) \
[](CUresult result) { \
if (!result) \
return; \
const char *name = nullptr; \
cuGetErrorName(result, &name); \
if (!name) \
name = "<unknown>"; \
Terminator terminator{__FILE__, __LINE__}; \
terminator.Crash("'%s' failed with '%s'", #expr, name); \
}(expr)

namespace Fortran::runtime::cuf {

void CUFRegisterAllocator();

void *CUFAllocPinned(std::size_t);
void CUFFreePinned(void *);

void *CUFAllocDevice(std::size_t);
void CUFFreeDevice(void *);

void *CUFAllocManaged(std::size_t);
void CUFFreeManaged(void *);

} // namespace Fortran::runtime::cuf
#endif // FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
3 changes: 3 additions & 0 deletions flang/runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -309,3 +309,6 @@ if (TARGET flang-new AND TARGET module_files)
add_dependencies(FortranRuntime flang-new module_files)
endif()

if (FLANG_CUF_RUNTIME)
add_subdirectory(CUDA)
endif()
19 changes: 19 additions & 0 deletions flang/runtime/CUDA/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#===-- runtime/CUDA/CMakeLists.txt -----------------------------------------===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#

include_directories(${CUDAToolkit_INCLUDE_DIRS})
find_library(CUDA_RUNTIME_LIBRARY cuda HINTS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)

add_flang_library(CufRuntime
allocator.cpp
)
target_link_libraries(CufRuntime
PRIVATE
FortranRuntime
${CUDA_RUNTIME_LIBRARY}
)
60 changes: 60 additions & 0 deletions flang/runtime/CUDA/allocator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
//===-- runtime/CUDA/allocator.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "flang/Runtime/CUDA/allocator.h"
#include "../allocator-registry.h"
#include "../derived.h"
#include "../stat.h"
#include "../terminator.h"
#include "../type-info.h"
#include "flang/Common/Fortran.h"
#include "flang/ISO_Fortran_binding_wrapper.h"

#include "cuda.h"

namespace Fortran::runtime::cuf {

void CUFRegisterAllocator() {
allocatorRegistry.Register(
kPinnedAllocatorPos, {&CUFAllocPinned, CUFFreePinned});
allocatorRegistry.Register(
kDeviceAllocatorPos, {&CUFAllocDevice, CUFFreeDevice});
allocatorRegistry.Register(
kManagedAllocatorPos, {&CUFAllocManaged, CUFFreeManaged});
}

void *CUFAllocPinned(std::size_t sizeInBytes) {
void *p;
CUDA_REPORT_IF_ERROR(cuMemAllocHost(&p, sizeInBytes));
return p;
}

void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cuMemFreeHost(p)); }

void *CUFAllocDevice(std::size_t sizeInBytes) {
CUdeviceptr p = 0;
CUDA_REPORT_IF_ERROR(cuMemAlloc(&p, sizeInBytes));
return reinterpret_cast<void *>(p);
}

void CUFFreeDevice(void *p) {
CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
}

void *CUFAllocManaged(std::size_t sizeInBytes) {
CUdeviceptr p = 0;
CUDA_REPORT_IF_ERROR(
cuMemAllocManaged(&p, sizeInBytes, CU_MEM_ATTACH_GLOBAL));
return reinterpret_cast<void *>(p);
}

void CUFFreeManaged(void *p) {
CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
}

} // namespace Fortran::runtime::cuf
2 changes: 2 additions & 0 deletions flang/unittests/Runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,5 @@ target_link_libraries(FlangRuntimeTests
PRIVATE
FortranRuntime
)

add_subdirectory(CUDA)
88 changes: 88 additions & 0 deletions flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
//===-- flang/unittests/Runtime/AllocatableCUF.cpp ---------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "gtest/gtest.h"
#include "../../../runtime/terminator.h"
#include "flang/Common/Fortran.h"
#include "flang/Runtime/CUDA/allocator.h"
#include "flang/Runtime/allocatable.h"

#include "cuda.h"

using namespace Fortran::runtime;

static OwningPtr<Descriptor> createAllocatable(
Fortran::common::TypeCategory tc, int kind, int rank = 1) {
return Descriptor::Create(TypeCode{tc, kind}, kind, nullptr, rank, nullptr,
CFI_attribute_allocatable);
}

thread_local static int32_t defaultDevice = 0;

CUdevice getDefaultCuDevice() {
CUdevice device;
CUDA_REPORT_IF_ERROR(cuDeviceGet(&device, /*ordinal=*/defaultDevice));
return device;
}

class ScopedContext {
public:
ScopedContext() {
// Static reference to CUDA primary context for device ordinal
// defaultDevice.
static CUcontext context = [] {
CUDA_REPORT_IF_ERROR(cuInit(/*flags=*/0));
CUcontext ctx;
// Note: this does not affect the current context.
CUDA_REPORT_IF_ERROR(
cuDevicePrimaryCtxRetain(&ctx, getDefaultCuDevice()));
return ctx;
}();

CUDA_REPORT_IF_ERROR(cuCtxPushCurrent(context));
}

~ScopedContext() { CUDA_REPORT_IF_ERROR(cuCtxPopCurrent(nullptr)); }
};

TEST(AllocatableCUFTest, SimpleDeviceAllocate) {
using Fortran::common::TypeCategory;
Fortran::runtime::cuf::CUFRegisterAllocator();
ScopedContext ctx;
// REAL(4), DEVICE, ALLOCATABLE :: a(:)
auto a{createAllocatable(TypeCategory::Real, 4)};
a->SetAllocIdx(kDeviceAllocatorPos);
EXPECT_EQ((int)kDeviceAllocatorPos, a->GetAllocIdx());
EXPECT_FALSE(a->HasAddendum());
RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
RTNAME(AllocatableAllocate)
(*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
EXPECT_TRUE(a->IsAllocated());
RTNAME(AllocatableDeallocate)
(*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
EXPECT_FALSE(a->IsAllocated());
}

TEST(AllocatableCUFTest, SimplePinnedAllocate) {
using Fortran::common::TypeCategory;
Fortran::runtime::cuf::CUFRegisterAllocator();
ScopedContext ctx;
// INTEGER(4), PINNED, ALLOCATABLE :: a(:)
auto a{createAllocatable(TypeCategory::Integer, 4)};
EXPECT_FALSE(a->HasAddendum());
a->SetAllocIdx(kPinnedAllocatorPos);
EXPECT_EQ((int)kPinnedAllocatorPos, a->GetAllocIdx());
EXPECT_FALSE(a->HasAddendum());
RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
RTNAME(AllocatableAllocate)
(*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
EXPECT_TRUE(a->IsAllocated());
RTNAME(AllocatableDeallocate)
(*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
EXPECT_FALSE(a->IsAllocated());
}
15 changes: 15 additions & 0 deletions flang/unittests/Runtime/CUDA/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
if (FLANG_CUF_RUNTIME)

add_flang_unittest(FlangCufRuntimeTests
AllocatorCUF.cpp
)

target_link_libraries(FlangCufRuntimeTests
PRIVATE
CufRuntime
FortranRuntime
)

target_include_directories(FlangCufRuntimeTests PRIVATE ${CUDAToolkit_INCLUDE_DIRS})

endif()

0 comments on commit 1417633

Please sign in to comment.