forked from llvm/clangir
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[flang][cuda] Add CUF allocator (#101216)
Add allocators for CUDA Fortran allocation on the device. Three allocators are added for pinned, device and managed/unified memory allocation. `CUFRegisterAllocator()` is called to register the allocators in the allocator registry added in #100690. Since this requires CUDA, a CMake option `FLANG_CUF_RUNTIME` is added to conditionally build these.
- Loading branch information
1 parent
d6649f2
commit 1417633
Showing
8 changed files
with
238 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
//===-- include/flang/Runtime/CUDA/allocator.h ------------------*- C++ -*-===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_ | ||
#define FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_ | ||
|
||
#include "flang/Runtime/descriptor.h" | ||
|
||
// Positions in the runtime allocator registry at which the CUDA Fortran
// allocators are registered by CUFRegisterAllocator().
static constexpr unsigned kPinnedAllocatorPos{1};
static constexpr unsigned kDeviceAllocatorPos{2};
static constexpr unsigned kManagedAllocatorPos{3};
|
||
// Evaluates a CUDA driver API call `expr` and checks its CUresult.
// On success nothing happens. On failure, the error name is looked up with
// cuGetErrorName() (falling back to "<unknown>") and Terminator::Crash() is
// called with the stringified expression and that name; the source location
// reported is the point where the macro is expanded.
//
// Terminator is spelled with its full namespace so that the macro expands
// correctly regardless of the namespace or using-directives in effect at the
// point of use.
#define CUDA_REPORT_IF_ERROR(expr) \
  [](CUresult result) { \
    if (result == CUDA_SUCCESS) \
      return; \
    const char *name = nullptr; \
    cuGetErrorName(result, &name); \
    if (!name) \
      name = "<unknown>"; \
    Fortran::runtime::Terminator terminator{__FILE__, __LINE__}; \
    terminator.Crash("'%s' failed with '%s'", #expr, name); \
  }(expr)
|
||
namespace Fortran::runtime::cuf {

// Registers the pinned, device and managed allocator/deallocator pairs
// declared below in the runtime allocator registry.
void CUFRegisterAllocator();

// Pinned memory: allocation of `sizeInBytes` bytes and the matching release.
void *CUFAllocPinned(std::size_t sizeInBytes);
void CUFFreePinned(void *p);

// Device memory: allocation of `sizeInBytes` bytes and the matching release.
void *CUFAllocDevice(std::size_t sizeInBytes);
void CUFFreeDevice(void *p);

// Managed memory: allocation of `sizeInBytes` bytes and the matching release.
void *CUFAllocManaged(std::size_t sizeInBytes);
void CUFFreeManaged(void *p);

} // namespace Fortran::runtime::cuf
#endif // FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#===-- runtime/CUDA/CMakeLists.txt -----------------------------------------===# | ||
# | ||
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
# See https://llvm.org/LICENSE.txt for license information. | ||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
# | ||
#===------------------------------------------------------------------------===# | ||
|
||
# Make the CUDA toolkit headers visible to the sources in this directory.
include_directories(${CUDAToolkit_INCLUDE_DIRS})

# Locate the `cuda` library; REQUIRED makes configuration fail if it is
# not found.
find_library(CUDA_RUNTIME_LIBRARY
  NAMES cuda
  HINTS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}
  REQUIRED
)

# CUDA Fortran runtime support library.
add_flang_library(CufRuntime
  allocator.cpp
)

target_link_libraries(CufRuntime
  PRIVATE
  FortranRuntime
  ${CUDA_RUNTIME_LIBRARY}
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
//===-- runtime/CUDA/allocator.cpp ----------------------------------------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "flang/Runtime/CUDA/allocator.h" | ||
#include "../allocator-registry.h" | ||
#include "../derived.h" | ||
#include "../stat.h" | ||
#include "../terminator.h" | ||
#include "../type-info.h" | ||
#include "flang/Common/Fortran.h" | ||
#include "flang/ISO_Fortran_binding_wrapper.h" | ||
|
||
#include "cuda.h" | ||
|
||
namespace Fortran::runtime::cuf { | ||
|
||
void CUFRegisterAllocator() { | ||
allocatorRegistry.Register( | ||
kPinnedAllocatorPos, {&CUFAllocPinned, CUFFreePinned}); | ||
allocatorRegistry.Register( | ||
kDeviceAllocatorPos, {&CUFAllocDevice, CUFFreeDevice}); | ||
allocatorRegistry.Register( | ||
kManagedAllocatorPos, {&CUFAllocManaged, CUFFreeManaged}); | ||
} | ||
|
||
void *CUFAllocPinned(std::size_t sizeInBytes) { | ||
void *p; | ||
CUDA_REPORT_IF_ERROR(cuMemAllocHost(&p, sizeInBytes)); | ||
return p; | ||
} | ||
|
||
void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cuMemFreeHost(p)); } | ||
|
||
void *CUFAllocDevice(std::size_t sizeInBytes) { | ||
CUdeviceptr p = 0; | ||
CUDA_REPORT_IF_ERROR(cuMemAlloc(&p, sizeInBytes)); | ||
return reinterpret_cast<void *>(p); | ||
} | ||
|
||
void CUFFreeDevice(void *p) { | ||
CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p))); | ||
} | ||
|
||
void *CUFAllocManaged(std::size_t sizeInBytes) { | ||
CUdeviceptr p = 0; | ||
CUDA_REPORT_IF_ERROR( | ||
cuMemAllocManaged(&p, sizeInBytes, CU_MEM_ATTACH_GLOBAL)); | ||
return reinterpret_cast<void *>(p); | ||
} | ||
|
||
void CUFFreeManaged(void *p) { | ||
CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p))); | ||
} | ||
|
||
} // namespace Fortran::runtime::cuf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,3 +35,5 @@ target_link_libraries(FlangRuntimeTests | |
PRIVATE | ||
FortranRuntime | ||
) | ||
|
||
add_subdirectory(CUDA) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
//===-- flang/unittests/Runtime/CUDA/AllocatorCUF.cpp ------------*- C++-*-===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "gtest/gtest.h" | ||
#include "../../../runtime/terminator.h" | ||
#include "flang/Common/Fortran.h" | ||
#include "flang/Runtime/CUDA/allocator.h" | ||
#include "flang/Runtime/allocatable.h" | ||
|
||
#include "cuda.h" | ||
|
||
using namespace Fortran::runtime; | ||
|
||
// Builds an allocatable descriptor (CFI_attribute_allocatable) for the given
// type category and kind. `kind` is also passed as the second argument of
// Descriptor::Create, and both pointer arguments are null. The rank defaults
// to 1.
static OwningPtr<Descriptor> createAllocatable(
    Fortran::common::TypeCategory tc, int kind, int rank = 1) {
  const TypeCode typeCode{tc, kind};
  return Descriptor::Create(
      typeCode, kind, nullptr, rank, nullptr, CFI_attribute_allocatable);
}
|
||
thread_local static int32_t defaultDevice = 0; | ||
|
||
CUdevice getDefaultCuDevice() { | ||
CUdevice device; | ||
CUDA_REPORT_IF_ERROR(cuDeviceGet(&device, /*ordinal=*/defaultDevice)); | ||
return device; | ||
} | ||
|
||
class ScopedContext { | ||
public: | ||
ScopedContext() { | ||
// Static reference to CUDA primary context for device ordinal | ||
// defaultDevice. | ||
static CUcontext context = [] { | ||
CUDA_REPORT_IF_ERROR(cuInit(/*flags=*/0)); | ||
CUcontext ctx; | ||
// Note: this does not affect the current context. | ||
CUDA_REPORT_IF_ERROR( | ||
cuDevicePrimaryCtxRetain(&ctx, getDefaultCuDevice())); | ||
return ctx; | ||
}(); | ||
|
||
CUDA_REPORT_IF_ERROR(cuCtxPushCurrent(context)); | ||
} | ||
|
||
~ScopedContext() { CUDA_REPORT_IF_ERROR(cuCtxPopCurrent(nullptr)); } | ||
}; | ||
|
||
// Allocates and deallocates a rank-1 REAL(4) allocatable whose descriptor
// selects the device allocator, checking the allocation status at each step.
TEST(AllocatableCUFTest, SimpleDeviceAllocate) {
  using Fortran::common::TypeCategory;
  Fortran::runtime::cuf::CUFRegisterAllocator();
  ScopedContext cudaContext;

  // REAL(4), DEVICE, ALLOCATABLE :: a(:)
  OwningPtr<Descriptor> desc{createAllocatable(TypeCategory::Real, 4)};
  desc->SetAllocIdx(kDeviceAllocatorPos);
  EXPECT_EQ(static_cast<int>(kDeviceAllocatorPos), desc->GetAllocIdx());
  EXPECT_FALSE(desc->HasAddendum());

  // Dimension 0 gets bounds 1:10.
  RTNAME(AllocatableSetBounds)(*desc, 0, 1, 10);

  RTNAME(AllocatableAllocate)
  (*desc, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
  EXPECT_TRUE(desc->IsAllocated());

  RTNAME(AllocatableDeallocate)
  (*desc, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
  EXPECT_FALSE(desc->IsAllocated());
}
|
||
// Allocates and deallocates a rank-1 INTEGER(4) allocatable whose descriptor
// selects the pinned allocator, checking the allocation status at each step.
TEST(AllocatableCUFTest, SimplePinnedAllocate) {
  using Fortran::common::TypeCategory;
  Fortran::runtime::cuf::CUFRegisterAllocator();
  ScopedContext cudaContext;

  // INTEGER(4), PINNED, ALLOCATABLE :: a(:)
  OwningPtr<Descriptor> desc{createAllocatable(TypeCategory::Integer, 4)};
  // The addendum check is made both before and after the allocator index is
  // set.
  EXPECT_FALSE(desc->HasAddendum());
  desc->SetAllocIdx(kPinnedAllocatorPos);
  EXPECT_EQ(static_cast<int>(kPinnedAllocatorPos), desc->GetAllocIdx());
  EXPECT_FALSE(desc->HasAddendum());

  // Dimension 0 gets bounds 1:10.
  RTNAME(AllocatableSetBounds)(*desc, 0, 1, 10);

  RTNAME(AllocatableAllocate)
  (*desc, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
  EXPECT_TRUE(desc->IsAllocated());

  RTNAME(AllocatableDeallocate)
  (*desc, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
  EXPECT_FALSE(desc->IsAllocated());
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# The CUDA Fortran runtime unit tests are only configured when
# FLANG_CUF_RUNTIME is enabled.
if (FLANG_CUF_RUNTIME)

  add_flang_unittest(FlangCufRuntimeTests
    AllocatorCUF.cpp
  )

  target_link_libraries(FlangCufRuntimeTests
    PRIVATE
    CufRuntime
    FortranRuntime
  )

  # The test sources include the CUDA toolkit headers directly.
  target_include_directories(FlangCufRuntimeTests
    PRIVATE
    ${CUDAToolkit_INCLUDE_DIRS}
  )

endif()