From 84f22f7e72c0fe8e5554d7ddfebf22c93ffb4610 Mon Sep 17 00:00:00 2001
From: Christopher Dunn <cdunn@pacificbiosciences.com>
Date: Tue, 23 Feb 2021 13:40:07 -0600
Subject: [PATCH] GW_USE_DEVICE_ALLOCATOR_FILE

Select the DefaultDeviceAllocator at configure time, based on
gw_enable_caching_allocator.

This way, after the library is built and installed, the user
does not need to specify `-DGW_ENABLE_CACHING_ALLOCATOR`
(the macro which previously selected the DefaultDeviceAllocator to use).
---
 common/base/CMakeLists.txt                    |  6 +-
 .../genomeworks/utils/allocator.in            | 38 ++++++++++++
 .../utils/{allocator.hpp => allocators.hpp}   | 38 ------------
 .../utils/use_caching_device_allocator.hpp    | 60 +++++++++++++++++++
 .../utils/use_cuda_malloc_allocator.hpp       | 58 ++++++++++++++++++
 5 files changed, 161 insertions(+), 39 deletions(-)
 create mode 100644 common/base/include/claraparabricks/genomeworks/utils/allocator.in
 rename common/base/include/claraparabricks/genomeworks/utils/{allocator.hpp => allocators.hpp} (88%)
 create mode 100644 common/base/include/claraparabricks/genomeworks/utils/use_caching_device_allocator.hpp
 create mode 100644 common/base/include/claraparabricks/genomeworks/utils/use_cuda_malloc_allocator.hpp

diff --git a/common/base/CMakeLists.txt b/common/base/CMakeLists.txt
index 677a0d144..7086f2054 100644
--- a/common/base/CMakeLists.txt
+++ b/common/base/CMakeLists.txt
@@ -40,7 +40,11 @@ if (gw_device_synchronize_kernels)
 endif()
 
 if(gw_enable_caching_allocator)
-    target_compile_definitions(${MODULE_NAME} PUBLIC GW_ENABLE_CACHING_ALLOCATOR)
+    set(GW_USE_DEVICE_ALLOCATOR_FILE "use_caching_device_allocator.hpp")
+else()
+    set(GW_USE_DEVICE_ALLOCATOR_FILE "use_cuda_malloc_allocator.hpp")
 endif()
+# configure_file() substitutes @GW_USE_DEVICE_ALLOCATOR_FILE@ when it is called, so it must run after the variable is set.
+configure_file(include/claraparabricks/genomeworks/utils/allocator.in include/claraparabricks/genomeworks/utils/allocator.hpp)
 
 target_include_directories(${MODULE_NAME}
diff --git a/common/base/include/claraparabricks/genomeworks/utils/allocator.in b/common/base/include/claraparabricks/genomeworks/utils/allocator.in
new file mode 100644
index 000000000..d42822d04
--- /dev/null
+++ b/common/base/include/claraparabricks/genomeworks/utils/allocator.in
@@ -0,0 +1,38 @@
+/*
+* Copyright 2019-2020 NVIDIA CORPORATION.
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#pragma once
+
+#include "@GW_USE_DEVICE_ALLOCATOR_FILE@" // file name is substituted by CMake's configure_file()
+
+namespace claraparabricks
+{
+
+namespace genomeworks
+{
+
+/// Gets the size of the largest free memory block in the allocator
+///
+/// \see create_default_device_allocator
+/// \return returns the size in bytes
+inline int64_t get_size_of_largest_free_memory_block(DefaultDeviceAllocator const& allocator)
+{
+    return allocator.get_size_of_largest_free_memory_block();
+}
+
+} // namespace genomeworks
+
+} // namespace claraparabricks
diff --git a/common/base/include/claraparabricks/genomeworks/utils/allocator.hpp b/common/base/include/claraparabricks/genomeworks/utils/allocators.hpp
similarity index 88%
rename from common/base/include/claraparabricks/genomeworks/utils/allocator.hpp
rename to common/base/include/claraparabricks/genomeworks/utils/allocators.hpp
index 62dde51a9..fccd13396 100644
--- a/common/base/include/claraparabricks/genomeworks/utils/allocator.hpp
+++ b/common/base/include/claraparabricks/genomeworks/utils/allocators.hpp
@@ -319,44 +319,6 @@ class CachingDeviceAllocator
     cudaStream_t default_stream_;
 };
 
-#ifdef GW_ENABLE_CACHING_ALLOCATOR
-using DefaultDeviceAllocator = CachingDeviceAllocator<char, details::DevicePreallocatedAllocator>;
-#else
-using DefaultDeviceAllocator = CudaMallocAllocator<char>;
-#endif
-
-/// Gets the size of the largest free memory block in the allocator
-///
-/// \return returns the size in bytes
-inline int64_t get_size_of_largest_free_memory_block(DefaultDeviceAllocator const& allocator)
-{
-    return allocator.get_size_of_largest_free_memory_block();
-}
-
-/// Constructs a DefaultDeviceAllocator
-///
-/// This function provides a way to construct a valid DefaultDeviceAllocator
-/// for all possible DefaultDeviceAllocators.
-/// Use this function to obtain a DefaultDeviceAllocator object.
-/// This function is needed, since construction of CachingDeviceAllocator
-/// requires a max_caching_size argument to obtain a valid allocator.
-/// Default constuction of CachingDeviceAllocator yields an dummy object
-/// which cannot allocate memory.
-/// \param max_cached_bytes max bytes used by memory resource used by CachingDeviceAllocator (default: 2GiB, unused for CudaMallocAllocator)
-/// \param default_stream if a call to allocate() does not specify any streams this stream will be used instead (unused for CudaMallocAllocator)
-inline DefaultDeviceAllocator create_default_device_allocator(std::size_t max_caching_size = 2ull * 1024 * 1024 * 1024,
-                                                              cudaStream_t default_stream  = 0)
-{
-#ifdef GW_ENABLE_CACHING_ALLOCATOR
-    return DefaultDeviceAllocator(max_caching_size,
-                                  default_stream);
-#else
-    static_cast<void>(max_caching_size);
-    static_cast<void>(default_stream);
-    return DefaultDeviceAllocator();
-#endif
-}
-
 } // namespace genomeworks
 
 } // namespace claraparabricks
diff --git a/common/base/include/claraparabricks/genomeworks/utils/use_caching_device_allocator.hpp b/common/base/include/claraparabricks/genomeworks/utils/use_caching_device_allocator.hpp
new file mode 100644
index 000000000..d873a0d8e
--- /dev/null
+++ b/common/base/include/claraparabricks/genomeworks/utils/use_caching_device_allocator.hpp
@@ -0,0 +1,60 @@
+/*
+* Copyright 2019-2020 NVIDIA CORPORATION.
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#ifndef GW_INCLUDED_DEVICE_ALLOCATOR_HPP
+#define GW_INCLUDED_DEVICE_ALLOCATOR_HPP
+
+#if !defined(GW_ENABLE_CACHING_ALLOCATOR)
+#define GW_ENABLE_CACHING_ALLOCATOR
+#else
+#warning "GW_ENABLE_CACHING_ALLOCATOR should not be already set."
+#endif
+
+#include "allocators.hpp"
+
+namespace claraparabricks
+{
+
+namespace genomeworks
+{
+
+using DefaultDeviceAllocator = CachingDeviceAllocator<char, details::DevicePreallocatedAllocator>;
+
+/// Creates a usable DefaultDeviceAllocator
+///
+/// Always obtain a DefaultDeviceAllocator through this function: it has the
+/// same signature for every possible DefaultDeviceAllocator, so calling code
+/// does not depend on which allocator was selected.
+/// With this header the DefaultDeviceAllocator is a CachingDeviceAllocator,
+/// which needs a max_caching_size argument to become a valid allocator.
+/// A default-constructed CachingDeviceAllocator is only a dummy object
+/// which cannot allocate memory.
+/// \param max_caching_size max bytes used by the memory resource of the CachingDeviceAllocator (default: 2GiB)
+/// \param default_stream if a call to allocate() does not specify any streams this stream will be used instead
+/// \return the constructed DefaultDeviceAllocator
+inline DefaultDeviceAllocator create_default_device_allocator(std::size_t max_caching_size = 2ull * 1024 * 1024 * 1024,
+                                                              cudaStream_t default_stream  = 0)
+{
+    return DefaultDeviceAllocator(max_caching_size, default_stream);
+}
+
+} // namespace genomeworks
+
+} // namespace claraparabricks
+
+#else
+#error "Attempted to included 2 DeviceAllocators!"
+#endif
diff --git a/common/base/include/claraparabricks/genomeworks/utils/use_cuda_malloc_allocator.hpp b/common/base/include/claraparabricks/genomeworks/utils/use_cuda_malloc_allocator.hpp
new file mode 100644
index 000000000..54afedb2a
--- /dev/null
+++ b/common/base/include/claraparabricks/genomeworks/utils/use_cuda_malloc_allocator.hpp
@@ -0,0 +1,58 @@
+/*
+* Copyright 2019-2020 NVIDIA CORPORATION.
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#ifndef GW_INCLUDED_DEVICE_ALLOCATOR_HPP
+#define GW_INCLUDED_DEVICE_ALLOCATOR_HPP
+
+#ifdef GW_ENABLE_CACHING_ALLOCATOR
+#warning "GW_ENABLE_CACHING_ALLOCATOR should not be set for CudaMallocAllocator."
+#undef GW_ENABLE_CACHING_ALLOCATOR
+#endif
+
+#include "allocators.hpp"
+
+namespace claraparabricks
+{
+
+namespace genomeworks
+{
+
+using DefaultDeviceAllocator = CudaMallocAllocator<char>;
+
+/// Constructs a DefaultDeviceAllocator
+///
+/// Use this function to obtain a DefaultDeviceAllocator object.
+/// It has the same signature for all possible DefaultDeviceAllocators,
+/// so calling code does not depend on which allocator was selected.
+/// With this header the DefaultDeviceAllocator is a CudaMallocAllocator,
+/// which is default-constructed; both arguments are ignored.
+/// \param max_caching_size unused by CudaMallocAllocator
+/// \param default_stream unused by CudaMallocAllocator
+inline DefaultDeviceAllocator create_default_device_allocator(std::size_t max_caching_size = 2ull * 1024 * 1024 * 1024,
+                                                              cudaStream_t default_stream  = 0)
+{
+    static_cast<void>(max_caching_size);
+    static_cast<void>(default_stream);
+    return DefaultDeviceAllocator();
+}
+
+} // namespace genomeworks
+
+} // namespace claraparabricks
+
+#else
+#error "Attempted to included 2 DeviceAllocators!"
+#endif