Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cuda support and acceleration for octomaps. #257

Open
wants to merge 32 commits into
base: devel
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
00e8cfa
Updated Cmake for cuda support.
saifullah3396 Oct 15, 2019
8b7ee76
Macro definition for cuda error checking.
saifullah3396 Oct 15, 2019
53558f6
Made math classes CUDA host/device callable.
saifullah3396 Oct 15, 2019
64bdc9f
Made coordToKey and keyToCoord functions CUDA host/device callable.
saifullah3396 Oct 15, 2019
f776fc6
Made functions used in computeKeyRay device/host callable.
saifullah3396 Oct 15, 2019
16b66ed
Made OcTreeKey device/host callable.
saifullah3396 Oct 15, 2019
08e54a8
Added a key container class for storing ray-traced keys in c-array st…
saifullah3396 Oct 15, 2019
87810c3
declaration of CUDA based computeUpdate and and its kernel.
saifullah3396 Oct 15, 2019
d0f277a
Rename.
saifullah3396 Oct 15, 2019
2ca9335
Definition of atomic update of array from parallel threads.
saifullah3396 Oct 15, 2019
b8e410a
Definition of CUDA based computeUpdate and its kernel.
saifullah3396 Oct 15, 2019
506179e
Added computeUpdateCUDA usage.
saifullah3396 Oct 15, 2019
a371590
Added compilation of cuda files.
saifullah3396 Oct 15, 2019
5ca2a11
Removed -fPIC flag as it does not work with nvcc. Instead POSITION_IN…
saifullah3396 Oct 15, 2019
9f4d69c
Cuda file renames and fixes.
saifullah3396 Oct 15, 2019
ebd019f
Fixed _cuda_support_ block.
saifullah3396 Oct 15, 2019
7b40c05
Refactoring.
saifullah3396 Oct 15, 2019
eb1d49e
Refactoring.
saifullah3396 Oct 15, 2019
c9348f2
Added all tests.
saifullah3396 Oct 15, 2019
301f01b
Removed debugging info.
saifullah3396 Oct 15, 2019
c4b8998
Added debugging info.
saifullah3396 Oct 15, 2019
76c78c2
Fixed a few issues with the cuda side. The high time on tree initiali…
saifullah3396 Oct 15, 2019
3cb8e60
Better Cuda but still not better than CPU.
saifullah3396 Oct 22, 2019
b76c7ea
Better and faster than before.
saifullah3396 Oct 24, 2019
52f4348
Merged computeCuda and clean() kernels. Removed host copying since fi…
saifullah3396 Oct 25, 2019
15d62a2
Added lazy_eval as argument.
saifullah3396 Oct 25, 2019
5f220d2
Added support for discretized scan.
saifullah3396 Oct 25, 2019
43d4ad6
Removed KeyValue struct as it is unused now.
saifullah3396 Oct 25, 2019
7faa43f
Hash set is unused so removed.
saifullah3396 Oct 25, 2019
0941020
CMake refactor.
saifullah3396 Oct 25, 2019
14d242d
Refactoring and memory error fixes.
saifullah3396 Oct 25, 2019
a0a8c68
Fixed a mistake of free/occupied hash updates.
saifullah3396 Jan 28, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion octomap/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
PROJECT( octomap )
PROJECT( octomap LANGUAGES CUDA CXX)

include(CTest)

Expand All @@ -21,6 +21,7 @@ SET (CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMakeModules")

# COMPILER SETTINGS (default: Release) and flags
INCLUDE(CompilerSettings)
SET (OCTOMAP_POSITION_INDEPENDENT_CODE ON)

# OCTOMAP_OMP = enable OpenMP parallelization (experimental, defaults to OFF)
SET(OCTOMAP_OMP FALSE CACHE BOOL "Enable/disable OpenMP parallelization")
Expand All @@ -34,6 +35,22 @@ IF(OCTOMAP_OMP)
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
ENDIF(OCTOMAP_OMP)

# __CUDA_SUPPORT__ = enable CUDA parallelization (experimental, defaults to OFF)
SET(__CUDA_SUPPORT__ FALSE CACHE BOOL "Enable/disable CUDA parallelization")
IF(DEFINED ENV{__CUDA_SUPPORT__})
SET(__CUDA_SUPPORT__ $ENV{__CUDA_SUPPORT__})
ENDIF(DEFINED ENV{__CUDA_SUPPORT__})

IF(__CUDA_SUPPORT__)
SET(CUDA_COMPUTE_CAPABILITY 61) # Tested only on 61 atm
FIND_PACKAGE( CUDA 9.0 REQUIRED)
message(STATUS "Found CUDA ${CUDA_VERSION_STRING} at ${CUDA_TOOLKIT_ROOT_DIR}")
set(CUDA_NVCC_FLAGS
${CUDA_NVCC_FLAGS};
-O3 -gencode arch=compute_${CUDA_COMPUTE_CAPABILITY},code=sm_${CUDA_COMPUTE_CAPABILITY})
add_definitions(-D__CUDA_SUPPORT__)
ENDIF(__CUDA_SUPPORT__)

# Set output directories for libraries and executables
SET( BASE_DIR ${CMAKE_SOURCE_DIR} )
SET( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${BASE_DIR}/lib )
Expand Down
4 changes: 2 additions & 2 deletions octomap/CMakeModules/CompilerSettings.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ IF (CMAKE_COMPILER_IS_GNUCC)
SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-error ")
SET (CMAKE_CXX_FLAGS_RELEASE "-O3 -funroll-loops -DNDEBUG")
SET (CMAKE_CXX_FLAGS_DEBUG "-O0 -g")
# Shared object compilation under 64bit (vtable)
ADD_DEFINITIONS(-fPIC)
## # Shared object compilation under 64bit (vtable)
set(CMAKE_POSITION_INDEPENDENT_CODE ON) # enables -fPIC in applicable compilers (required to avoid link errors in some cases)
ENDIF()


Expand Down
17 changes: 17 additions & 0 deletions octomap/include/octomap/CudaAssertion.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include <cuda.h>
#include <cuda_runtime.h>
#include <iostream>

#ifndef CUDA_ASSERTION_CUH
#define CUDA_ASSERTION_CUH

#define cudaCheckErrors(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
#endif
70 changes: 70 additions & 0 deletions octomap/include/octomap/CudaOctomapUpdater.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#ifndef CUDA_OCTOMAP_UPDATER_CUH
#define CUDA_OCTOMAP_UPDATER_CUH
#include <cuda.h>
#include <cuda_runtime.h>
#include <nppi.h>
#include <octomap/OccupancyOcTreeBase.h>
#include <octomap/ColorOcTree.h>
#include <octomap/OcTreeStamped.h>
#include <octomap/TArray.cuh>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <algorithm>

using namespace octomap;

#ifdef __CUDA_SUPPORT__

template <class NODE>
class CudaOctomapUpdater
{
public:
CudaOctomapUpdater(
octomap::OccupancyOcTreeBase<NODE>* tree_base,
const double& max_range,
const size_t& scan_size,
const bool& print_info = true);

~CudaOctomapUpdater();

void initialize();

void computeUpdate(
const octomap::Pointcloud& scan,
const octomap::point3d& origin,
const double& max_range,
const bool& lazy_eval);

private:
// tree base on host
octomap::OccupancyOcTreeBase<NODE>* tree_base_;
// copy of tree base on device
octomap::OccupancyOcTreeBase<NODE>* tree_base_device_;
// preallocation of ray containers for ray-casting
KeyRayCuda* rays_device_;
int n_rays_;
// Hashset for occupied/free cells on device
UnsignedArrayCuda* free_hash_arr_device_;
UnsignedArrayCuda* occupied_hash_arr_device_;
TArray<KeyHash>* free_hashes_device_;
TArray<KeyHash>* occupied_hashes_device_;
bool use_bbx_limit_;
double res_;
double res_half_;

// make an array of points from the point cloud for device usage
octomap::point3d* scan_device;
size_t scan_size_;

// Kernel settings
int n_total_threads_;
int n_blocks_;
int n_threads_per_block_;

// map size settings
int ray_size_;
int max_hash_elements_ = 5e6;
int max_range_ = -1;
};
#endif
#endif
41 changes: 25 additions & 16 deletions octomap/include/octomap/OcTreeBaseImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,15 @@
#include "OcTreeKey.h"
#include "ScanGraph.h"

#ifdef __CUDACC__
#ifndef CUDA_CALLABLE
#define CUDA_CALLABLE __host__ __device__
#endif
#else
#ifndef CUDA_CALLABLE
#define CUDA_CALLABLE
#endif
#endif

namespace octomap {

Expand Down Expand Up @@ -354,34 +363,34 @@ namespace octomap {
//

/// Converts from a single coordinate into a discrete key
inline key_type coordToKey(double coordinate) const{
CUDA_CALLABLE inline key_type coordToKey(double coordinate) const{
return ((int) floor(resolution_factor * coordinate)) + tree_max_val;
}

/// Converts from a single coordinate into a discrete key at a given depth
key_type coordToKey(double coordinate, unsigned depth) const;
CUDA_CALLABLE key_type coordToKey(double coordinate, unsigned depth) const;


/// Converts from a 3D coordinate into a 3D addressing key
inline OcTreeKey coordToKey(const point3d& coord) const{
CUDA_CALLABLE inline OcTreeKey coordToKey(const point3d& coord) const{
return OcTreeKey(coordToKey(coord(0)), coordToKey(coord(1)), coordToKey(coord(2)));
}

/// Converts from a 3D coordinate into a 3D addressing key
inline OcTreeKey coordToKey(double x, double y, double z) const{
CUDA_CALLABLE inline OcTreeKey coordToKey(double x, double y, double z) const{
return OcTreeKey(coordToKey(x), coordToKey(y), coordToKey(z));
}

/// Converts from a 3D coordinate into a 3D addressing key at a given depth
inline OcTreeKey coordToKey(const point3d& coord, unsigned depth) const{
CUDA_CALLABLE inline OcTreeKey coordToKey(const point3d& coord, unsigned depth) const{
if (depth == tree_depth)
return coordToKey(coord);
else
return OcTreeKey(coordToKey(coord(0), depth), coordToKey(coord(1), depth), coordToKey(coord(2), depth));
}

/// Converts from a 3D coordinate into a 3D addressing key at a given depth
inline OcTreeKey coordToKey(double x, double y, double z, unsigned depth) const{
CUDA_CALLABLE inline OcTreeKey coordToKey(double x, double y, double z, unsigned depth) const{
if (depth == tree_depth)
return coordToKey(x,y,z);
else
Expand Down Expand Up @@ -421,7 +430,7 @@ namespace octomap {
* @param key values that will be computed, an array of fixed size 3.
* @return true if point is within the octree (valid), false otherwise
*/
bool coordToKeyChecked(const point3d& coord, OcTreeKey& key) const;
CUDA_CALLABLE bool coordToKeyChecked(const point3d& coord, OcTreeKey& key) const;

/**
* Converts a 3D coordinate into a 3D OcTreeKey at a certain depth, with boundary checking.
Expand All @@ -431,7 +440,7 @@ namespace octomap {
* @param key values that will be computed, an array of fixed size 3.
* @return true if point is within the octree (valid), false otherwise
*/
bool coordToKeyChecked(const point3d& coord, unsigned depth, OcTreeKey& key) const;
CUDA_CALLABLE bool coordToKeyChecked(const point3d& coord, unsigned depth, OcTreeKey& key) const;

/**
* Converts a 3D coordinate into a 3D OcTreeKey, with boundary checking.
Expand All @@ -442,7 +451,7 @@ namespace octomap {
* @param key values that will be computed, an array of fixed size 3.
* @return true if point is within the octree (valid), false otherwise
*/
bool coordToKeyChecked(double x, double y, double z, OcTreeKey& key) const;
CUDA_CALLABLE bool coordToKeyChecked(double x, double y, double z, OcTreeKey& key) const;

/**
* Converts a 3D coordinate into a 3D OcTreeKey at a certain depth, with boundary checking.
Expand All @@ -454,7 +463,7 @@ namespace octomap {
* @param key values that will be computed, an array of fixed size 3.
* @return true if point is within the octree (valid), false otherwise
*/
bool coordToKeyChecked(double x, double y, double z, unsigned depth, OcTreeKey& key) const;
CUDA_CALLABLE bool coordToKeyChecked(double x, double y, double z, unsigned depth, OcTreeKey& key) const;

/**
* Converts a single coordinate into a discrete addressing key, with boundary checking.
Expand All @@ -463,7 +472,7 @@ namespace octomap {
* @param key discrete 16 bit adressing key, result
* @return true if coordinate is within the octree bounds (valid), false otherwise
*/
bool coordToKeyChecked(double coordinate, key_type& key) const;
CUDA_CALLABLE bool coordToKeyChecked(double coordinate, key_type& key) const;

/**
* Converts a single coordinate into a discrete addressing key, with boundary checking.
Expand All @@ -473,27 +482,27 @@ namespace octomap {
* @param key discrete 16 bit adressing key, result
* @return true if coordinate is within the octree bounds (valid), false otherwise
*/
bool coordToKeyChecked(double coordinate, unsigned depth, key_type& key) const;
CUDA_CALLABLE bool coordToKeyChecked(double coordinate, unsigned depth, key_type& key) const;

/// converts from a discrete key at a given depth into a coordinate
/// corresponding to the key's center
double keyToCoord(key_type key, unsigned depth) const;
CUDA_CALLABLE double keyToCoord(key_type key, unsigned depth) const;

/// converts from a discrete key at the lowest tree level into a coordinate
/// corresponding to the key's center
inline double keyToCoord(key_type key) const{
CUDA_CALLABLE inline double keyToCoord(key_type key) const{
return (double( (int) key - (int) this->tree_max_val ) +0.5) * this->resolution;
}

/// converts from an addressing key at the lowest tree level into a coordinate
/// corresponding to the key's center
inline point3d keyToCoord(const OcTreeKey& key) const{
CUDA_CALLABLE inline point3d keyToCoord(const OcTreeKey& key) const{
return point3d(float(keyToCoord(key[0])), float(keyToCoord(key[1])), float(keyToCoord(key[2])));
}

/// converts from an addressing key at a given depth into a coordinate
/// corresponding to the key's center
inline point3d keyToCoord(const OcTreeKey& key, unsigned depth) const{
CUDA_CALLABLE inline point3d keyToCoord(const OcTreeKey& key, unsigned depth) const{
return point3d(float(keyToCoord(key[0], depth)), float(keyToCoord(key[1], depth)), float(keyToCoord(key[2], depth)));
}

Expand Down
36 changes: 26 additions & 10 deletions octomap/include/octomap/OcTreeKey.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include <ciso646>

#include <assert.h>
#include <vector>

/* Libc++ does not implement the TR1 namespace, all c++11 related functionality
* is instead implemented in the std namespace.
Expand All @@ -58,6 +59,21 @@
}
#endif

#ifdef __CUDA_SUPPORT__
#include <cuda.h>
#include <cuda_runtime.h>
#endif

#ifdef __CUDACC__
#ifndef CUDA_CALLABLE
#define CUDA_CALLABLE __host__ __device__
#endif
#else
#ifndef CUDA_CALLABLE
#define CUDA_CALLABLE
#endif
#endif

namespace octomap {

typedef uint16_t key_type;
Expand All @@ -70,45 +86,45 @@ namespace octomap {
class OcTreeKey {

public:
OcTreeKey () {}
OcTreeKey (key_type a, key_type b, key_type c){
CUDA_CALLABLE OcTreeKey () {}
CUDA_CALLABLE OcTreeKey (key_type a, key_type b, key_type c){
k[0] = a;
k[1] = b;
k[2] = c;
}

OcTreeKey(const OcTreeKey& other){
CUDA_CALLABLE OcTreeKey(const OcTreeKey& other){
k[0] = other.k[0];
k[1] = other.k[1];
k[2] = other.k[2];
}

bool operator== (const OcTreeKey &other) const {
CUDA_CALLABLE bool operator== (const OcTreeKey &other) const {
return ((k[0] == other[0]) && (k[1] == other[1]) && (k[2] == other[2]));
}

bool operator!= (const OcTreeKey& other) const {
CUDA_CALLABLE bool operator!= (const OcTreeKey& other) const {
return( (k[0] != other[0]) || (k[1] != other[1]) || (k[2] != other[2]) );
}

OcTreeKey& operator=(const OcTreeKey& other){
CUDA_CALLABLE OcTreeKey& operator=(const OcTreeKey& other){
k[0] = other.k[0]; k[1] = other.k[1]; k[2] = other.k[2];
return *this;
}

const key_type& operator[] (unsigned int i) const {
CUDA_CALLABLE const key_type& operator[] (unsigned int i) const {
return k[i];
}

key_type& operator[] (unsigned int i) {
CUDA_CALLABLE key_type& operator[] (unsigned int i) {
return k[i];
}

key_type k[3];

/// Provides a hash function on Keys
struct KeyHash{
size_t operator()(const OcTreeKey& key) const{
CUDA_CALLABLE size_t operator()(const OcTreeKey& key) const{
// a simple hashing function
// explicit casts to size_t to operate on the complete range
// constanst will be promoted according to C++ standard
Expand Down Expand Up @@ -180,7 +196,7 @@ namespace octomap {
std::vector<OcTreeKey>::iterator end_of_ray;
const static size_t maxSize = 100000;
};

/**
* Computes the key of a child node while traversing the octree, given
* child index and current key
Expand Down
Loading