Skip to content

Commit

Permalink
Merge pull request rapidsai#641 from teju85/fea-ext-separate-c-librar…
Browse files Browse the repository at this point in the history
…y-build

[REVIEW] Separate c library build
  • Loading branch information
cjnolet authored Jun 4, 2019
2 parents 5ee2601 + 77aac6d commit 9d948d5
Show file tree
Hide file tree
Showing 12 changed files with 221 additions and 153 deletions.
7 changes: 4 additions & 3 deletions BUILD.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ $ cmake .. -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DGPU_ARCHS="70"

There are many options to configure the build process, see the [customizing build section](#custom-build-options).

3. Build `libcuml++`:
3. Build `libcuml++` and `libcuml`:

```bash
$ make -j
Expand Down Expand Up @@ -116,7 +116,7 @@ $ ./build.sh # build the cuML libraries, tests, and py

To build individual components, specify them as arguments to `build.sh`
```bash
$ ./build.sh libcuml # build and install the cuML C++ library
$ ./build.sh libcuml # build and install the cuML C++ and C-wrapper libraries
$ ./build.sh cuml # build and install the cuML python package
$ ./build.sh prims # build the ML prims tests
```
Expand All @@ -139,7 +139,8 @@ cuML's cmake has the following configurable flags available:
| Flag | Possible Values | Default Value | Behavior |
| --- | --- | --- | --- |
| BLAS_LIBRARIES | path/to/blas_lib | "" | Optional variable allowing to manually specify location of BLAS library. |
| BUILD_CUML_CPP_LIBRARY | [ON, OFF] | ON | Enable/disable building libcuml++ shared library. Setting this variable to `OFF` sets the variables BUILD_CUML_TESTS, BUILD_CUML_MG_TESTS and BUILD_CUML_EXAMPLES to `OFF` |
| BUILD_CUML_CPP_LIBRARY | [ON, OFF] | ON | Enable/disable building libcuml++ shared library. Setting this variable to `OFF` sets the variables BUILD_CUML_C_LIBRARY, BUILD_CUML_TESTS, BUILD_CUML_MG_TESTS and BUILD_CUML_EXAMPLES to `OFF` |
| BUILD_CUML_C_LIBRARY | [ON, OFF] | ON | Enable/disable building libcuml shared library. Setting this variable to `ON` will set the variable BUILD_CUML_CPP_LIBRARY to `ON` |
| BUILD_CUML_TESTS | [ON, OFF] | ON | Enable/disable building cuML algorithm test executable `ml_test`. |
| BUILD_CUML_MG_TESTS | [ON, OFF] | ON | Enable/disable building cuML algorithm test executable `ml_mg_test`. |
| BUILD_PRIMS_TESTS | [ON, OFF] | ON | Enable/disable building cuML algorithm test executable `prims_test`. |
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
- PR #636: Rand Index metric ml-prim
- PR #515: Added Random Projection feature
- PR #504: Contingency matrix ml-prim
- PR #641: C: Separate C-wrapper library build to generate libcuml.so
- PR #631: Add nvcategory based ordinal label encoder

## Improvements
Expand Down
5 changes: 3 additions & 2 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ VALIDARGS="clean libcuml cuml prims -v -g -n --allgpuarch --multigpu -h --help"
HELP="$0 [<target> ...] [<flag> ...]
where <target> is:
clean - remove all existing build artifacts and configuration (start over)
libcuml - build the cuml C++ code only
libcuml - build the cuml C++ code only. Also builds the C-wrapper library
around the C++ code.
cuml - build the cuml Python package
prims - build the ML prims tests
and <flag> is:
Expand Down Expand Up @@ -130,7 +131,7 @@ fi
if (( ${NUMARGS} == 0 )) || hasArg libcuml; then

cd ${LIBCUML_BUILD_DIR}
make -j${PARALLEL_LEVEL} cuml++ ml ml_mg VERBOSE=${VERBOSE} ${INSTALL_TARGET}
make -j${PARALLEL_LEVEL} cuml++ cuml ml ml_mg VERBOSE=${VERBOSE} ${INSTALL_TARGET}
fi

# Build and (optionally) install the cuml Python package
Expand Down
25 changes: 22 additions & 3 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ endif()

# Build the C++ shared library (libcuml++). When this ends up OFF, the logic
# further down also switches off the C library, the algorithm tests, the
# multi-GPU tests and the examples.
option(BUILD_CUML_CPP_LIBRARY "Build libcuml++ shared library" ON)

# Build the C-wrapper shared library (libcuml). It links against libcuml++,
# so enabling it forces BUILD_CUML_CPP_LIBRARY to ON further down.
option(BUILD_CUML_C_LIBRARY "Build libcuml shared library" ON)

# Build the cuML algorithm tests.
option(BUILD_CUML_TESTS "Build cuML algorithm tests" ON)

# Build the cuML multi-GPU algorithm tests.
option(BUILD_CUML_MG_TESTS "Build cuML multigpu algorithm tests" ON)
Expand All @@ -66,8 +68,14 @@ set(GPU_ARCHS "" CACHE STRING
# Keep find_* commands from picking up libraries under the install directory's lib/ folder.
# NOTE(review): CMAKE_INSTALL_DIR is not a variable CMake defines itself (the
# standard one is CMAKE_INSTALL_PREFIX). Unless it is set elsewhere in this
# project, the path below expands to "/lib" -- TODO confirm.
set(CMAKE_IGNORE_PATH "${CMAKE_INSTALL_DIR}/lib" CACHE STRING
"Ignore any libs added implicitly from the CMAKE_INSTALL_DIR")

# Disabling libcuml++ disables buidling algorithm tests and examples
# The C-wrapper library (libcuml) is linked against libcuml++, so asking for
# the C library implies building the C++ library too.
# NOTE(review): BUILD_CUML_C_LIBRARY defaults to ON and this check runs before
# the `if(NOT BUILD_CUML_CPP_LIBRARY)` block below, so passing only
# -DBUILD_CUML_CPP_LIBRARY=OFF is overridden here unless BUILD_CUML_C_LIBRARY
# is also set to OFF -- confirm this precedence is intended.
if(BUILD_CUML_C_LIBRARY)
  set(BUILD_CUML_CPP_LIBRARY ON)
endif()

# Disabling libcuml++ disables building algorithm tests and examples
if(NOT BUILD_CUML_CPP_LIBRARY)
set(BUILD_CUML_C_LIBRARY OFF)
set(BUILD_CUML_TESTS OFF)
set(BUILD_CUML_MG_TESTS OFF)
set(BUILD_CUML_EXAMPLES OFF)
Expand Down Expand Up @@ -263,14 +271,13 @@ set(PRIMS_TEST_UTILS
src_prims/utils.h)

###################################################################################################
# - build libcuml++ shared library ------------------------------------------------------------------
# - build libcuml++ shared library ----------------------------------------------------------------

if(BUILD_CUML_CPP_LIBRARY)

set(CUML_CPP_TARGET "cuml++")
add_library(${CUML_CPP_TARGET} SHARED
src/common/cumlHandle.cpp
src/common/cuml_api.cpp
src/dbscan/dbscan.cu
src/decisiontree/decisiontree.cu
src/glm/glm.cu
Expand Down Expand Up @@ -305,6 +312,18 @@ if(BUILD_CUML_CPP_LIBRARY)

endif(BUILD_CUML_CPP_LIBRARY)

###################################################################################################
# - build libcuml shared library ------------------------------------------------------------------

if(BUILD_CUML_C_LIBRARY)
  # libcuml: thin C-wrapper shared library. It contains only the extern "C"
  # entry points and gets the actual implementations from libcuml++.
  set(CUML_C_TARGET "cuml")
  add_library(${CUML_C_TARGET} SHARED
    src/common/cuml_api.cpp
    src/dbscan/dbscan_api.cpp
    src/glm/glm_api.cpp)
  # PUBLIC keeps the transitive linking behaviour of the former keyword-less
  # signature while making the visibility explicit. CUML_CPP_TARGET is set in
  # the libcuml++ section above, which is always enabled when this option is ON.
  target_link_libraries(${CUML_C_TARGET} PUBLIC ${CUML_CPP_TARGET})
endif()

###################################################################################################
# - build test executables ------------------------------------------------------------------------

Expand Down
6 changes: 5 additions & 1 deletion cpp/src/cuML_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@ extern "C" {

typedef int cumlHandle_t;

enum cumlError_t { CUML_SUCCESS, CUML_ERROR_UNKNOWN, CUML_INVALID_HANDLE };
/**
 * Status codes returned by the cuML C API functions.
 * Declared as a typedef'd anonymous enum so that C callers can write
 * `cumlError_t` without the `enum` keyword.
 */
typedef enum {
/* The call completed without error. */
CUML_SUCCESS,
/* An exception that could not be mapped to a more specific code was caught. */
CUML_ERROR_UNKNOWN,
/* Presumably reported when an integer handle cannot be resolved -- confirm against the handle lookup code. */
CUML_INVALID_HANDLE
} cumlError_t;

typedef cudaError_t (*cuml_allocate)(void** p,size_t n, cudaStream_t stream);
typedef cudaError_t (*cuml_deallocate)(void* p, size_t n, cudaStream_t stream);
Expand Down
50 changes: 0 additions & 50 deletions cpp/src/dbscan/dbscan.cu
Original file line number Diff line number Diff line change
Expand Up @@ -38,53 +38,3 @@ void dbscanFit(const cumlHandle& handle, double *input, int n_rows, int n_cols,
}

}; // end namespace ML


/**
 * C entry point for the single-precision DBSCAN fit.
 * Resolves `handle` through ML::handleMap and forwards the remaining
 * arguments to the float overload of dbscanFit. Returns the lookup status
 * when the lookup fails, CUML_ERROR_UNKNOWN when dbscanFit throws, and
 * CUML_SUCCESS otherwise.
 */
extern "C" cumlError_t cumlSpDbscanFit(cumlHandle_t handle, float *input, int n_rows, int n_cols, float eps, int min_pts,
                                       int *labels, size_t max_bytes_per_batch, bool verbose) {
    ML::cumlHandle *handle_ptr;
    cumlError_t status;
    std::tie(handle_ptr, status) = ML::handleMap.lookupHandlePointer(handle);

    // Nothing to run if the handle could not be resolved.
    if (status != CUML_SUCCESS) {
        return status;
    }

    try {
        dbscanFit(*handle_ptr, input, n_rows, n_cols, eps, min_pts, labels, max_bytes_per_batch, verbose);
    } catch (...) {
        // TODO: Implement this: catch MLCommon::Exception separately, log
        // e.what() and return e.getErrorCode() instead of the generic code.
        return CUML_ERROR_UNKNOWN;
    }
    return CUML_SUCCESS;
}

/**
 * C entry point for the double-precision DBSCAN fit.
 * Resolves `handle` through ML::handleMap and forwards the remaining
 * arguments to the double overload of dbscanFit. Returns the lookup status
 * when the lookup fails, CUML_ERROR_UNKNOWN when dbscanFit throws, and
 * CUML_SUCCESS otherwise.
 */
extern "C" cumlError_t cumlDpDbscanFit(cumlHandle_t handle, double *input, int n_rows, int n_cols, double eps, int min_pts,
                                       int *labels, size_t max_bytes_per_batch, bool verbose) {
    ML::cumlHandle *handle_ptr;
    cumlError_t status;
    std::tie(handle_ptr, status) = ML::handleMap.lookupHandlePointer(handle);

    // Nothing to run if the handle could not be resolved.
    if (status != CUML_SUCCESS) {
        return status;
    }

    try {
        dbscanFit(*handle_ptr, input, n_rows, n_cols, eps, min_pts, labels, max_bytes_per_batch, verbose);
    } catch (...) {
        // TODO: Implement this: catch MLCommon::Exception separately, log
        // e.what() and return e.getErrorCode() instead of the generic code.
        return CUML_ERROR_UNKNOWN;
    }
    return CUML_SUCCESS;
}
46 changes: 17 additions & 29 deletions cpp/src/dbscan/dbscan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,41 +17,29 @@
#pragma once
#include <cuML.hpp>

namespace ML{
namespace ML {

/**
* @defgroup DbscanCpp C++ implementation of Dbscan algo
* @brief Fits a DBSCAN model on an input feature matrix and outputs the labels.
* @param handle: cuml handle to use across the algorithm
* @param input: row-major input feature matrix
* @param n_rows: number of samples in the input feature matrix
* @param n_cols: number of features in the input feature matrix
* @param eps: the epsilon value to use for epsilon-neighborhood determination
* @param min_pts: minimum number of points to determine a cluster
* @param labels: (size n_rows) output labels array
* @param max_mem_bytes: the maximum number of bytes to be used for each batch of
* the pairwise distance calculation. This enables the trade off between
* memory usage and algorithm execution time.
* @param verbose: print useful information as algorithm executes
* @param[in] handle cuml handle to use across the algorithm
* @param[in] input row-major input feature matrix
* @param[in] n_rows number of samples in the input feature matrix
* @param[in] n_cols number of features in the input feature matrix
* @param[in] eps the epsilon value to use for epsilon-neighborhood determination
* @param[in] min_pts minimum number of points to determine a cluster
* @param[out] labels (size n_rows) output labels array
* @param[in] max_bytes_per_batch the maximum number of bytes to be used for each batch of
* the pairwise distance calculation. This enables the trade-off between
* memory usage and algorithm execution time.
* @param[in] verbose print useful information as algorithm executes
* @{
*/
void dbscanFit(const cumlHandle& handle, float *input, int n_rows, int n_cols, float eps, int min_pts,
int *labels, size_t max_bytes_per_batch, bool verbose = false);

/**
* @brief Fits a DBSCAN model on an input feature matrix and outputs the labels.
* @param handle: cuml handle to use across the algorithm
* @param input: row-major input feature matrix
* @param n_rows: number of samples in the input feature matrix
* @param n_cols: number of features in the input feature matrix
* @param eps: the epsilon value to use for epsilon-neighborhood determination
* @param min_pts: minimum number of points to determine a cluster
* @param labels: (size n_rows) output labels array
* @param max_mem_bytes: the maximum number of bytes to be used for each batch of
* the pairwise distance calculation. This enables the trade off between
* memory usage and algorithm execution time.
* @param verbose: print useful information as algorithm executes
*/
int *labels, size_t max_bytes_per_batch, bool verbose = false);
void dbscanFit(const cumlHandle& handle, double *input, int n_rows, int n_cols, double eps, int min_pts,
int *labels, size_t max_bytes_per_batch, bool verbose = false);
int *labels, size_t max_bytes_per_batch, bool verbose = false);
/** @} */

}

68 changes: 68 additions & 0 deletions cpp/src/dbscan/dbscan_api.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright (c) 2018-2019, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cuML_api.h>
#include "dbscan_api.h"
#include "dbscan.hpp"
#include "common/cumlHandle.hpp"

/**
 * @brief C-wrapper around the single-precision ML::dbscanFit.
 *
 * Resolves `handle` through ML::handleMap and, when the lookup succeeds,
 * forwards the remaining arguments to the float overload of ML::dbscanFit.
 * No C++ exception is allowed to escape from the dbscanFit call.
 *
 * @param[in]  handle              cuml handle to use across the algorithm
 * @param[in]  input               row-major input feature matrix
 * @param[in]  n_rows              number of samples in the input feature matrix
 * @param[in]  n_cols              number of features in the input feature matrix
 * @param[in]  eps                 epsilon value for epsilon-neighborhood determination
 * @param[in]  min_pts             minimum number of points to determine a cluster
 * @param[out] labels              (size n_rows) output labels array
 * @param[in]  max_bytes_per_batch maximum number of bytes to use for each batch of
 *                                 the pairwise distance calculation
 * @param[in]  verbose             non-zero to print information while running, 0 to run quietly
 * @return the status of the handle lookup if it is not CUML_SUCCESS,
 *         CUML_ERROR_UNKNOWN if ML::dbscanFit throws, CUML_SUCCESS otherwise.
 */
cumlError_t cumlSpDbscanFit(cumlHandle_t handle, float *input, int n_rows, int n_cols, float eps, int min_pts,
                            int *labels, size_t max_bytes_per_batch, int verbose) {
    cumlError_t status;
    ML::cumlHandle *handle_ptr;
    std::tie(handle_ptr, status) = ML::handleMap.lookupHandlePointer(handle);
    if (status == CUML_SUCCESS) {
        try {
            // Qualified call: this function lives in the global namespace, so an
            // unqualified dbscanFit would only be found through argument-dependent
            // lookup. The C-style int flag is converted to bool explicitly.
            ML::dbscanFit(*handle_ptr, input, n_rows, n_cols, eps, min_pts, labels,
                          max_bytes_per_batch, verbose != 0);
        }
        // TODO: Implement this: catch MLCommon::Exception separately, log
        // e.what() and set status = e.getErrorCode().
        catch (...) {
            status = CUML_ERROR_UNKNOWN;
        }
    }
    return status;
}

/**
 * @brief C-wrapper around the double-precision ML::dbscanFit.
 *
 * Resolves `handle` through ML::handleMap and, when the lookup succeeds,
 * forwards the remaining arguments to the double overload of ML::dbscanFit.
 * No C++ exception is allowed to escape from the dbscanFit call.
 *
 * @param[in]  handle              cuml handle to use across the algorithm
 * @param[in]  input               row-major input feature matrix
 * @param[in]  n_rows              number of samples in the input feature matrix
 * @param[in]  n_cols              number of features in the input feature matrix
 * @param[in]  eps                 epsilon value for epsilon-neighborhood determination
 * @param[in]  min_pts             minimum number of points to determine a cluster
 * @param[out] labels              (size n_rows) output labels array
 * @param[in]  max_bytes_per_batch maximum number of bytes to use for each batch of
 *                                 the pairwise distance calculation
 * @param[in]  verbose             non-zero to print information while running, 0 to run quietly
 * @return the status of the handle lookup if it is not CUML_SUCCESS,
 *         CUML_ERROR_UNKNOWN if ML::dbscanFit throws, CUML_SUCCESS otherwise.
 */
cumlError_t cumlDpDbscanFit(cumlHandle_t handle, double *input, int n_rows, int n_cols, double eps, int min_pts,
                            int *labels, size_t max_bytes_per_batch, int verbose) {
    cumlError_t status;
    ML::cumlHandle *handle_ptr;
    std::tie(handle_ptr, status) = ML::handleMap.lookupHandlePointer(handle);
    if (status == CUML_SUCCESS) {
        try {
            // Qualified call: this function lives in the global namespace, so an
            // unqualified dbscanFit would only be found through argument-dependent
            // lookup. The C-style int flag is converted to bool explicitly.
            ML::dbscanFit(*handle_ptr, input, n_rows, n_cols, eps, min_pts, labels,
                          max_bytes_per_batch, verbose != 0);
        }
        // TODO: Implement this: catch MLCommon::Exception separately, log
        // e.what() and set status = e.getErrorCode().
        catch (...) {
            status = CUML_ERROR_UNKNOWN;
        }
    }
    return status;
}
28 changes: 23 additions & 5 deletions cpp/src/dbscan/dbscan_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,31 @@
extern "C" {
#endif

//Single precision version of DBSCAN fit
/**
* @defgroup DbscanC C-wrapper to C++ implementation of Dbscan algo
* @brief Fits a DBSCAN model on an input feature matrix and outputs the labels.
* @param[in] handle cuml handle to use across the algorithm
* @param[in] input row-major input feature matrix
* @param[in] n_rows number of samples in the input feature matrix
* @param[in] n_cols number of features in the input feature matrix
* @param[in] eps the epsilon value to use for epsilon-neighborhood determination
* @param[in] min_pts minimum number of points to determine a cluster
* @param[out] labels (size n_rows) output labels array
* @param[in] max_bytes_per_batch the maximum number of bytes to be used for each batch of
* the pairwise distance calculation. This enables the trade-off between
* memory usage and algorithm execution time.
* @param[in] verbose Pass a 1 to print useful information as algorithm executes. To
* execute quietly, pass 0
* @return CUML_SUCCESS on success and other corresponding flags upon any failures.
* @{
*/
cumlError_t cumlSpDbscanFit(cumlHandle_t handle, float* input,
int n_rows, int n_cols, float eps, int min_pts, int *labels, size_t max_bytes_per_batch);

//Double precision version of DBSCAN fit
int n_rows, int n_cols, float eps, int min_pts, int *labels,
size_t max_bytes_per_batch, int verbose);
cumlError_t cumlDpDbscanFit(cumlHandle_t handle, double *input,
int n_rows, int n_cols, double eps, int min_pts, int *labels, size_t max_bytes_per_batch);
int n_rows, int n_cols, double eps, int min_pts, int *labels,
size_t max_bytes_per_batch, int verbose);
/** @} */

#ifdef __cplusplus
}
Expand Down
Loading

0 comments on commit 9d948d5

Please sign in to comment.