Merge pull request rapidsai#641 from teju85/fea-ext-separate-c-library-build

[REVIEW] Separate c library build
rlratzel · Jun 4, 2019 · 9d948d5 · 9d948d5
2 parents 5ee2601 + 77aac6d
commit 9d948d5
Show file tree

Hide file tree

Showing 12 changed files with 221 additions and 153 deletions.
diff --git a/BUILD.md b/BUILD.md
@@ -60,7 +60,7 @@ $ cmake .. -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DGPU_ARCHS="70"
 
 There are many options to configure the build process, see the [customizing build section](#custom-build-options).
 
-3. Build `libcuml++`:
+3. Build `libcuml++` and `libcuml`:
 
 ```bash
 $ make -j
@@ -116,7 +116,7 @@ $ ./build.sh                           # build the cuML libraries, tests, and py
 
 To build individual components, specify them as arguments to `build.sh`
 ```bash
-$ ./build.sh libcuml                   # build and install the cuML C++ library
+$ ./build.sh libcuml                   # build and install the cuML C++ and C-wrapper libraries
 $ ./build.sh cuml                      # build and install the cuML python package
 $ ./build.sh prims                     # build the ML prims tests
 ```
@@ -139,7 +139,8 @@ cuML's cmake has the following configurable flags available:
 | Flag | Possible Values | Default Value | Behavior |
 | --- | --- | --- | --- |
 | BLAS_LIBRARIES | path/to/blas_lib | "" | Optional variable allowing to manually specify location of BLAS library. |
-| BUILD_CUML_CPP_LIBRARY | [ON, OFF]  | ON  | Enable/disable building libcuml++ shared library. Setting this variable to `OFF` sets the variables BUILD_CUML_TESTS, BUILD_CUML_MG_TESTS and BUILD_CUML_EXAMPLES to `OFF` |
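+| BUILD_CUML_CPP_LIBRARY | [ON, OFF]  | ON  | Enable/disable building libcuml++ shared library. Setting this variable to `OFF` sets the variables BUILD_CUML_C_LIBRARY, BUILD_CUML_TESTS, BUILD_CUML_MG_TESTS and BUILD_CUML_EXAMPLES to `OFF`. Note that this only takes effect when BUILD_CUML_C_LIBRARY is also set to `OFF`, because enabling libcuml forces BUILD_CUML_CPP_LIBRARY to `ON`. |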
+| BUILD_CUML_C_LIBRARY | [ON, OFF]  | ON  | Enable/disable building libcuml shared library. Setting this variable to `ON` will set the variable BUILD_CUML_CPP_LIBRARY to `ON` |
 | BUILD_CUML_TESTS | [ON, OFF]  | ON  |  Enable/disable building cuML algorithm test executable `ml_test`.  |
 | BUILD_CUML_MG_TESTS | [ON, OFF]  | ON  |  Enable/disable building cuML algorithm test executable `ml_mg_test`. |
 | BUILD_PRIMS_TESTS | [ON, OFF]  | ON  | Enable/disable building cuML algorithm test executable `prims_test`.  |

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,7 @@
 - PR #636: Rand Index metric ml-prim
 - PR #515: Added Random Projection feature
 - PR #504: Contingency matrix ml-prim
+- PR #641: C: Separate C-wrapper library build to generate libcuml.so
 - PR #631: Add nvcategory based ordinal label encoder
 
 ## Improvements

diff --git a/build.sh b/build.sh
@@ -22,7 +22,8 @@ VALIDARGS="clean libcuml cuml prims -v -g -n --allgpuarch --multigpu -h --help"
 HELP="$0 [<target> ...] [<flag> ...]
  where <target> is:
    clean         - remove all existing build artifacts and configuration (start over)
-   libcuml       - build the cuml C++ code only
+   libcuml       - build the cuml C++ code only. Also builds the C-wrapper library
+                   around the C++ code.
    cuml          - build the cuml Python package
    prims         - build the ML prims tests
  and <flag> is:
@@ -130,7 +131,7 @@ fi
 if (( ${NUMARGS} == 0 )) || hasArg libcuml; then
 
     cd ${LIBCUML_BUILD_DIR}
-    make -j${PARALLEL_LEVEL} cuml++ ml ml_mg VERBOSE=${VERBOSE} ${INSTALL_TARGET}
+    make -j${PARALLEL_LEVEL} cuml++ cuml ml ml_mg VERBOSE=${VERBOSE} ${INSTALL_TARGET}
 fi
 
 # Build and (optionally) install the cuml Python package

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -41,6 +41,8 @@ endif()
 
 option(BUILD_CUML_CPP_LIBRARY "Build libcuml++ shared library" ON)
 
+option(BUILD_CUML_C_LIBRARY "Build libcuml shared library" ON)
+
 option(BUILD_CUML_TESTS "Build cuML algorithm tests" ON)
 
 option(BUILD_CUML_MG_TESTS "Build cuML multigpu algorithm tests" ON)
@@ -66,8 +68,14 @@ set(GPU_ARCHS "" CACHE STRING
 set(CMAKE_IGNORE_PATH "${CMAKE_INSTALL_DIR}/lib" CACHE STRING
   "Ignore any libs added implicitly from the CMAKE_INSTALL_DIR")
 
-# Disabling libcuml++ disables buidling algorithm tests and examples
+# Enabling libcuml enables building libcuml++
+if(BUILD_CUML_C_LIBRARY)
+  set(BUILD_CUML_CPP_LIBRARY ON)
+endif(BUILD_CUML_C_LIBRARY)
+
+# Disabling libcuml++ disables building libcuml, algorithm tests and examples
 if(NOT BUILD_CUML_CPP_LIBRARY)
+  set(BUILD_CUML_C_LIBRARY OFF)
   set(BUILD_CUML_TESTS OFF)
   set(BUILD_CUML_MG_TESTS OFF)
   set(BUILD_CUML_EXAMPLES OFF)
@@ -263,14 +271,13 @@ set(PRIMS_TEST_UTILS
     src_prims/utils.h)
 
 ###################################################################################################
-# - build libcuml++ shared library ------------------------------------------------------------------
+# - build libcuml++ shared library ----------------------------------------------------------------
 
 if(BUILD_CUML_CPP_LIBRARY)
 
   set(CUML_CPP_TARGET "cuml++")
   add_library(${CUML_CPP_TARGET} SHARED
     src/common/cumlHandle.cpp
-    src/common/cuml_api.cpp
     src/dbscan/dbscan.cu
     src/decisiontree/decisiontree.cu
     src/glm/glm.cu
@@ -305,6 +312,18 @@ if(BUILD_CUML_CPP_LIBRARY)
 
 endif(BUILD_CUML_CPP_LIBRARY)
 
+###################################################################################################
+# - build libcuml shared library ------------------------------------------------------------------
+
+if(BUILD_CUML_C_LIBRARY)
+  set(CUML_C_TARGET "cuml")
+  add_library(${CUML_C_TARGET} SHARED
+    src/common/cuml_api.cpp
+    src/dbscan/dbscan_api.cpp
+    src/glm/glm_api.cpp)
+  target_link_libraries(${CUML_C_TARGET} ${CUML_CPP_TARGET})
+endif(BUILD_CUML_C_LIBRARY)
+
 ###################################################################################################
 # - build test executables ------------------------------------------------------------------------
 

diff --git a/cpp/src/cuML_api.h b/cpp/src/cuML_api.h
@@ -26,7 +26,11 @@ extern "C" {
 
 typedef int cumlHandle_t;
 
-enum cumlError_t { CUML_SUCCESS, CUML_ERROR_UNKNOWN, CUML_INVALID_HANDLE };
+typedef enum {
+    CUML_SUCCESS,
+    CUML_ERROR_UNKNOWN,
+    CUML_INVALID_HANDLE
+} cumlError_t;
 
 typedef cudaError_t (*cuml_allocate)(void** p,size_t n, cudaStream_t stream);
 typedef cudaError_t (*cuml_deallocate)(void* p, size_t n, cudaStream_t stream);

diff --git a/cpp/src/dbscan/dbscan.cu b/cpp/src/dbscan/dbscan.cu
@@ -38,53 +38,3 @@ void dbscanFit(const cumlHandle& handle, double *input, int n_rows, int n_cols,
 }
 
 }; // end namespace ML
-
-
-extern "C" cumlError_t cumlSpDbscanFit(cumlHandle_t handle, float *input, int n_rows, int n_cols, float eps, int min_pts,
-               int *labels, size_t max_bytes_per_batch, bool verbose) {
-    cumlError_t status;
-    ML::cumlHandle *handle_ptr;
-    std::tie(handle_ptr, status) = ML::handleMap.lookupHandlePointer(handle);
-    if (status == CUML_SUCCESS) {
-        try
-        {
-            dbscanFit(*handle_ptr, input, n_rows, n_cols, eps, min_pts, labels, max_bytes_per_batch, verbose);
-        }
-        //TODO: Implement this
-        //catch (const MLCommon::Exception& e)
-        //{
-        //    //log e.what()?
-        //    status =  e.getErrorCode();
-        //}
-        catch (...)
-        {
-            status = CUML_ERROR_UNKNOWN;
-        }
-    }
-    return status;
-
-}
-
-extern "C" cumlError_t cumlDpDbscanFit(cumlHandle_t handle, double *input, int n_rows, int n_cols, double eps, int min_pts,
-               int *labels, size_t max_bytes_per_batch, bool verbose) {
-    cumlError_t status;
-    ML::cumlHandle *handle_ptr;
-    std::tie(handle_ptr, status) = ML::handleMap.lookupHandlePointer(handle);
-    if (status == CUML_SUCCESS) {
-        try
-        {
-            dbscanFit(*handle_ptr, input, n_rows, n_cols, eps, min_pts, labels, max_bytes_per_batch, verbose);
-        }
-        //TODO: Implement this
-        //catch (const MLCommon::Exception& e)
-        //{
-        //    //log e.what()?
-        //    status =  e.getErrorCode();
-        //}
-        catch (...)
-        {
-            status = CUML_ERROR_UNKNOWN;
-        }
-    }
-    return status;
-}
diff --git a/cpp/src/dbscan/dbscan.hpp b/cpp/src/dbscan/dbscan.hpp
@@ -17,41 +17,29 @@
 #pragma once
 #include <cuML.hpp>
 
-namespace ML{
+namespace ML {
 
 /**
+ * @defgroup DbscanCpp C++ implementation of Dbscan algo
  * @brief Fits a DBSCAN model on an input feature matrix and outputs the labels.
- * @param handle: cuml handle to use across the algorithm
- * @param input: row-major input feature matrix
- * @param n_rows: number of samples in the input feature matrix
- * @param n_cols: number of features in the input feature matrix
- * @param eps: the epsilon value to use for epsilon-neighborhood determination
- * @param min_pts: minimum number of points to determine a cluster
- * @param labels: (size n_rows) output labels array
- * @param max_mem_bytes: the maximum number of bytes to be used for each batch of
- *          the pairwise distance calculation. This enables the trade off between
- *          memory usage and algorithm execution time.
- * @param verbose: print useful information as algorithm executes
+ * @param[in] handle cuml handle to use across the algorithm
+ * @param[in] input row-major input feature matrix
+ * @param[in] n_rows number of samples in the input feature matrix
+ * @param[in] n_cols number of features in the input feature matrix
+ * @param[in] eps the epsilon value to use for epsilon-neighborhood determination
+ * @param[in] min_pts minimum number of points to determine a cluster
+ * @param[out] labels (size n_rows) output labels array
+ * @param[in] max_bytes_per_batch the maximum number of bytes to be used for each batch of
+ *            the pairwise distance calculation. This enables the trade-off between
+ *            memory usage and algorithm execution time.
+ * @param[in] verbose print useful information as algorithm executes
+ * @{
  */
 void dbscanFit(const cumlHandle& handle, float *input, int n_rows, int n_cols, float eps, int min_pts,
-		       int *labels, size_t max_bytes_per_batch, bool verbose = false);
-
-/**
- * @brief Fits a DBSCAN model on an input feature matrix and outputs the labels.
- * @param handle: cuml handle to use across the algorithm
- * @param input: row-major input feature matrix
- * @param n_rows: number of samples in the input feature matrix
- * @param n_cols: number of features in the input feature matrix
- * @param eps: the epsilon value to use for epsilon-neighborhood determination
- * @param min_pts: minimum number of points to determine a cluster
- * @param labels: (size n_rows) output labels array
- * @param max_mem_bytes: the maximum number of bytes to be used for each batch of
- *          the pairwise distance calculation. This enables the trade off between
- *          memory usage and algorithm execution time.
- * @param verbose: print useful information as algorithm executes
- */
+               int *labels, size_t max_bytes_per_batch, bool verbose = false);
 void dbscanFit(const cumlHandle& handle, double *input, int n_rows, int n_cols, double eps, int min_pts,
-		       int *labels, size_t max_bytes_per_batch, bool verbose = false);
+               int *labels, size_t max_bytes_per_batch, bool verbose = false);
+/** @} */
 
 }
 
diff --git a/cpp/src/dbscan/dbscan_api.cpp b/cpp/src/dbscan/dbscan_api.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2018-2019, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <cuML_api.h>
+#include "dbscan_api.h"
+#include "dbscan.hpp"
+#include "common/cumlHandle.hpp"
+
+cumlError_t cumlSpDbscanFit(cumlHandle_t handle, float *input, int n_rows, int n_cols, float eps, int min_pts,
+                            int *labels, size_t max_bytes_per_batch, int verbose) {
+    cumlError_t status;
+    ML::cumlHandle *handle_ptr;
+    std::tie(handle_ptr, status) = ML::handleMap.lookupHandlePointer(handle);
+    if (status == CUML_SUCCESS) {
+        try
+        {
+            dbscanFit(*handle_ptr, input, n_rows, n_cols, eps, min_pts, labels, max_bytes_per_batch, verbose);
+        }
+        // TODO: implement a dedicated handler for MLCommon::Exception, e.g.
+        //   catch (const MLCommon::Exception& e) {
+        //       // log e.what()?
+        //       status = e.getErrorCode();
+        //   }
+        // Until then every exception is reported as CUML_ERROR_UNKNOWN.
+        catch (...)
+        {
+            status = CUML_ERROR_UNKNOWN;
+        }
+    }
+    return status;
+
+}
+
+cumlError_t cumlDpDbscanFit(cumlHandle_t handle, double *input, int n_rows, int n_cols, double eps, int min_pts,
+                            int *labels, size_t max_bytes_per_batch, int verbose) {
+    cumlError_t status;
+    ML::cumlHandle *handle_ptr;
+    std::tie(handle_ptr, status) = ML::handleMap.lookupHandlePointer(handle);
+    if (status == CUML_SUCCESS) {
+        try
+        {
+            dbscanFit(*handle_ptr, input, n_rows, n_cols, eps, min_pts, labels, max_bytes_per_batch, verbose);
+        }
+        // TODO: implement a dedicated handler for MLCommon::Exception, e.g.
+        //   catch (const MLCommon::Exception& e) {
+        //       // log e.what()?
+        //       status = e.getErrorCode();
+        //   }
+        // Until then every exception is reported as CUML_ERROR_UNKNOWN.
+        catch (...)
+        {
+            status = CUML_ERROR_UNKNOWN;
+        }
+    }
+    return status;
+}
diff --git a/cpp/src/dbscan/dbscan_api.h b/cpp/src/dbscan/dbscan_api.h
@@ -21,13 +21,31 @@
 extern "C" {
 #endif
 
-//Single precision version of DBSCAN fit
+/**
+ * @defgroup DbscanC C-wrapper to C++ implementation of Dbscan algo
+ * @brief Fits a DBSCAN model on an input feature matrix and outputs the labels.
+ * @param[in] handle cuml handle to use across the algorithm
+ * @param[in] input row-major input feature matrix
+ * @param[in] n_rows number of samples in the input feature matrix
+ * @param[in] n_cols number of features in the input feature matrix
+ * @param[in] eps the epsilon value to use for epsilon-neighborhood determination
+ * @param[in] min_pts minimum number of points to determine a cluster
+ * @param[out] labels (size n_rows) output labels array
+ * @param[in] max_bytes_per_batch the maximum number of bytes to be used for each batch of
+ *            the pairwise distance calculation. This enables the trade-off between
+ *            memory usage and algorithm execution time.
+ * @param[in] verbose Pass a 1 to print useful information as algorithm executes. To
+ * execute quietly, pass 0
+ * @return CUML_SUCCESS on success and other corresponding flags upon any failures.
+ * @{
+ */
 cumlError_t cumlSpDbscanFit(cumlHandle_t handle, float* input,
-        int n_rows, int n_cols, float eps, int min_pts, int *labels, size_t max_bytes_per_batch);
-
-//Double precision version of DBSCAN fit
+                            int n_rows, int n_cols, float eps, int min_pts, int *labels,
+                            size_t max_bytes_per_batch, int verbose);
 cumlError_t cumlDpDbscanFit(cumlHandle_t handle, double *input,
-        int n_rows, int n_cols, double eps, int min_pts, int *labels, size_t max_bytes_per_batch);
+                            int n_rows, int n_cols, double eps, int min_pts, int *labels,
+                            size_t max_bytes_per_batch, int verbose);
+/** @} */
 
 #ifdef __cplusplus
 }