Merge branch 'rapidsai:branch-24.08' into fea-nnd-dist-epilogue
jinsolp authored Jun 25, 2024
2 parents 31db603 + b863f18 commit fe64479
Showing 31 changed files with 882 additions and 70 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -369,7 +369,7 @@ If citing the k-selection routines, please consider the following bibtex:
isbn = {9798400701092},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
location = {Denver, CO, USA}
location = {Denver, CO, USA},
series = {SC '23}
}
```
2 changes: 2 additions & 0 deletions cpp/CMakeLists.txt
@@ -302,6 +302,8 @@ if(RAFT_COMPILE_LIBRARY)
src/distance/detail/pairwise_matrix/dispatch_correlation_float_float_float_int.cu
src/distance/detail/pairwise_matrix/dispatch_cosine_double_double_double_int.cu
src/distance/detail/pairwise_matrix/dispatch_cosine_float_float_float_int.cu
src/distance/detail/pairwise_matrix/dispatch_dice_double_double_double_int.cu
src/distance/detail/pairwise_matrix/dispatch_dice_float_float_float_int.cu
src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_double_double_double_int.cu
src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_float_float_float_int.cu
src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_double_double_double_int.cu
4 changes: 0 additions & 4 deletions cpp/bench/ann/CMakeLists.txt
@@ -58,10 +58,6 @@ if(BUILD_CPU_ONLY)
set(RAFT_ANN_BENCH_USE_RAFT_BRUTE_FORCE OFF)
set(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB OFF)
set(RAFT_ANN_BENCH_USE_GGNN OFF)
elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0.0)
# Disable faiss benchmarks on CUDA 12 since faiss is not yet CUDA 12-enabled.
# https://github.com/rapidsai/raft/issues/1627
set(RAFT_FAISS_ENABLE_GPU OFF)
endif()

set(RAFT_ANN_BENCH_USE_RAFT OFF)
10 changes: 8 additions & 2 deletions cpp/bench/ann/src/common/benchmark.hpp
@@ -459,8 +459,14 @@ void register_search(std::shared_ptr<const Dataset<T>> dataset,
*/
->MeasureProcessCPUTime()
->UseRealTime();

if (metric_objective == Objective::THROUGHPUT) { b->ThreadRange(threads[0], threads[1]); }
if (metric_objective == Objective::THROUGHPUT) {
if (index.algo.find("faiss_gpu") != std::string::npos) {
log_warn(
"FAISS GPU does not work in throughput mode because the underlying "
"StandardGpuResources object is not thread-safe. This will cause unexpected results");
}
b->ThreadRange(threads[0], threads[1]);
}
}
}
}
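Note: throughput mode maps onto Google Benchmark's `ThreadRange`, which re-runs the registered benchmark with 1, 2, 4, ... concurrent threads that all share the fixture's state, which is exactly why a shared, non-thread-safe object such as `faiss::gpu::StandardGpuResources` produces unreliable numbers there. The following is a minimal, standalone sketch of that pattern, not the raft-ann-bench harness itself; `FakeSharedResource` and the mutex are hypothetical stand-ins.

```cpp
#include <benchmark/benchmark.h>

#include <mutex>

// Hypothetical stand-in for a shared, non-thread-safe resource such as
// faiss::gpu::StandardGpuResources.
struct FakeSharedResource {
  int counter = 0;
};

static FakeSharedResource g_resource;
static std::mutex g_mutex;

static void BM_SharedResourceSearch(benchmark::State& state)
{
  for (auto _ : state) {
    // Every thread spawned by ThreadRange hits the same global object; the
    // mutex keeps this toy example correct, but a real GPU resource without
    // such protection would race.
    std::lock_guard<std::mutex> lock(g_mutex);
    benchmark::DoNotOptimize(++g_resource.counter);
  }
}

// Mirrors b->ThreadRange(threads[0], threads[1]) above: the benchmark is
// repeated with 1, 2, and 4 concurrent threads.
BENCHMARK(BM_SharedResourceSearch)->ThreadRange(1, 4)->UseRealTime();

BENCHMARK_MAIN();
```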
6 changes: 3 additions & 3 deletions cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp
@@ -51,10 +51,10 @@ void parse_build_param(const nlohmann::json& conf,
{
parse_base_build_param<T>(conf, param);
param.M = conf.at("M");
if (conf.contains("usePrecomputed")) {
param.usePrecomputed = conf.at("usePrecomputed");
if (conf.contains("use_precomputed_table")) {
param.use_precomputed_table = conf.at("use_precomputed_table");
} else {
param.usePrecomputed = false;
param.use_precomputed_table = false;
}
if (conf.contains("bitsPerCode")) {
param.bitsPerCode = conf.at("bitsPerCode");
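Note: the rename only matters at the configuration layer. Build parameters arrive as an nlohmann::json object and any missing key falls back to a default, as in the minimal sketch below; the concrete keys and values are a hypothetical faiss_cpu_ivf_pq block, not the exact raft-ann-bench schema.

```cpp
#include <nlohmann/json.hpp>

#include <iostream>

int main()
{
  // Hypothetical build-parameter block using the renamed key.
  nlohmann::json conf = {{"nlist", 1024},
                         {"M", 32},
                         {"bitsPerCode", 8},
                         {"use_precomputed_table", false}};

  // Same contains()/at() pattern as the parser above: use the configured
  // value when present, otherwise a default.
  bool use_precomputed_table = conf.contains("use_precomputed_table")
                                 ? conf.at("use_precomputed_table").get<bool>()
                                 : false;
  int bits_per_code = conf.contains("bitsPerCode") ? conf.at("bitsPerCode").get<int>() : 8;

  std::cout << std::boolalpha << use_precomputed_table << " " << bits_per_code << "\n";
  return 0;
}
```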
2 changes: 1 addition & 1 deletion cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h
@@ -229,7 +229,7 @@ class FaissCpuIVFPQ : public FaissCpu<T> {
struct BuildParam : public FaissCpu<T>::BuildParam {
int M;
int bitsPerCode;
bool usePrecomputed;
bool use_precomputed_table;
};

FaissCpuIVFPQ(Metric metric, int dim, const BuildParam& param) : FaissCpu<T>(metric, dim, param)
30 changes: 29 additions & 1 deletion cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu
@@ -45,6 +45,11 @@ void parse_build_param(const nlohmann::json& conf,
typename raft::bench::ann::FaissGpuIVFFlat<T>::BuildParam& param)
{
parse_base_build_param<T>(conf, param);
if (conf.contains("use_raft")) {
param.use_raft = conf.at("use_raft");
} else {
param.use_raft = false;
}
}

template <typename T>
@@ -63,6 +68,16 @@ void parse_build_param(const nlohmann::json& conf,
} else {
param.useFloat16 = false;
}
if (conf.contains("use_raft")) {
param.use_raft = conf.at("use_raft");
} else {
param.use_raft = false;
}
if (conf.contains("bitsPerCode")) {
param.bitsPerCode = conf.at("bitsPerCode");
} else {
param.bitsPerCode = 8;
}
}

template <typename T>
@@ -160,5 +175,18 @@ REGISTER_ALGO_INSTANCE(std::uint8_t);

#ifdef ANN_BENCH_BUILD_MAIN
#include "../common/benchmark.hpp"
int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); }
int main(int argc, char** argv)
{
rmm::mr::cuda_memory_resource cuda_mr;
// Construct a resource that uses a coalescing best-fit pool allocator
// and is initially sized to half of free device memory.
rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{
&cuda_mr, rmm::percent_of_free_device_memory(50)};
// Updates the current device resource pointer to `pool_mr`
auto old_mr = rmm::mr::set_current_device_resource(&pool_mr);
auto ret = raft::bench::ann::run_main(argc, argv);
// Restores the current device resource pointer to its previous value
rmm::mr::set_current_device_resource(old_mr);
return ret;
}
#endif
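Note: the new `main` installs an RMM pool so that device allocations made while the benchmarks run are served by a coalescing best-fit sub-allocator instead of repeated `cudaMalloc`/`cudaFree` calls. A stripped-down sketch of the same pattern, assuming nothing beyond RMM itself:

```cpp
#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

int main()
{
  // Upstream resource that performs the actual cudaMalloc/cudaFree calls.
  rmm::mr::cuda_memory_resource cuda_mr;

  // Coalescing best-fit pool, initially sized to half of free device memory.
  rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{
    &cuda_mr, rmm::percent_of_free_device_memory(50)};

  auto* old_mr = rmm::mr::set_current_device_resource(&pool_mr);

  {
    // Any allocation that goes through the current device resource is now
    // served from the pool, e.g. this 1 MiB scratch buffer.
    rmm::device_buffer scratch{1 << 20, rmm::cuda_stream_default};
  }

  // Restore the previous resource before the pool goes out of scope.
  rmm::mr::set_current_device_resource(old_mr);
  return 0;
}
```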
126 changes: 102 additions & 24 deletions cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h
@@ -17,15 +17,29 @@
#define FAISS_WRAPPER_H_

#include "../common/ann_types.hpp"
#include "../raft/raft_ann_bench_utils.h"

#include <raft/core/device_mdarray.hpp>
#include <raft/core/device_resources.hpp>
#include <raft/core/host_mdarray.hpp>
#include <raft/core/host_mdspan.hpp>
#include <raft/core/logger.hpp>
#include <raft/core/resource/stream_view.hpp>
#include <raft/distance/distance_types.hpp>
#include <raft/util/cudart_utils.hpp>

#include <raft_runtime/neighbors/refine.hpp>

#include <rmm/cuda_device.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/IndexIVFPQ.h>
#include <faiss/IndexRefine.h>
#include <faiss/IndexScalarQuantizer.h>
#include <faiss/MetricType.h>
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/gpu/GpuIndexIVFPQ.h>
@@ -43,7 +57,7 @@

namespace {

faiss::MetricType parse_metric_type(raft::bench::ann::Metric metric)
faiss::MetricType parse_metric_faiss(raft::bench::ann::Metric metric)
{
if (metric == raft::bench::ann::Metric::kInnerProduct) {
return faiss::METRIC_INNER_PRODUCT;
@@ -95,7 +109,7 @@ class FaissGpu : public ANN<T>, public AnnGPU {
FaissGpu(Metric metric, int dim, const BuildParam& param)
: ANN<T>(metric, dim),
gpu_resource_{std::make_shared<faiss::gpu::StandardGpuResources>()},
metric_type_(parse_metric_type(metric)),
metric_type_(parse_metric_faiss(metric)),
nlist_{param.nlist},
training_sample_fraction_{1.0 / double(param.ratio)}
{
@@ -127,7 +141,7 @@ class FaissGpu : public ANN<T>, public AnnGPU {
AlgoProperty property;
// to enable building big dataset which is larger than GPU memory
property.dataset_memory_type = MemoryType::Host;
property.query_memory_type = MemoryType::Host;
property.query_memory_type = MemoryType::Device;
return property;
}

@@ -162,8 +176,10 @@ class FaissGpu : public ANN<T>, public AnnGPU {
int device_;
double training_sample_fraction_;
std::shared_ptr<faiss::SearchParameters> search_params_;
std::shared_ptr<faiss::IndexRefineSearchParameters> refine_search_params_{nullptr};
const T* dataset_;
float refine_ratio_ = 1.0;
Objective metric_objective_;
};

template <typename T>
@@ -201,19 +217,65 @@ template <typename T>
void FaissGpu<T>::search(
const T* queries, int batch_size, int k, AnnBase::index_type* neighbors, float* distances) const
{
ASSERT(Objective::LATENCY, "l2Knn: rowMajorIndex and rowMajorQuery should have same layout");
using IdxT = faiss::idx_t;
static_assert(sizeof(size_t) == sizeof(faiss::idx_t),
"sizes of size_t and faiss::idx_t are different");

if (this->refine_ratio_ > 1.0) {
// TODO: FAISS changed their search APIs to accept the search parameters as a struct object
// but their refine API doesn't allow the struct to be passed in. Once this is fixed, we
// need to re-enable refinement below
// index_refine_->search(batch_size, queries, k, distances,
// reinterpret_cast<faiss::idx_t*>(neighbors), this->search_params_.get()); Related FAISS issue:
// https://github.com/facebookresearch/faiss/issues/3118
throw std::runtime_error(
"FAISS doesn't support refinement in their new APIs so this feature is disabled in the "
"benchmarks for the time being.");
if (refine_ratio_ > 1.0) {
if (raft::get_device_for_address(queries) >= 0) {
uint32_t k0 = static_cast<uint32_t>(refine_ratio_ * k);
auto distances_tmp = raft::make_device_matrix<float, IdxT>(
gpu_resource_->getRaftHandle(device_), batch_size, k0);
auto candidates =
raft::make_device_matrix<IdxT, IdxT>(gpu_resource_->getRaftHandle(device_), batch_size, k0);
index_->search(batch_size,
queries,
k0,
distances_tmp.data_handle(),
candidates.data_handle(),
this->search_params_.get());

auto queries_host = raft::make_host_matrix<T, IdxT>(batch_size, index_->d);
auto candidates_host = raft::make_host_matrix<IdxT, IdxT>(batch_size, k0);
auto neighbors_host = raft::make_host_matrix<IdxT, IdxT>(batch_size, k);
auto distances_host = raft::make_host_matrix<float, IdxT>(batch_size, k);
auto dataset_v = raft::make_host_matrix_view<const T, faiss::idx_t>(
this->dataset_, index_->ntotal, index_->d);

raft::device_resources handle_ = gpu_resource_->getRaftHandle(device_);

raft::copy(queries_host.data_handle(), queries, queries_host.size(), handle_.get_stream());
raft::copy(candidates_host.data_handle(),
candidates.data_handle(),
candidates_host.size(),
handle_.get_stream());

// wait for the queries to copy to host in `stream`
handle_.sync_stream();

raft::runtime::neighbors::refine(handle_,
dataset_v,
queries_host.view(),
candidates_host.view(),
neighbors_host.view(),
distances_host.view(),
parse_metric_type(this->metric_));

raft::copy(neighbors,
(size_t*)neighbors_host.data_handle(),
neighbors_host.size(),
handle_.get_stream());
raft::copy(
distances, distances_host.data_handle(), distances_host.size(), handle_.get_stream());
} else {
index_refine_->search(batch_size,
queries,
k,
distances,
reinterpret_cast<faiss::idx_t*>(neighbors),
this->refine_search_params_.get());
}
} else {
index_->search(batch_size,
queries,
@@ -255,13 +317,16 @@ void FaissGpu<T>::load_(const std::string& file)
template <typename T>
class FaissGpuIVFFlat : public FaissGpu<T> {
public:
using typename FaissGpu<T>::BuildParam;
struct BuildParam : public FaissGpu<T>::BuildParam {
bool use_raft;
};

FaissGpuIVFFlat(Metric metric, int dim, const BuildParam& param) : FaissGpu<T>(metric, dim, param)
{
faiss::gpu::GpuIndexIVFFlatConfig config;
config.device = this->device_;
this->index_ = std::make_shared<faiss::gpu::GpuIndexIVFFlat>(
config.device = this->device_;
config.use_raft = param.use_raft;
this->index_ = std::make_shared<faiss::gpu::GpuIndexIVFFlat>(
this->gpu_resource_.get(), dim, param.nlist, this->metric_type_, config);
}

@@ -295,23 +360,26 @@ class FaissGpuIVFPQ : public FaissGpu<T> {
int M;
bool useFloat16;
bool usePrecomputed;
bool use_raft;
int bitsPerCode;
};

FaissGpuIVFPQ(Metric metric, int dim, const BuildParam& param) : FaissGpu<T>(metric, dim, param)
{
faiss::gpu::GpuIndexIVFPQConfig config;
config.useFloat16LookupTables = param.useFloat16;
config.usePrecomputedTables = param.usePrecomputed;
config.use_raft = param.use_raft;
config.interleavedLayout = param.use_raft;
config.device = this->device_;

this->index_ =
std::make_shared<faiss::gpu::GpuIndexIVFPQ>(this->gpu_resource_.get(),
dim,
param.nlist,
param.M,
8, // FAISS only supports bitsPerCode=8
this->metric_type_,
config);
this->index_ = std::make_shared<faiss::gpu::GpuIndexIVFPQ>(this->gpu_resource_.get(),
dim,
param.nlist,
param.M,
param.bitsPerCode,
this->metric_type_,
config);
}

void set_search_param(const typename FaissGpu<T>::AnnSearchParam& param) override
@@ -329,6 +397,11 @@ class FaissGpuIVFPQ : public FaissGpu<T> {
this->index_refine_ =
std::make_shared<faiss::IndexRefineFlat>(this->index_.get(), this->dataset_);
this->index_refine_.get()->k_factor = search_param.refine_ratio;
faiss::IndexRefineSearchParameters faiss_refine_search_params;
faiss_refine_search_params.k_factor = this->index_refine_.get()->k_factor;
faiss_refine_search_params.base_index_params = this->search_params_.get();
this->refine_search_params_ =
std::make_unique<faiss::IndexRefineSearchParameters>(faiss_refine_search_params);
}
}

@@ -385,6 +458,11 @@ class FaissGpuIVFSQ : public FaissGpu<T> {
this->index_refine_ =
std::make_shared<faiss::IndexRefineFlat>(this->index_.get(), this->dataset_);
this->index_refine_.get()->k_factor = search_param.refine_ratio;
faiss::IndexRefineSearchParameters faiss_refine_search_params;
faiss_refine_search_params.k_factor = this->index_refine_.get()->k_factor;
faiss_refine_search_params.base_index_params = this->search_params_.get();
this->refine_search_params_ =
std::make_unique<faiss::IndexRefineSearchParameters>(faiss_refine_search_params);
}
}

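Note: on the host-side path the wrapper now forwards a `faiss::IndexRefineSearchParameters`, whose `k_factor` widens the candidate list pulled from the base index before the exact re-ranking against the original vectors. A self-contained CPU-only sketch of that FAISS pattern (random data and arbitrary sizes are assumptions):

```cpp
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQ.h>
#include <faiss/IndexRefine.h>

#include <random>
#include <vector>

int main()
{
  const int d = 32, nb = 10000, nq = 5, k = 10;
  std::mt19937 rng(42);
  std::uniform_real_distribution<float> uni(0.0f, 1.0f);
  std::vector<float> xb(size_t(nb) * d), xq(size_t(nq) * d);
  for (auto& v : xb) v = uni(rng);
  for (auto& v : xq) v = uni(rng);

  // Coarse index: IVF-PQ over the database vectors.
  faiss::IndexFlatL2 quantizer(d);
  faiss::IndexIVFPQ index(&quantizer, d, /*nlist=*/64, /*M=*/8, /*bitsPerCode=*/8);
  index.train(nb, xb.data());
  index.add(nb, xb.data());
  index.nprobe = 8;

  // Re-rank candidates against the original (uncompressed) vectors,
  // mirroring index_refine_ in the wrapper above.
  faiss::IndexRefineFlat refine(&index, xb.data());

  // k_factor > 1 asks the base index for k_factor * k candidates before
  // the exact re-ranking step.
  faiss::IndexRefineSearchParameters params;
  params.k_factor = 2.0f;

  std::vector<float> distances(size_t(nq) * k);
  std::vector<faiss::idx_t> neighbors(size_t(nq) * k);
  refine.search(nq, xq.data(), k, distances.data(), neighbors.data(), &params);
  return 0;
}
```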
3 changes: 2 additions & 1 deletion cpp/cmake/thirdparty/get_faiss.cmake
@@ -55,6 +55,7 @@ function(find_and_configure_faiss)
EXCLUDE_FROM_ALL ${exclude}
OPTIONS
"FAISS_ENABLE_GPU ${PKG_ENABLE_GPU}"
"FAISS_ENABLE_RAFT ${PKG_ENABLE_GPU}"
"FAISS_ENABLE_PYTHON OFF"
"FAISS_OPT_LEVEL ${RAFT_FAISS_OPT_LEVEL}"
"FAISS_USE_CUDA_TOOLKIT_STATIC ${CUDA_STATIC_RUNTIME}"
@@ -115,4 +116,4 @@ endfunction()
find_and_configure_faiss(
BUILD_STATIC_LIBS ${RAFT_USE_FAISS_STATIC}
ENABLE_GPU ${RAFT_FAISS_ENABLE_GPU}
)
)