diff --git a/apps/build_stitched_index.cpp b/apps/build_stitched_index.cpp index ae07a8d42..60e38c1be 100644 --- a/apps/build_stitched_index.cpp +++ b/apps/build_stitched_index.cpp @@ -286,7 +286,8 @@ void prune_and_save(path final_index_path_prefix, path full_index_path_prefix, p diskann::get_bin_metadata(input_data_path, number_of_label_points, dimension); - diskann::Index index(diskann::Metric::L2, dimension, number_of_label_points, nullptr, nullptr, 0, false, false, false, false, 0, false); + diskann::Index index(diskann::Metric::L2, dimension, number_of_label_points, nullptr, nullptr, 0, false, false, + false, false, 0, false); // not searching this index, set search_l to 0 index.load(full_index_path_prefix.c_str(), num_threads, 1); diff --git a/apps/utils/count_bfs_levels.cpp b/apps/utils/count_bfs_levels.cpp index 9bf84b75d..6dd2d6233 100644 --- a/apps/utils/count_bfs_levels.cpp +++ b/apps/utils/count_bfs_levels.cpp @@ -27,7 +27,8 @@ template void bfs_count(const std::string &index_path, uint32_t dat { using TagT = uint32_t; using LabelT = uint32_t; - diskann::Index index(diskann::Metric::L2, data_dims, 0, nullptr, nullptr, 0, false, false, false, false, 0, false); + diskann::Index index(diskann::Metric::L2, data_dims, 0, nullptr, nullptr, 0, false, false, false, + false, 0, false); std::cout << "Index class instantiated" << std::endl; index.load(index_path.c_str(), 1, 100); std::cout << "Index loaded" << std::endl; diff --git a/include/abstract_data_store.h b/include/abstract_data_store.h index 606b9fb7f..c5c5a322e 100644 --- a/include/abstract_data_store.h +++ b/include/abstract_data_store.h @@ -80,10 +80,11 @@ template class AbstractDataStore // num_points) to zero virtual void copy_vectors(const location_t from_loc, const location_t to_loc, const location_t num_points) = 0; - //With the PQ Data Store PR, we have also changed iterate_to_fixed_point to NOT take the query - //from the scratch object. Therefore every data store has to implement preprocess_query which - //at the least will be to copy the query into the scratch object. So making this pure virtual. - virtual void preprocess_query(const data_t *aligned_query, AbstractScratch *query_scratch = nullptr) const = 0; + // With the PQ Data Store PR, we have also changed iterate_to_fixed_point to NOT take the query + // from the scratch object. Therefore every data store has to implement preprocess_query which + // at the least will be to copy the query into the scratch object. So making this pure virtual. + virtual void preprocess_query(const data_t *aligned_query, + AbstractScratch *query_scratch = nullptr) const = 0; // distance functions. virtual float get_distance(const data_t *query, const location_t loc) const = 0; virtual void get_distance(const data_t *query, const location_t *locations, const uint32_t location_count, @@ -98,10 +99,10 @@ template class AbstractDataStore // in the dataset virtual location_t calculate_medoid() const = 0; - //REFACTOR PQ TODO: Each data store knows about its distance function, so this is - //redundant. However, we don't have an OptmizedDataStore yet, and to preserve code - //compability, we are exposing this function. - virtual Distance* get_dist_fn() const = 0; + // REFACTOR PQ TODO: Each data store knows about its distance function, so this is + // redundant. However, we don't have an OptmizedDataStore yet, and to preserve code + // compability, we are exposing this function. + virtual Distance *get_dist_fn() const = 0; // search helpers // if the base data is aligned per the request of the metric, this will tell diff --git a/include/in_mem_data_store.h b/include/in_mem_data_store.h index dd152b343..d1ad795f6 100644 --- a/include/in_mem_data_store.h +++ b/include/in_mem_data_store.h @@ -49,14 +49,15 @@ template class InMemDataStore : public AbstractDataStore *scratch) const override; + virtual void get_distance(const data_t *preprocessed_query, const location_t *locations, + const uint32_t location_count, float *distances, + AbstractScratch *scratch) const override; virtual void get_distance(const data_t *preprocessed_query, const std::vector &ids, std::vector &distances, AbstractScratch *scratch_space) const override; virtual location_t calculate_medoid() const override; - virtual Distance* get_dist_fn() const override; + virtual Distance *get_dist_fn() const override; virtual size_t get_alignment_factor() const override; diff --git a/include/index.h b/include/index.h index b6bf2c300..199171020 100644 --- a/include/index.h +++ b/include/index.h @@ -68,8 +68,6 @@ template clas const bool pq_dist_build = false, const size_t num_pq_chunks = 0, const bool use_opq = false, const bool filtered_index = false); - - DISKANN_DLLEXPORT ~Index(); // Saves graph, data, metadata and associated tags. @@ -255,10 +253,9 @@ template clas // with iterate_to_fixed_point. std::vector get_init_ids(); - //The query to use is placed in scratch->aligned_query + // The query to use is placed in scratch->aligned_query std::pair iterate_to_fixed_point(InMemQueryScratch *scratch, const uint32_t Lindex, - const std::vector &init_ids, - bool use_filter, + const std::vector &init_ids, bool use_filter, const std::vector &filters, bool search_invocation); void search_for_point_and_prune(int location, uint32_t Lindex, std::vector &pruned_list, @@ -340,7 +337,6 @@ template clas // Data std::shared_ptr> _data_store; - // Graph related data structures std::unique_ptr _graph_store; diff --git a/include/index_factory.h b/include/index_factory.h index c1a3dd47f..80bc40dba 100644 --- a/include/index_factory.h +++ b/include/index_factory.h @@ -3,8 +3,6 @@ #include "in_mem_graph_store.h" #include "pq_data_store.h" - - namespace diskann { class IndexFactory @@ -13,15 +11,13 @@ class IndexFactory DISKANN_DLLEXPORT explicit IndexFactory(const IndexConfig &config); DISKANN_DLLEXPORT std::unique_ptr create_instance(); - DISKANN_DLLEXPORT static std::unique_ptr construct_graphstore( const GraphStoreStrategy stratagy, const size_t size, const size_t reserve_graph_degree); template DISKANN_DLLEXPORT static std::shared_ptr> construct_datastore(DataStoreStrategy stratagy, size_t num_points, - size_t dimension, - Metric m); + size_t dimension, Metric m); // For now PQDataStore incorporates within itself all variants of quantization that we support. In the // future it may be necessary to introduce an AbstractPQDataStore class to spearate various quantization // flavours. @@ -33,7 +29,7 @@ class IndexFactory template static Distance *construct_inmem_distance_fn(Metric m); private: - void check_config(); + void check_config(); template std::unique_ptr create_instance(); diff --git a/include/pq_data_store.h b/include/pq_data_store.h index f5df3c80c..7c0cb5fe0 100644 --- a/include/pq_data_store.h +++ b/include/pq_data_store.h @@ -7,16 +7,16 @@ namespace diskann { - //REFACTOR TODO: By default, the PQDataStore is an in-memory datastore because both Vamana and - //DiskANN treat it the same way. But with DiskPQ, that may need to change. +// REFACTOR TODO: By default, the PQDataStore is an in-memory datastore because both Vamana and +// DiskANN treat it the same way. But with DiskPQ, that may need to change. template class PQDataStore : public AbstractDataStore { public: PQDataStore(size_t dim, location_t num_points, size_t num_pq_chunks, std::unique_ptr> distance_fn, std::unique_ptr> pq_distance_fn); - PQDataStore(const PQDataStore&) = delete; - PQDataStore &operator=(const PQDataStore&) = delete; + PQDataStore(const PQDataStore &) = delete; + PQDataStore &operator=(const PQDataStore &) = delete; ~PQDataStore(); // Load quantized vectors from a set of files. Here filename is treated @@ -67,7 +67,7 @@ template class PQDataStore : public AbstractDataStore // We are returning the distance function that is used for full precision // vectors here, not the PQ distance function. This is because the callers // all are expecting a Distance not QuantizedDistance. - virtual Distance* get_dist_fn() const override; + virtual Distance *get_dist_fn() const override; virtual location_t calculate_medoid() const override; diff --git a/src/abstract_data_store.cpp b/src/abstract_data_store.cpp index 791d38618..0cff0152e 100644 --- a/src/abstract_data_store.cpp +++ b/src/abstract_data_store.cpp @@ -39,7 +39,6 @@ template location_t AbstractDataStore::resize(const lo } } - template DISKANN_DLLEXPORT class AbstractDataStore; template DISKANN_DLLEXPORT class AbstractDataStore; template DISKANN_DLLEXPORT class AbstractDataStore; diff --git a/src/in_mem_data_store.cpp b/src/in_mem_data_store.cpp index 1dccfe056..cc7acf615 100644 --- a/src/in_mem_data_store.cpp +++ b/src/in_mem_data_store.cpp @@ -182,7 +182,7 @@ template void InMemDataStore::prefetch_vector(const lo template void InMemDataStore::preprocess_query(const data_t *query, AbstractScratch *query_scratch) const { - if (query_scratch != nullptr ) + if (query_scratch != nullptr) { memcpy(query_scratch->aligned_query_T(), query, sizeof(data_t) * this->get_dims()); } @@ -218,7 +218,7 @@ float InMemDataStore::get_distance(const location_t loc1, const location (uint32_t)this->_aligned_dim); } -template +template void InMemDataStore::get_distance(const data_t *preprocessed_query, const std::vector &ids, std::vector &distances, AbstractScratch *scratch_space) const { @@ -389,7 +389,7 @@ template location_t InMemDataStore::calculate_medoid() return min_idx; } -template Distance* InMemDataStore::get_dist_fn() const +template Distance *InMemDataStore::get_dist_fn() const { return this->_distance_fn.get(); } diff --git a/src/index.cpp b/src/index.cpp index 5db9ebb23..97413de07 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -30,8 +30,8 @@ namespace diskann // (bin), and initialize max_points template Index::Index(const IndexConfig &index_config, std::shared_ptr> data_store, - std::unique_ptr graph_store, - std::shared_ptr> pq_data_store) + std::unique_ptr graph_store, + std::shared_ptr> pq_data_store) : _dist_metric(index_config.metric), _dim(index_config.dimension), _max_points(index_config.max_points), _num_frozen_pts(index_config.num_frozen_pts), _dynamic_index(index_config.dynamic_index), _enable_tags(index_config.enable_tags), _indexingMaxC(DEFAULT_MAXC), _query_scratch(nullptr), @@ -142,10 +142,13 @@ Index::Index(Metric m, const size_t dim, const size_t max_point (size_t)((index_parameters == nullptr ? 0 : index_parameters->max_degree) * defaults::GRAPH_SLACK_FACTOR * 1.05))) { - if (_pq_dist) { - _pq_data_store = - IndexFactory::construct_pq_datastore(DataStoreStrategy::MEMORY, max_points + num_frozen_pts, dim, m, num_pq_chunks, use_opq); - } else { + if (_pq_dist) + { + _pq_data_store = IndexFactory::construct_pq_datastore(DataStoreStrategy::MEMORY, max_points + num_frozen_pts, + dim, m, num_pq_chunks, use_opq); + } + else + { _pq_data_store = _data_store; } } @@ -784,8 +787,8 @@ bool Index::detect_common_filters(uint32_t point_id, bool searc template std::pair Index::iterate_to_fixed_point( - InMemQueryScratch *scratch, const uint32_t Lsize, const std::vector &init_ids, - bool use_filter, const std::vector &filter_labels, bool search_invocation) + InMemQueryScratch *scratch, const uint32_t Lsize, const std::vector &init_ids, bool use_filter, + const std::vector &filter_labels, bool search_invocation) { std::vector &expanded_nodes = scratch->pool(); NeighborPriorityQueue &best_L_nodes = scratch->best_l_nodes(); @@ -1143,7 +1146,7 @@ void Index::prune_neighbors(const uint32_t location, std::vecto } // If using _pq_build, over-write the PQ distances with actual distances - //REFACTOR PQ: TODO: How to get rid of this!? + // REFACTOR PQ: TODO: How to get rid of this!? if (_pq_dist) { for (auto &ngh : pool) @@ -1629,15 +1632,15 @@ void Index::build(const char *filename, const size_t num_points throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } - //REFACTOR PQ TODO: We can remove this if and add a check in the InMemDataStore - //to not populate_data if it has been called once. + // REFACTOR PQ TODO: We can remove this if and add a check in the InMemDataStore + // to not populate_data if it has been called once. if (_pq_dist) { #ifdef EXEC_ENV_OLS - std::stringstream ss; + std::stringstream ss; ss << "PQ Build is not supported in DLVS environment (i.e. if EXEC_ENV_OLS is defined)" << std::endl; diskann::cerr << ss.str() << std::endl; - throw ANNException(ss.str(),-1, __FUNCSIG__, __FILE__, __LINE__); + throw ANNException(ss.str(), -1, __FUNCSIG__, __FILE__, __LINE__); #else // REFACTOR TODO: Both in the previous code and in the current PQDataStore, // we are writing the PQ files in the same path as the input file. Now we @@ -1957,8 +1960,7 @@ std::pair Index::search(const T *query, con _data_store->preprocess_query(query, scratch); - auto retval = - iterate_to_fixed_point(scratch, L, init_ids, false, unused_filter_label, true); + auto retval = iterate_to_fixed_point(scratch, L, init_ids, false, unused_filter_label, true); NeighborPriorityQueue &best_L_nodes = scratch->best_l_nodes(); @@ -2228,7 +2230,7 @@ template void Index Distance* IndexFactory::construct_inmem_distance_fn(Metric metric) +template Distance *IndexFactory::construct_inmem_distance_fn(Metric metric) { - if (metric == diskann::Metric::COSINE && std::is_same::value) { + if (metric == diskann::Metric::COSINE && std::is_same::value) + { return (Distance *)new AVXNormalizedCosineDistanceFloat(); - } else { + } + else + { return (Distance *)get_distance_function(metric); } } template -std::shared_ptr> IndexFactory::construct_datastore(DataStoreStrategy strategy, size_t total_internal_points, - size_t dimension, Metric metric) +std::shared_ptr> IndexFactory::construct_datastore(DataStoreStrategy strategy, + size_t total_internal_points, size_t dimension, + Metric metric) { std::unique_ptr> distance; switch (strategy) { case DataStoreStrategy::MEMORY: distance.reset(construct_inmem_distance_fn(metric)); - return std::make_shared>((location_t)total_internal_points, dimension, std::move(distance)); + return std::make_shared>((location_t)total_internal_points, dimension, + std::move(distance)); default: break; } @@ -120,20 +124,24 @@ std::unique_ptr IndexFactory::create_instance() auto data_store = construct_datastore(_config->data_strategy, num_points, dim, _config->metric); std::shared_ptr> pq_data_store = nullptr; - if (_config->data_strategy == DataStoreStrategy::MEMORY && _config->pq_dist_build) { - pq_data_store = construct_pq_datastore(_config->data_strategy, num_points + _config->num_frozen_pts, dim, _config->metric, - _config->num_pq_chunks, _config->use_opq); - } else { + if (_config->data_strategy == DataStoreStrategy::MEMORY && _config->pq_dist_build) + { + pq_data_store = + construct_pq_datastore(_config->data_strategy, num_points + _config->num_frozen_pts, dim, + _config->metric, _config->num_pq_chunks, _config->use_opq); + } + else + { pq_data_store = data_store; } size_t max_reserve_degree = (size_t)(defaults::GRAPH_SLACK_FACTOR * 1.05 * - (_config->index_write_params == nullptr ? 0 : _config->index_write_params->max_degree)); + (_config->index_write_params == nullptr ? 0 : _config->index_write_params->max_degree)); std::unique_ptr graph_store = construct_graphstore(_config->graph_strategy, num_points + _config->num_frozen_pts, max_reserve_degree); - //REFACTOR TODO: Must construct in-memory PQDatastore if strategy == ONDISK and must construct - //in-mem and on-disk PQDataStore if strategy == ONDISK and diskPQ is required. + // REFACTOR TODO: Must construct in-memory PQDatastore if strategy == ONDISK and must construct + // in-mem and on-disk PQDataStore if strategy == ONDISK and diskPQ is required. return std::make_unique>(*_config, data_store, std::move(graph_store), pq_data_store); } @@ -195,11 +203,11 @@ std::unique_ptr IndexFactory::create_instance(const std::string & throw ANNException("Error: unsupported label_type please choose from [uint/ushort]", -1); } -//template DISKANN_DLLEXPORT std::shared_ptr> IndexFactory::construct_datastore( -// DataStoreStrategy stratagy, size_t num_points, size_t dimension, Metric m); -//template DISKANN_DLLEXPORT std::shared_ptr> IndexFactory::construct_datastore( -// DataStoreStrategy stratagy, size_t num_points, size_t dimension, Metric m); -//template DISKANN_DLLEXPORT std::shared_ptr> IndexFactory::construct_datastore( -// DataStoreStrategy stratagy, size_t num_points, size_t dimension, Metric m); +// template DISKANN_DLLEXPORT std::shared_ptr> IndexFactory::construct_datastore( +// DataStoreStrategy stratagy, size_t num_points, size_t dimension, Metric m); +// template DISKANN_DLLEXPORT std::shared_ptr> IndexFactory::construct_datastore( +// DataStoreStrategy stratagy, size_t num_points, size_t dimension, Metric m); +// template DISKANN_DLLEXPORT std::shared_ptr> IndexFactory::construct_datastore( +// DataStoreStrategy stratagy, size_t num_points, size_t dimension, Metric m); } // namespace diskann diff --git a/src/pq_data_store.cpp b/src/pq_data_store.cpp index 6207b75e2..c47c16705 100644 --- a/src/pq_data_store.cpp +++ b/src/pq_data_store.cpp @@ -18,7 +18,8 @@ PQDataStore::PQDataStore(size_t dim, location_t num_points, size_t num_p : AbstractDataStore(num_points, dim), _quantized_data(nullptr), _num_chunks(num_pq_chunks), _distance_metric(distance_fn->get_metric()) { - if (num_pq_chunks > dim) { + if (num_pq_chunks > dim) + { throw diskann::ANNException("ERROR: num_pq_chunks > dim", -1, __FUNCSIG__, __FILE__, __LINE__); } _distance_fn = std::move(distance_fn); @@ -213,7 +214,7 @@ template size_t PQDataStore::get_alignment_factor() co return 1; } -template Distance* PQDataStore::get_dist_fn() const +template Distance *PQDataStore::get_dist_fn() const { return _distance_fn.get(); }