From 615247afed4bb65709e92ecd4b00eb6e942503cf Mon Sep 17 00:00:00 2001 From: rakri Date: Tue, 23 Jan 2024 06:09:48 +0000 Subject: [PATCH] clang formatted --- apps/build_stitched_index.cpp | 4 +-- include/abstract_filter_store.h | 5 +-- include/disk_utils.h | 2 +- include/filter_utils.h | 3 +- include/in_mem_filter_store.h | 8 ++--- include/index.h | 5 +-- include/utils.h | 2 +- src/abstract_index.cpp | 54 ++++++++++++++++----------------- src/disk_utils.cpp | 31 +++++++++---------- src/filter_utils.cpp | 3 +- src/in_mem_filter_store.cpp | 31 +++++++++---------- src/index.cpp | 12 +++----- 12 files changed, 78 insertions(+), 82 deletions(-) diff --git a/apps/build_stitched_index.cpp b/apps/build_stitched_index.cpp index e460c4306..6bca8b0f6 100644 --- a/apps/build_stitched_index.cpp +++ b/apps/build_stitched_index.cpp @@ -346,8 +346,8 @@ int main(int argc, char **argv) path labels_map_file = final_index_path_prefix + "_labels_map.txt"; std::string raw_universal_label = universal_label; - diskann::InMemFilterStore::convert_label_to_numeric(label_data_path, labels_file_to_use, - labels_map_file, raw_universal_label); + diskann::InMemFilterStore::convert_label_to_numeric(label_data_path, labels_file_to_use, labels_map_file, + raw_universal_label); // 2. parse label file and create necessary data structures std::vector point_ids_to_labels; diff --git a/include/abstract_filter_store.h b/include/abstract_filter_store.h index df245e181..95b39801c 100644 --- a/include/abstract_filter_store.h +++ b/include/abstract_filter_store.h @@ -42,10 +42,11 @@ template class AbstractFilterStore // TODO: in future we may accept a set or vector of universal labels // DISKANN_DLLEXPORT virtual void set_universal_label(label_type universal_label) = 0; DISKANN_DLLEXPORT virtual void set_universal_labels(const std::string &universal_labels) = 0; - DISKANN_DLLEXPORT virtual std::pair get_universal_label() = 0; + DISKANN_DLLEXPORT virtual std::pair get_universal_label() = 0; // takes raw label file and then genrate internal mapping file and keep the info of mapping - DISKANN_DLLEXPORT virtual size_t load_raw_labels(const std::string &raw_labels_file, const std::string &raw_universal_label) = 0; + DISKANN_DLLEXPORT virtual size_t load_raw_labels(const std::string &raw_labels_file, + const std::string &raw_universal_label) = 0; DISKANN_DLLEXPORT virtual void save_labels(const std::string &save_path, const size_t total_points) = 0; // For dynamic filtered build, we compact the data and hence location_to_labels, we need the compacted version of diff --git a/include/disk_utils.h b/include/disk_utils.h index 4d5ad9b04..b66cedb76 100644 --- a/include/disk_utils.h +++ b/include/disk_utils.h @@ -66,7 +66,7 @@ DISKANN_DLLEXPORT int merge_shards(const std::string &vamana_prefix, const std:: const uint64_t nshards, uint32_t max_degree, const std::string &output_vamana, const std::string &medoids_file, bool use_filters = false, const std::string &labels_to_medoids_file = std::string(""), - const std::unordered_map& disk_labels_map = {}); + const std::unordered_map &disk_labels_map = {}); DISKANN_DLLEXPORT void extract_shard_labels(const std::string &in_label_file, const std::string &shard_ids_bin, const std::string &shard_label_file); diff --git a/include/filter_utils.h b/include/filter_utils.h index 97a076581..a444f81e7 100644 --- a/include/filter_utils.h +++ b/include/filter_utils.h @@ -57,7 +57,8 @@ DISKANN_DLLEXPORT void generate_label_indices(path input_data_path, path final_i DISKANN_DLLEXPORT load_label_index_return_values load_label_index(path label_index_path, uint32_t label_number_of_points); -DISKANN_DLLEXPORT std::tuple>, tsl::robin_set> parse_raw_label_file(path label_file); +DISKANN_DLLEXPORT std::tuple>, tsl::robin_set> parse_raw_label_file( + path label_file); DISKANN_DLLEXPORT parse_label_file_return_values parse_label_file(path label_data_path, std::string universal_label); diff --git a/include/in_mem_filter_store.h b/include/in_mem_filter_store.h index 818b99172..a1b64a520 100644 --- a/include/in_mem_filter_store.h +++ b/include/in_mem_filter_store.h @@ -17,7 +17,7 @@ template class InMemFilterStore : public AbstractFilterSto const FilterMatchStrategy filter_match_strategy) override; const std::vector &get_labels_by_location(const location_t point_id) override; - //const label_type get_universal_label + // const label_type get_universal_label // Dynamic Index void set_labels_to_location(const location_t location, const std::vector &labels); @@ -34,10 +34,10 @@ template class InMemFilterStore : public AbstractFilterSto const uint32_t &get_medoid_by_label(const label_type &label) override; const std::unordered_map &get_labels_to_medoids() override; bool label_has_medoid(const label_type &label) override; - + // takes raw universal labels and map them internally. void set_universal_labels(const std::string &raw_universal_labels) override; - std::pair get_universal_label() override; + std::pair get_universal_label() override; // ideally takes raw label file and then genrate internal mapping file and keep the info of mapping size_t load_raw_labels(const std::string &raw_labels_file, const std::string &raw_universal_label) override; @@ -81,7 +81,7 @@ template class InMemFilterStore : public AbstractFilterSto // no need of storing raw universal label ? // 1. _use_universal_label can be used to identify if universal label present or not // 2. from _label_map and _mapped_universal_label, we can know what is raw universal label. Hence seems duplicate - //std::string _raw_universal_label; + // std::string _raw_universal_label; // populates _loaction_to labels and _labels from given label file size_t parse_label_file(const std::string &label_file); diff --git a/include/index.h b/include/index.h index 7e853b5e5..021ff8f2f 100644 --- a/include/index.h +++ b/include/index.h @@ -109,7 +109,8 @@ template clas // Filtered Support DISKANN_DLLEXPORT void build_filtered_index(const char *filename, const std::string &label_file, - const size_t num_points_to_load, const std::vector &tags = std::vector()); + const size_t num_points_to_load, + const std::vector &tags = std::vector()); // DISKANN_DLLEXPORT void set_universal_label(const LabelT &label); DISKANN_DLLEXPORT void set_universal_labels(const std::string &raw_labels); @@ -253,7 +254,7 @@ template clas // Calculate best medoids for filter data void calculate_best_medoids(const size_t num_points_to_load, const uint32_t num_candidates); - + // The query to use is placed in scratch->aligned_query std::pair iterate_to_fixed_point(InMemQueryScratch *scratch, const uint32_t Lindex, const std::vector &init_ids, bool use_filter, diff --git a/include/utils.h b/include/utils.h index a2ed10a14..b2a574c87 100644 --- a/include/utils.h +++ b/include/utils.h @@ -177,7 +177,7 @@ inline int delete_file(const std::string &fileName) } inline void convert_label_to_numeric(const std::string &inFileName, const std::string &outFileName, - const std::string &mapFileName, const std::string &unv_label) + const std::string &mapFileName, const std::string &unv_label) { std::unordered_map string_int_map; std::ofstream label_writer(outFileName); diff --git a/src/abstract_index.cpp b/src/abstract_index.cpp index c1147fbaf..c0b4751a1 100644 --- a/src/abstract_index.cpp +++ b/src/abstract_index.cpp @@ -235,33 +235,33 @@ template DISKANN_DLLEXPORT int AbstractIndex::insert_point(cons template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const uint8_t *point, const uint64_t tag); template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const int8_t *point, const uint64_t tag); -template DISKANN_DLLEXPORT int AbstractIndex::insert_point( - const float *point, const int32_t tag, const std::vector &labels); -template DISKANN_DLLEXPORT int AbstractIndex::insert_point( - const uint8_t *point, const int32_t tag, const std::vector &labels); -template DISKANN_DLLEXPORT int AbstractIndex::insert_point( - const int8_t *point, const int32_t tag, const std::vector &labels); - -template DISKANN_DLLEXPORT int AbstractIndex::insert_point( - const float *point, const uint32_t tag, const std::vector &labels); -template DISKANN_DLLEXPORT int AbstractIndex::insert_point( - const uint8_t *point, const uint32_t tag, const std::vector &labels); -template DISKANN_DLLEXPORT int AbstractIndex::insert_point( - const int8_t *point, const uint32_t tag, const std::vector &labels); - -template DISKANN_DLLEXPORT int AbstractIndex::insert_point( - const float *point, const int64_t tag, const std::vector &labels); -template DISKANN_DLLEXPORT int AbstractIndex::insert_point( - const uint8_t *point, const int64_t tag, const std::vector &labels); -template DISKANN_DLLEXPORT int AbstractIndex::insert_point( - const int8_t *point, const int64_t tag, const std::vector &labels); - -template DISKANN_DLLEXPORT int AbstractIndex::insert_point( - const float *point, const uint64_t tag, const std::vector &labels); -template DISKANN_DLLEXPORT int AbstractIndex::insert_point( - const uint8_t *point, const uint64_t tag, const std::vector &labels); -template DISKANN_DLLEXPORT int AbstractIndex::insert_point( - const int8_t *point, const uint64_t tag, const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const float *point, const int32_t tag, + const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const uint8_t *point, const int32_t tag, + const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const int8_t *point, const int32_t tag, + const std::vector &labels); + +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const float *point, const uint32_t tag, + const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const uint8_t *point, const uint32_t tag, + const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const int8_t *point, const uint32_t tag, + const std::vector &labels); + +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const float *point, const int64_t tag, + const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const uint8_t *point, const int64_t tag, + const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const int8_t *point, const int64_t tag, + const std::vector &labels); + +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const float *point, const uint64_t tag, + const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const uint8_t *point, const uint64_t tag, + const std::vector &labels); +template DISKANN_DLLEXPORT int AbstractIndex::insert_point(const int8_t *point, const uint64_t tag, + const std::vector &labels); template DISKANN_DLLEXPORT int AbstractIndex::lazy_delete(const int32_t &tag); template DISKANN_DLLEXPORT int AbstractIndex::lazy_delete(const uint32_t &tag); diff --git a/src/disk_utils.cpp b/src/disk_utils.cpp index dd088b83c..55d09a404 100644 --- a/src/disk_utils.cpp +++ b/src/disk_utils.cpp @@ -241,7 +241,8 @@ void read_idmap(const std::string &fname, std::vector &ivecs) int merge_shards(const std::string &vamana_prefix, const std::string &vamana_suffix, const std::string &idmaps_prefix, const std::string &idmaps_suffix, const uint64_t nshards, uint32_t max_degree, const std::string &output_vamana, const std::string &medoids_file, bool use_filters, - const std::string &disk_labels_to_medoids_file, const std::unordered_map& disk_labels_map) + const std::string &disk_labels_to_medoids_file, + const std::unordered_map &disk_labels_map) { // Read ID maps std::vector vamana_names(nshards); @@ -641,7 +642,8 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr double sampling_rate, double ram_budget, std::string mem_index_path, std::string medoids_file, std::string centroids_file, size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file, - const std::string &disk_labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf) + const std::string &disk_labels_to_medoids_file, const std::string &universal_label, + const uint32_t Lf) { size_t base_num, base_dim; diskann::get_bin_metadata(base_file, base_num, base_dim); @@ -769,8 +771,9 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr const std::string disk_index_path = mem_index_path.substr(0, mem_index_path.size() - 9) + "disk.index"; const std::string disk_labels_file = disk_index_path + "_labels.txt"; const std::string disk_labels_map_file = disk_index_path + "_labels_map.txt"; - const std::unordered_map disk_labels_map = diskann::InMemFilterStore::convert_label_to_numeric( - label_file, disk_labels_file, disk_labels_map_file, universal_label); + const std::unordered_map disk_labels_map = + diskann::InMemFilterStore::convert_label_to_numeric(label_file, disk_labels_file, + disk_labels_map_file, universal_label); diskann::merge_shards(merged_index_prefix + "_subshard-", "_mem.index", merged_index_prefix + "_subshard-", "_ids_uint32.bin", num_parts, R, mem_index_path, medoids_file, use_filters, @@ -1271,7 +1274,7 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const std::string augmented_data_file, augmented_labels_file; if (use_filters) { - + // convert_label_to_numeric(labels_file_original, labels_file_to_use, disk_labels_int_map_file, // universal_label); augmented_data_file = index_prefix_path + "_augmented_data.bin"; @@ -1477,37 +1480,31 @@ template DISKANN_DLLEXPORT int build_merged_vamana_index( std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate, double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file, size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file, - const std::string &labels_to_medoids_file, - const std::string &universal_label, const uint32_t Lf); + const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf); template DISKANN_DLLEXPORT int build_merged_vamana_index( std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate, double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file, size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file, - const std::string &labels_to_medoids_file, - const std::string &universal_label, const uint32_t Lf); + const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf); template DISKANN_DLLEXPORT int build_merged_vamana_index( std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate, double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file, size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file, - const std::string &labels_to_medoids_file, - const std::string &universal_label, const uint32_t Lf); + const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf); // Label=16_t template DISKANN_DLLEXPORT int build_merged_vamana_index( std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate, double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file, size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file, - const std::string &labels_to_medoids_file, - const std::string &universal_label, const uint32_t Lf); + const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf); template DISKANN_DLLEXPORT int build_merged_vamana_index( std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate, double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file, size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file, - const std::string &labels_to_medoids_file, - const std::string &universal_label, const uint32_t Lf); + const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf); template DISKANN_DLLEXPORT int build_merged_vamana_index( std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate, double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file, size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file, - const std::string &labels_to_medoids_file, - const std::string &universal_label, const uint32_t Lf); + const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf); }; // namespace diskann diff --git a/src/filter_utils.cpp b/src/filter_utils.cpp index 29d44228d..adf046954 100644 --- a/src/filter_utils.cpp +++ b/src/filter_utils.cpp @@ -269,7 +269,8 @@ parse_label_file_return_values parse_label_file(path label_data_path, std::strin * 1. a vector of vectors of labels, where the outer vector is indexed by point id * 2. a set of all labels */ -std::tuple>, tsl::robin_set> parse_raw_label_file(std::string label_file) +std::tuple>, tsl::robin_set> parse_raw_label_file( + std::string label_file) { std::vector> pts_to_labels; tsl::robin_set labels; diff --git a/src/in_mem_filter_store.cpp b/src/in_mem_filter_store.cpp index d27eade5e..aa47949a9 100644 --- a/src/in_mem_filter_store.cpp +++ b/src/in_mem_filter_store.cpp @@ -34,7 +34,8 @@ void InMemFilterStore::set_labels_to_location(const location_t locat const std::vector &label_str) { std::vector labels; - for(int i=0; iget_numeric_label(label_str[i])); } _location_to_labels[location] = labels; @@ -90,7 +91,7 @@ void InMemFilterStore::set_universal_labels(const std::string &raw_u { if (raw_universal_label.empty()) { - std::cout << "Warning: empty universal label passed" << std::endl; + std::cout << "Warning: empty universal label passed" << std::endl; } else { @@ -99,12 +100,11 @@ void InMemFilterStore::set_universal_labels(const std::string &raw_u } } -template -std::pair InMemFilterStore::get_universal_label() +template std::pair InMemFilterStore::get_universal_label() { std::pair universal_label; universal_label.second = _universal_label; - if(_has_universal_label) + if (_has_universal_label) { universal_label.first = false; } @@ -116,7 +116,9 @@ std::pair InMemFilterStore::get_universal_label() } // ideally takes raw label file and then genrate internal mapping and keep the info of mapping -template size_t InMemFilterStore::load_raw_labels(const std::string &raw_labels_file, const std::string &raw_universal_label) +template +size_t InMemFilterStore::load_raw_labels(const std::string &raw_labels_file, + const std::string &raw_universal_label) { std::string raw_label_file_path = std::string(raw_labels_file).erase(raw_labels_file.size() - 4); // remove .txt from end @@ -125,7 +127,7 @@ template size_t InMemFilterStore::load_raw_lab raw_label_file_path + "_label_numeric.txt"; // will not be used after parse, can be safely deleted. std::string mem_labels_int_map_file = raw_label_file_path + "_labels_map.txt"; _label_map = InMemFilterStore::convert_label_to_numeric(raw_labels_file, labels_file_to_use, - mem_labels_int_map_file, raw_universal_label); + mem_labels_int_map_file, raw_universal_label); return parse_label_file(labels_file_to_use); } @@ -320,8 +322,7 @@ template void InMemFilterStore::save_label_map map_writer.close(); } -template -label_type InMemFilterStore::get_numeric_label(const std::string &raw_label) +template label_type InMemFilterStore::get_numeric_label(const std::string &raw_label) { if (_label_map.empty()) { @@ -438,8 +439,7 @@ std::unordered_map InMemFilterStore::conver token.erase(std::remove(token.begin(), token.end(), '\r'), token.end()); if (string_int_map.find(token) == string_int_map.end()) { - uint32_t nextId = - (uint32_t)string_int_map.size(); + uint32_t nextId = (uint32_t)string_int_map.size(); string_int_map[token] = nextId; } lbls.push_back(string_int_map[token]); @@ -487,16 +487,13 @@ bool InMemFilterStore::detect_common_filters_by_set_intersection( { if (!search_invocation) { - if (std::find(incoming_labels.begin(), incoming_labels.end(), _universal_label) != - incoming_labels.end() || - std::find(curr_node_labels.begin(), curr_node_labels.end(), _universal_label) != - curr_node_labels.end()) + if (std::find(incoming_labels.begin(), incoming_labels.end(), _universal_label) != incoming_labels.end() || + std::find(curr_node_labels.begin(), curr_node_labels.end(), _universal_label) != curr_node_labels.end()) common_filters.insert(_universal_label); } else { - if (std::find(curr_node_labels.begin(), curr_node_labels.end(), _universal_label) != - curr_node_labels.end()) + if (std::find(curr_node_labels.begin(), curr_node_labels.end(), _universal_label) != curr_node_labels.end()) common_filters.insert(_universal_label); } } diff --git a/src/index.cpp b/src/index.cpp index dacd58f86..f7b0244ee 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -540,7 +540,7 @@ void Index::load(const char *filename, uint32_t num_threads, ui _filter_store->load_medoids(labels_to_medoids); _filter_store->load_universal_labels(std::string(filename) + "_universal_label.txt"); } - + _nd = data_file_num_pts - _num_frozen_pts; _empty_slots.clear(); _empty_slots.reserve(_max_points); @@ -663,13 +663,12 @@ template std::vector Inde } template -void Index::calculate_best_medoids(const size_t num_points_to_load, - const uint32_t num_candidates) +void Index::calculate_best_medoids(const size_t num_points_to_load, const uint32_t num_candidates) { _label_to_medoid_id.clear(); std::unordered_map> label_to_points; tsl::robin_set label_set = _filter_store->get_all_label_set(); - + for (uint32_t point_id = 0; point_id < num_points_to_load; point_id++) { for (auto label : _filter_store->get_labels_by_location(point_id)) @@ -723,7 +722,6 @@ void Index::calculate_best_medoids(const size_t num_points_to_l } } - // Find common filter between a node's labels and a given set of labels, while // taking into account universal label template @@ -948,7 +946,7 @@ void Index::search_for_point_and_prune(int location, uint32_t L _data_store->get_vector(location, scratch->aligned_query()); iterate_to_fixed_point(scratch, filteredLindex, filter_specific_start_nodes, true, - _filter_store->get_labels_by_location(location), false); + _filter_store->get_labels_by_location(location), false); // combine candidate pools obtained with filter and unfiltered criteria. std::set best_candidate_pool; @@ -1846,7 +1844,7 @@ std::pair Index::_search_with_filters(const const uint32_t L, std::any &indices, float *distances) { - // Add documentation + // Add documentation // rename to get_numeric_label auto converted_label = _filter_store->get_numeric_label(raw_label); if (typeid(uint64_t *) == indices.type())