From ebcf94630fe556978937500b94d5b1cf89306fdd Mon Sep 17 00:00:00 2001 From: Sanhaoji2 Date: Wed, 27 Nov 2024 14:39:04 +0800 Subject: [PATCH] tmp save --- include/index.h | 12 +- src/CMakeLists.txt | 2 +- src/dll/CMakeLists.txt | 2 +- src/index.cpp | 366 ++++++++++++++++++----------------------- 4 files changed, 167 insertions(+), 215 deletions(-) diff --git a/include/index.h b/include/index.h index 320942013..3ddae5297 100644 --- a/include/index.h +++ b/include/index.h @@ -190,7 +190,7 @@ template clas DISKANN_DLLEXPORT void load(AlignedFileReader &reader, uint32_t num_threads, uint32_t search_l); #else // Reads the number of frozen points from graph's metadata file section. - DISKANN_DLLEXPORT static size_t get_graph_num_frozen_points(const std::string &graph_file); +// DISKANN_DLLEXPORT static size_t get_graph_num_frozen_points(const std::string &graph_file); DISKANN_DLLEXPORT void load(const char *index_file, uint32_t num_threads, uint32_t search_l); #endif @@ -291,7 +291,7 @@ template clas // repositions frozen points to the end of _data - if they have been moved // during deletion - DISKANN_DLLEXPORT void reposition_frozen_point_to_end(); +// DISKANN_DLLEXPORT void reposition_frozen_point_to_end(); DISKANN_DLLEXPORT void reposition_points(uint32_t old_location_start, uint32_t new_location_start, uint32_t num_locations); @@ -358,7 +358,7 @@ template clas // generates 1 frozen point that will never be deleted from the graph // This is not visible to the user - void generate_frozen_point(); +// void generate_frozen_point(); // determines navigating node of the graph by calculating medoid of datafopt uint32_t calculate_entry_point(); @@ -423,7 +423,7 @@ template clas // graph, mode = _consolidated_order in case of lazy deletion and // _compacted_order in case of eager deletion DISKANN_DLLEXPORT void compact_data(); - DISKANN_DLLEXPORT void compact_frozen_point(); +// DISKANN_DLLEXPORT void compact_frozen_point(); // Remove deleted nodes from adjacency list of node loc // Replace removed neighbors with second order neighbors. @@ -476,8 +476,8 @@ template clas // externally and won't be returned by search. At least 1 frozen point is // needed for a dynamic index. The frozen points have consecutive locations. // See also _start below. - size_t _num_frozen_pts = 0; - size_t _frozen_pts_used = 0; +// size_t _num_frozen_pts = 0; +// size_t _frozen_pts_used = 0; size_t _node_size; size_t _data_len; size_t _neighbor_len; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cbca26440..23a24bbdc 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -13,7 +13,7 @@ else() linux_aligned_file_reader.cpp math_utils.cpp natural_number_map.cpp in_mem_data_store.cpp in_mem_graph_store.cpp natural_number_set.cpp memory_mapper.cpp partition.cpp pq.cpp - pq_flash_index.cpp scratch.cpp logger.cpp utils.cpp filter_utils.cpp index_factory.cpp abstract_index.cpp pq_l2_distance.cpp pq_data_store.cpp) + pq_flash_index.cpp scratch.cpp logger.cpp utils.cpp filter_utils.cpp index_factory.cpp abstract_index.cpp pq_l2_distance.cpp pq_data_store.cpp neighbor_list.cpp in_mem_static_graph_store.cpp) if (RESTAPI) list(APPEND CPP_SOURCES restapi/search_wrapper.cpp restapi/server.cpp) endif() diff --git a/src/dll/CMakeLists.txt b/src/dll/CMakeLists.txt index 096d1b76e..11dde1432 100644 --- a/src/dll/CMakeLists.txt +++ b/src/dll/CMakeLists.txt @@ -4,7 +4,7 @@ add_library(${PROJECT_NAME} SHARED dllmain.cpp ../abstract_data_store.cpp ../partition.cpp ../pq.cpp ../pq_flash_index.cpp ../logger.cpp ../utils.cpp ../windows_aligned_file_reader.cpp ../distance.cpp ../pq_l2_distance.cpp ../memory_mapper.cpp ../index.cpp ../in_mem_data_store.cpp ../pq_data_store.cpp ../in_mem_graph_store.cpp ../math_utils.cpp ../disk_utils.cpp ../filter_utils.cpp - ../ann_exception.cpp ../natural_number_set.cpp ../natural_number_map.cpp ../scratch.cpp ../index_factory.cpp ../abstract_index.cpp) + ../ann_exception.cpp ../natural_number_set.cpp ../natural_number_map.cpp ../scratch.cpp ../index_factory.cpp ../abstract_index.cpp ../neighbor_list.cpp ../in_mem_static_graph_store.cpp) set(TARGET_DIR "$<$:${CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG}>$<$:${CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE}>") diff --git a/src/index.cpp b/src/index.cpp index 4daf2839c..5227e980a 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -36,7 +36,7 @@ Index::Index(const IndexConfig &index_config, std::shared_ptr graph_store, std::shared_ptr> pq_data_store) : _dist_metric(index_config.metric), _dim(index_config.dimension), _max_points(index_config.max_points), - _num_frozen_pts(index_config.num_frozen_pts), _dynamic_index(index_config.dynamic_index), + _dynamic_index(index_config.dynamic_index), _enable_tags(index_config.enable_tags), _indexingMaxC(DEFAULT_MAXC), _query_scratch(nullptr), _pq_dist(index_config.pq_dist_build), _use_opq(index_config.use_opq), _filtered_index(index_config.filtered_index), _num_pq_chunks(index_config.num_pq_chunks), @@ -60,17 +60,17 @@ Index::Index(const IndexConfig &index_config, std::shared_ptr size_t Index size_t Index 0) - { - std::memset((char *)&tag_data[_start], 0, sizeof(TagT) * _num_frozen_pts); - } + try { - tag_bytes_written = save_bin(tags_file, tag_data, _nd + _num_frozen_pts, 1); + tag_bytes_written = save_bin(tags_file, tag_data, _nd, 1); } catch (std::system_error &e) { @@ -240,7 +237,7 @@ template size_t Indexsave(data_file, (location_t)(_nd + _num_frozen_pts)); + return _data_store->save(data_file, (location_t)(_nd)); } // save the graph index on a file as an adjacency list. For each point, @@ -248,7 +245,7 @@ template size_t Index size_t Index::save_graph(std::string graph_file) { - return _graph_store->store(graph_file, _nd + _num_frozen_pts, _num_frozen_pts, _start); + return _graph_store->store(graph_file, _nd, 0, _start); } template @@ -280,7 +277,7 @@ void Index::save(const char *filename, bool compact_before_save if (compact_before_save) { compact_data(); - compact_frozen_point(); + // compact_frozen_point(); } else { @@ -321,7 +318,7 @@ void Index::save(const char *filename, bool compact_before_save { std::ofstream label_writer(std::string(filename) + "_labels.txt"); assert(label_writer.is_open()); - for (uint32_t i = 0; i < _nd + _num_frozen_pts; i++) + for (uint32_t i = 0; i < _nd; i++) { for (uint32_t j = 0; j + 1 < _location_to_labels[i].size(); j++) { @@ -348,7 +345,7 @@ void Index::save(const char *filename, bool compact_before_save // write updated labels std::ofstream raw_label_writer(std::string(filename) + "_raw_labels.txt"); assert(raw_label_writer.is_open()); - for (uint32_t i = 0; i < _nd + _num_frozen_pts; i++) + for (uint32_t i = 0; i < _nd; i++) { for (uint32_t j = 0; j + 1 < _location_to_labels[i].size(); j++) { @@ -392,7 +389,7 @@ void Index::save(const char *filename, bool compact_before_save // If frozen points were temporarily compacted to _nd, move back to // _max_points. - reposition_frozen_point_to_end(); + //reposition_frozen_point_to_end(); diskann::cout << "Time taken for save: " << timer.elapsed() / 1000000.0 << "s." << std::endl; } @@ -436,7 +433,7 @@ size_t Index::load_tags(const std::string tag_filename) throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } - const size_t num_data_points = file_num_points - _num_frozen_pts; + const size_t num_data_points = file_num_points; _location_to_tag.reserve(num_data_points); _tag_to_location.reserve(num_data_points); for (uint32_t i = 0; i < (uint32_t)num_data_points; i++) @@ -487,10 +484,10 @@ size_t Index::load_data(std::string filename) throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } - if (file_num_points > _max_points + _num_frozen_pts) + if (file_num_points > _max_points) { // update and tag lock acquired in load() before calling load_data - resize(file_num_points - _num_frozen_pts); + resize(file_num_points); } #ifdef EXEC_ENV_OLS @@ -586,7 +583,7 @@ void Index::load(const char *filename, uint32_t num_threads, ui std::stringstream stream; stream << "ERROR: When loading index, loaded " << data_file_num_pts << " points from datafile, " << graph_num_pts << " from graph, and " << tags_file_num_pts - << " tags, with num_frozen_pts being set to " << _num_frozen_pts << " in constructor." << std::endl; + << " tags in constructor." << std::endl; diskann::cerr << stream.str() << std::endl; throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -595,7 +592,7 @@ void Index::load(const char *filename, uint32_t num_threads, ui { _label_map = load_label_map(labels_map_file); parse_label_file_in_bitset(labels_file, label_num_pts, _label_map.size()); - assert(label_num_pts == data_file_num_pts - _num_frozen_pts); + assert(label_num_pts == data_file_num_pts); if (file_exists(labels_to_medoids)) { std::ifstream medoid_stream(labels_to_medoids); @@ -637,7 +634,7 @@ void Index::load(const char *filename, uint32_t num_threads, ui } } - _nd = data_file_num_pts - _num_frozen_pts; + _nd = data_file_num_pts; _empty_slots.clear(); _empty_slots.reserve(_max_points); for (auto i = _nd; i < _max_points; i++) @@ -645,8 +642,8 @@ void Index::load(const char *filename, uint32_t num_threads, ui _empty_slots.insert((uint32_t)i); } - reposition_frozen_point_to_end(); - diskann::cout << "Num frozen points:" << _num_frozen_pts << " _nd: " << _nd << " _start: " << _start +// reposition_frozen_point_to_end(); + diskann::cout << " _nd: " << _nd << " _start: " << _start << " size(_location_to_tag): " << _location_to_tag.size() << " size(_tag_to_location):" << _tag_to_location.size() << " Max points: " << _max_points << std::endl; @@ -663,24 +660,24 @@ void Index::load(const char *filename, uint32_t num_threads, ui } #ifndef EXEC_ENV_OLS -template -size_t Index::get_graph_num_frozen_points(const std::string &graph_file) -{ - size_t expected_file_size; - uint32_t max_observed_degree, start; - size_t file_frozen_pts; - - std::ifstream in; - in.exceptions(std::ios::badbit | std::ios::failbit); - - in.open(graph_file, std::ios::binary); - in.read((char *)&expected_file_size, sizeof(size_t)); - in.read((char *)&max_observed_degree, sizeof(uint32_t)); - in.read((char *)&start, sizeof(uint32_t)); - in.read((char *)&file_frozen_pts, sizeof(size_t)); - - return file_frozen_pts; -} +//template +//size_t Index::get_graph_num_frozen_points(const std::string &graph_file) +//{ +// size_t expected_file_size; +// uint32_t max_observed_degree, start; +// size_t file_frozen_pts; +// +// std::ifstream in; +// in.exceptions(std::ios::badbit | std::ios::failbit); +// +// in.open(graph_file, std::ios::binary); +// in.read((char *)&expected_file_size, sizeof(size_t)); +// in.read((char *)&max_observed_degree, sizeof(uint32_t)); +// in.read((char *)&start, sizeof(uint32_t)); +// in.read((char *)&file_frozen_pts, sizeof(size_t)); +// +// return file_frozen_pts; +//} #endif #ifdef EXEC_ENV_OLS @@ -695,7 +692,7 @@ size_t Index::load_graph(std::string filename, size_t expected_ #endif auto res = _graph_store->load(filename, expected_num_points); _start = std::get<1>(res); - _num_frozen_pts = std::get<2>(res); +// _num_frozen_pts = std::get<2>(res); return std::get<0>(res); } @@ -743,18 +740,10 @@ template uint32_t Index std::vector Index::get_init_ids() { std::vector init_ids; - init_ids.reserve(1 + _num_frozen_pts); + init_ids.reserve(1); init_ids.emplace_back(_start); - for (uint32_t frozen = (uint32_t)_max_points; frozen < _max_points + _num_frozen_pts; frozen++) - { - if (frozen != _start) - { - init_ids.emplace_back(frozen); - } - } - return init_ids; } @@ -818,7 +807,7 @@ std::pair Index::iterate_to_fixed_point( } // Decide whether to use bitset or robin set to mark visited nodes - auto total_num_points = _max_points + _num_frozen_pts; + auto total_num_points = _max_points; bool fast_iterate = total_num_points <= MAX_POINTS_FOR_USING_BITSET; if (fast_iterate) @@ -875,7 +864,7 @@ std::pair Index::iterate_to_fixed_point( // Initialize the candidate pool with starting points for (auto id : init_ids) { - if (id >= _max_points + _num_frozen_pts) + if (id >= _max_points) { diskann::cerr << "Out of range loc found as an edge : " << id << std::endl; throw diskann::ANNException(std::string("Wrong loc") + std::to_string(id), -1, __FUNCSIG__, __FILE__, @@ -951,7 +940,7 @@ std::pair Index::iterate_to_fixed_point( auto neighbour_list = _graph_store->get_neighbours(n); for (auto id : neighbour_list) { - assert(id < _max_points + _num_frozen_pts); + assert(id < _max_points); if (!is_not_visited(id)) { @@ -983,7 +972,7 @@ std::pair Index::iterate_to_fixed_point( _locks[n].unlock_shared(); for (auto id : tmp_neighbor_list) { - assert(id < _max_points + _num_frozen_pts); + assert(id < _max_points); if (!is_not_visited(id)) { @@ -1112,7 +1101,7 @@ void Index::search_for_point_and_prune(int location, uint32_t L prune_neighbors(location, pool, pruned_list, scratch); assert(!pruned_list.empty()); - assert(_graph_store->get_total_points() == _max_points + _num_frozen_pts); + assert(_graph_store->get_total_points() == _max_points); } template @@ -1266,7 +1255,7 @@ void Index::inter_insert(uint32_t n, std::vector &pru for (auto des : src_pool) { // des.loc is the loc of the neighbors of n - assert(des < _max_points + _num_frozen_pts); + assert(des < _max_points); bool prune_needed = false; { @@ -1345,23 +1334,14 @@ template void Index visit_order; std::vector pool, tmp; tsl::robin_set visited; - visit_order.reserve(_nd + _num_frozen_pts); + visit_order.reserve(_nd); for (uint32_t i = 0; i < (uint32_t)_nd; i++) { visit_order.emplace_back(i); } - // If there are any frozen points, add them all. - for (uint32_t frozen = (uint32_t)_max_points; frozen < _max_points + _num_frozen_pts; frozen++) - { - visit_order.emplace_back(frozen); - } - // if there are frozen points, the first such one is set to be the _start - if (_num_frozen_pts > 0) - _start = (uint32_t)_max_points; - else - _start = calculate_entry_point(); + _start = calculate_entry_point(); diskann::Timer link_timer; @@ -1450,7 +1430,7 @@ void Index::prune_all_neighbors(const uint32_t max_degree, cons diskann::Timer timer; #pragma omp parallel for - for (int64_t node = 0; node < (int64_t)(_max_points + _num_frozen_pts); node++) + for (int64_t node = 0; node < (int64_t)(_max_points); node++) { if ((size_t)node < _nd || (size_t)node >= _max_points) { @@ -1483,7 +1463,7 @@ void Index::prune_all_neighbors(const uint32_t max_degree, cons diskann::cout << "Prune time : " << timer.elapsed() / 1000 << "ms" << std::endl; size_t max = 0, min = 1 << 30, total = 0, cnt = 0; - for (size_t i = 0; i < _max_points + _num_frozen_pts; i++) + for (size_t i = 0; i < _max_points; i++) { if (i < _nd || i >= _max_points) { @@ -1500,7 +1480,7 @@ void Index::prune_all_neighbors(const uint32_t max_degree, cons if (_nd > 0) { diskann::cout << "Index built with degree: max:" << max - << " avg:" << (float)total / (float)(_nd + _num_frozen_pts) << " min:" << min + << " avg:" << (float)total / (float)(_nd) << " min:" << min << " count(deg<2):" << cnt << std::endl; } } @@ -1514,17 +1494,15 @@ void Index::set_start_points(const T *data, size_t data_count) if (_nd > 0) throw ANNException("Can not set starting point for a non-empty index", -1, __FUNCSIG__, __FILE__, __LINE__); - if (data_count != _num_frozen_pts * _dim) + if (data_count != _dim) throw ANNException("Invalid number of points", -1, __FUNCSIG__, __FILE__, __LINE__); // memcpy(_data + _aligned_dim * _max_points, data, _aligned_dim * // sizeof(T) * _num_frozen_pts); - for (location_t i = 0; i < _num_frozen_pts; i++) - { - _data_store->set_vector((location_t)(i + _max_points), data + i * _dim); - } + _data_store->set_vector((location_t)(_max_points), data); + _has_built = true; - diskann::cout << "Index start points set: #" << _num_frozen_pts << std::endl; + diskann::cout << "Index start points set in location: #" << _max_points << std::endl; } template @@ -1553,24 +1531,21 @@ void Index::set_start_points_at_random(T radius, uint32_t rando std::normal_distribution<> d{0.0, 1.0}; std::vector points_data; - points_data.reserve(_dim * _num_frozen_pts); + points_data.reserve(_dim); std::vector real_vec(_dim); - for (size_t frozen_point = 0; frozen_point < _num_frozen_pts; frozen_point++) + double norm_sq = 0.0; + for (size_t i = 0; i < _dim; ++i) { - double norm_sq = 0.0; - for (size_t i = 0; i < _dim; ++i) - { - auto r = d(gen); - real_vec[i] = r; - norm_sq += r * r; - } - - const double norm = std::sqrt(norm_sq); - for (auto iter : real_vec) - points_data.push_back(static_cast(iter * radius / norm)); + auto r = d(gen); + real_vec[i] = r; + norm_sq += r * r; } + const double norm = std::sqrt(norm_sq); + for (auto iter : real_vec) + points_data.push_back(static_cast(iter * radius / norm)); + set_start_points(points_data.data(), points_data.size()); } @@ -1611,7 +1586,7 @@ void Index::build_with_data_populated(const std::vector & _data_store->get_aligned_dim()); } - generate_frozen_point(); +// generate_frozen_point(); link(); size_t max = 0, min = SIZE_MAX, total = 0, cnt = 0; @@ -1624,7 +1599,7 @@ void Index::build_with_data_populated(const std::vector & if (pool.size() < 2) cnt++; } - diskann::cout << "Index built with degree: max:" << max << " avg:" << (float)total / (float)(_nd + _num_frozen_pts) + diskann::cout << "Index built with degree: max:" << max << " avg:" << (float)total / (float)(_nd) << " min:" << min << " count(deg<2):" << cnt << std::endl; _has_built = true; @@ -2495,39 +2470,39 @@ template size_t Index void Index::generate_frozen_point() -{ - if (_num_frozen_pts == 0) - return; - - if (_num_frozen_pts > 1) - { - throw ANNException("More than one frozen point not supported in generate_frozen_point", -1, __FUNCSIG__, - __FILE__, __LINE__); - } - - if (_nd == 0) - { - throw ANNException("ERROR: Can not pick a frozen point since nd=0", -1, __FUNCSIG__, __FILE__, __LINE__); - } - size_t res = calculate_entry_point(); - - // REFACTOR PQ: Not sure if we should do this for both stores. - if (_pq_dist) - { - // copy the PQ data corresponding to the point returned by - // calculate_entry_point - // memcpy(_pq_data + _max_points * _num_pq_chunks, - // _pq_data + res * _num_pq_chunks, - // _num_pq_chunks * DIV_ROUND_UP(NUM_PQ_BITS, 8)); - _pq_data_store->copy_vectors((location_t)res, (location_t)_max_points, 1); - } - else - { - _data_store->copy_vectors((location_t)res, (location_t)_max_points, 1); - } - _frozen_pts_used++; -} +//template void Index::generate_frozen_point() +//{ +// if (_num_frozen_pts == 0) +// return; +// +// if (_num_frozen_pts > 1) +// { +// throw ANNException("More than one frozen point not supported in generate_frozen_point", -1, __FUNCSIG__, +// __FILE__, __LINE__); +// } +// +// if (_nd == 0) +// { +// throw ANNException("ERROR: Can not pick a frozen point since nd=0", -1, __FUNCSIG__, __FILE__, __LINE__); +// } +// size_t res = calculate_entry_point(); +// +// // REFACTOR PQ: Not sure if we should do this for both stores. +// if (_pq_dist) +// { +// // copy the PQ data corresponding to the point returned by +// // calculate_entry_point +// // memcpy(_pq_data + _max_points * _num_pq_chunks, +// // _pq_data + res * _num_pq_chunks, +// // _num_pq_chunks * DIV_ROUND_UP(NUM_PQ_BITS, 8)); +// _pq_data_store->copy_vectors((location_t)res, (location_t)_max_points, 1); +// } +// else +// { +// _data_store->copy_vectors((location_t)res, (location_t)_max_points, 1); +// } +// _frozen_pts_used++; +//} template int Index::enable_delete() { @@ -2703,7 +2678,7 @@ consolidation_report Index::consolidate_deletes(const IndexWrit num_calls_to_process_delete += 1; } } - for (int64_t loc = _max_points; loc < (int64_t)(_max_points + _num_frozen_pts); loc++) + for (int64_t loc = _max_points; loc < (int64_t)(_max_points); loc++) { ScratchStoreManager> manager(_query_scratch); auto scratch = manager.scratch_space(); @@ -2732,25 +2707,25 @@ consolidation_report Index::consolidate_deletes(const IndexWrit duration); } -template void Index::compact_frozen_point() -{ - if (_nd < _max_points && _num_frozen_pts > 0) - { - reposition_points((uint32_t)_max_points, (uint32_t)_nd, (uint32_t)_num_frozen_pts); - _start = (uint32_t)_nd; - - if (_filtered_index && _dynamic_index) - { - // update medoid id's as frozen points are treated as medoid - for (auto &[label, medoid_id] : _label_to_start_id) - { - /* if (label == _universal_label) - continue;*/ - _label_to_start_id[label] = (uint32_t)_nd + (medoid_id - (uint32_t)_max_points); - } - } - } -} +//template void Index::compact_frozen_point() +//{ +// if (_nd < _max_points && _num_frozen_pts > 0) +// { +// reposition_points((uint32_t)_max_points, (uint32_t)_nd, (uint32_t)_num_frozen_pts); +// _start = (uint32_t)_nd; +// +// if (_filtered_index && _dynamic_index) +// { +// // update medoid id's as frozen points are treated as medoid +// for (auto &[label, medoid_id] : _label_to_start_id) +// { +// /* if (label == _universal_label) +// continue;*/ +// _label_to_start_id[label] = (uint32_t)_nd + (medoid_id - (uint32_t)_max_points); +// } +// } +// } +//} // Should be called after acquiring _update_lock template void Index::compact_data() @@ -2774,7 +2749,7 @@ template void Index new_location = std::vector(_max_points + _num_frozen_pts, UINT32_MAX); + std::vector new_location = std::vector(_max_points, UINT32_MAX); uint32_t new_counter = 0; std::set empty_locations; @@ -2790,10 +2765,6 @@ template void Index void Index new_adj_list; - if ((new_location[old] < _max_points) // If point continues to exist - || (old >= _max_points && old < _max_points + _num_frozen_pts)) + if (new_location[old] < _max_points) // If point continues to exist { auto neighbour_list = _graph_store->get_neighbours((location_t)old); new_adj_list.reserve(neighbour_list.size()); @@ -2959,7 +2929,7 @@ void Index::reposition_points(uint32_t old_location_start, uint const uint32_t location_delta = new_location_start - old_location_start; std::vector updated_neighbours_location; - for (uint32_t i = 0; i < _max_points + _num_frozen_pts; i++) + for (uint32_t i = 0; i < _max_points; i++) { auto i_neighbours = _graph_store->get_neighbours((location_t)i); std::vector i_neighbours_copy; @@ -3027,35 +2997,35 @@ void Index::reposition_points(uint32_t old_location_start, uint _data_store->move_vectors(old_location_start, new_location_start, num_locations); } -template void Index::reposition_frozen_point_to_end() -{ - if (_num_frozen_pts == 0) - return; - - if (_nd == _max_points) - { - diskann::cout << "Not repositioning frozen point as it is already at the end." << std::endl; - return; - } - - reposition_points((uint32_t)_nd, (uint32_t)_max_points, (uint32_t)_num_frozen_pts); - _start = (uint32_t)_max_points; - - // update medoid id's as frozen points are treated as medoid - if (_filtered_index && _dynamic_index) - { - for (auto &[label, medoid_id] : _label_to_start_id) - { - /*if (label == _universal_label) - continue;*/ - _label_to_start_id[label] = (uint32_t)_max_points + (medoid_id - (uint32_t)_nd); - } - } -} +//template void Index::reposition_frozen_point_to_end() +//{ +// if (_num_frozen_pts == 0) +// return; +// +// if (_nd == _max_points) +// { +// diskann::cout << "Not repositioning frozen point as it is already at the end." << std::endl; +// return; +// } +// +// reposition_points((uint32_t)_nd, (uint32_t)_max_points, (uint32_t)_num_frozen_pts); +// _start = (uint32_t)_max_points; +// +// // update medoid id's as frozen points are treated as medoid +// if (_filtered_index && _dynamic_index) +// { +// for (auto &[label, medoid_id] : _label_to_start_id) +// { +// /*if (label == _universal_label) +// continue;*/ +// _label_to_start_id[label] = (uint32_t)_max_points + (medoid_id - (uint32_t)_nd); +// } +// } +//} template void Index::resize(size_t new_max_points) { - const size_t new_internal_points = new_max_points + _num_frozen_pts; + const size_t new_internal_points = new_max_points ; auto start = std::chrono::high_resolution_clock::now(); assert(_empty_slots.size() == 0); // should not resize if there are empty slots. @@ -3063,12 +3033,6 @@ template void Indexresize_graph(new_internal_points); _locks = std::vector(new_internal_points); - if (_num_frozen_pts != 0) - { - reposition_points((uint32_t)_max_points, (uint32_t)new_max_points, (uint32_t)_num_frozen_pts); - _start = (uint32_t)new_max_points; - } - _max_points = new_max_points; _empty_slots.reserve(_max_points); for (auto i = _nd; i < _max_points; i++) @@ -3151,28 +3115,16 @@ int Index::insert_point(const T *point, const TagT tag, const s return -1; } - _location_to_labels[location] = labels; - + // don't support new label for (LabelT label : labels) { if (_labels.find(label) == _labels.end()) { - if (_frozen_pts_used >= _num_frozen_pts) - { - throw ANNException( - "Error: For dynamic filtered index, the number of frozen points should be atleast equal " - "to number of unique labels.", - -1); - } - - auto fz_location = (int)(_max_points) + _frozen_pts_used; // as first _fz_point - _labels.insert(label); - _label_to_start_id[label] = (uint32_t)fz_location; - _location_to_labels[fz_location] = {label}; - _data_store->set_vector((location_t)fz_location, point); - _frozen_pts_used++; + return -1; } } + + _location_to_labels[location] = labels; } if (location == -1) @@ -3409,7 +3361,7 @@ template void Index ul(_update_lock); - boost::dynamic_bitset<> visited(_max_points + _num_frozen_pts); + boost::dynamic_bitset<> visited(_max_points); size_t MAX_BFS_LEVELS = 32; auto bfs_sets = new tsl::robin_set[MAX_BFS_LEVELS]; @@ -3417,7 +3369,7 @@ template void Index