From 497184d0cc23e316fe14adac538d73d42f3f4374 Mon Sep 17 00:00:00 2001
From: Sanhaoji2
Date: Wed, 27 Nov 2024 16:23:30 +0800
Subject: [PATCH] Fix some issue

---
Review notes (this section is not part of the commit message):

What the patch does, hunk by hunk:
  * include/index.h: declares Index::is_frozen_point(uint32_t) const.
  * parse_label_file / parse_label_file_in_bitset: when _dynamic_index is set,
    the label storage is sized by _max_points instead of the number of lines
    in the label file.
  * consolidate_deletes: the check
    "_location_to_tag.size() + _delete_set->size() != _nd" is commented out,
    and the loop "for (loc = _max_points; loc < _max_points; ...)", whose
    body could never run, is removed.
  * is_frozen_point: returns true when the location equals _start, or, for a
    filtered index, any value stored in _label_to_start_id.
  * insert_point: label validation now runs before the locks are taken and
    before reserve_location(), so the early returns no longer have a slot to
    release. Labels are written into _bitmask_buf instead of
    _location_to_labels.
  * lazy_delete (both overloads): locations for which is_frozen_point() is
    true are not added to _delete_set.

Changed during review (line counts are unchanged, hunk headers still valid):
  * is_frozen_point: iterate _label_to_start_id by const reference instead of
    copying each entry.
  * insert_point: memset() takes a byte count, but _bitmask_size is used as an
    element count when _bitmask_buf._buf is resized; the size is now scaled
    by sizeof(*bitsets) so the whole per-point mask is cleared.

Open questions for the author:
  * NOTE(review): the count check in consolidate_deletes is left as
    commented-out code. Either delete it or state why it no longer holds.
  * NOTE(review): lazy_delete still erases the tag mappings of a frozen
    point even though the point is kept out of _delete_set - confirm this
    is intended.

 include/index.h |  2 +
 src/index.cpp   | 98 +++++++++++++++++++++++++++++++++++--------------
 2 files changed, 72 insertions(+), 28 deletions(-)

diff --git a/include/index.h b/include/index.h
index 3ddae5297..46a771e67 100644
--- a/include/index.h
+++ b/include/index.h
@@ -413,6 +413,8 @@ template clas
     size_t release_location(int location);
     size_t release_locations(const tsl::robin_set &locations);
 
+    bool is_frozen_point(uint32_t location) const; // true for _start / per-label start ids
+
     // Resize the index when no slots are left for insertion.
     // Acquire exclusive _update_lock and _tag_lock before calling.
     void resize(size_t new_max_points);
diff --git a/src/index.cpp b/src/index.cpp
index 5227e980a..0ff4fc332 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -1914,7 +1914,14 @@ void Index::parse_label_file(const std::string &label_file, siz
     {
         line_cnt++;
     }
-    _location_to_labels.resize(line_cnt, std::vector());
+    if (_dynamic_index)
+    {
+        _location_to_labels.resize(_max_points, std::vector());
+    }
+    else
+    {
+        _location_to_labels.resize(line_cnt, std::vector());
+    }
 
     infile.clear();
     infile.seekg(0, std::ios::beg);
@@ -1994,7 +2001,14 @@ void Index::parse_label_file_in_bitset(const std::string& label
     }
 
     _bitmask_buf._bitmask_size = simple_bitmask::get_bitmask_size(num_labels);
-    _bitmask_buf._buf.resize(line_cnt * _bitmask_buf._bitmask_size, 0);
+    if (_dynamic_index)
+    {
+        _bitmask_buf._buf.resize(_max_points * _bitmask_buf._bitmask_size, 0);
+    }
+    else
+    {
+        _bitmask_buf._buf.resize(line_cnt * _bitmask_buf._bitmask_size, 0);
+    }
 
     infile.clear();
     infile.seekg(0, std::ios::beg);
@@ -2621,13 +2635,13 @@ consolidation_report Index::consolidate_deletes(const IndexWrit
         throw ANNException(err, -1, __FUNCSIG__, __FILE__, __LINE__);
     }
 
-    if (_location_to_tag.size() + _delete_set->size() != _nd)
-    {
-        diskann::cerr << "Error: _location_to_tag.size (" << _location_to_tag.size() << ") + _delete_set->size ("
-                      << _delete_set->size() << ") != _nd(" << _nd << ") ";
-        return consolidation_report(diskann::consolidation_report::status_code::INCONSISTENT_COUNT_ERROR, 0, 0, 0,
-                                    0, 0, 0, 0);
-    }
+    //if (_location_to_tag.size() + _delete_set->size() != _nd)
+    //{
+    //    diskann::cerr << "Error: _location_to_tag.size (" << _location_to_tag.size() << ") + _delete_set->size ("
+    //                  << _delete_set->size() << ") != _nd(" << _nd << ") ";
+    //    return consolidation_report(diskann::consolidation_report::status_code::INCONSISTENT_COUNT_ERROR, 0, 0, 0,
+    //        0, 0, 0, 0);
+    //}
 
     if (_location_to_tag.size() != _tag_to_location.size())
     {
@@ -2678,13 +2692,6 @@ consolidation_report Index::consolidate_deletes(const IndexWrit
             num_calls_to_process_delete += 1;
         }
     }
-    for (int64_t loc = _max_points; loc < (int64_t)(_max_points); loc++)
-    {
-        ScratchStoreManager> manager(_query_scratch);
-        auto scratch = manager.scratch_space();
-        process_delete(*old_delete_set, loc, range, maxc, alpha, scratch);
-        num_calls_to_process_delete += 1;
-    }
 
     std::unique_lock tl(_tag_lock);
     size_t ret_nd = release_locations(*old_delete_set);
@@ -2914,6 +2921,24 @@ size_t Index::release_locations(const tsl::robin_set
     return _nd;
 }
 
+template
+bool Index::is_frozen_point(uint32_t location) const
+{
+    if (_filtered_index) // filtered index: compare against every per-label start id
+    {
+        for (const auto &kv : _label_to_start_id) // by reference: no per-iteration copy of the entry
+        {
+            if (kv.second == location)
+            {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    return _start == location; // unfiltered index: only _start is compared
+}
+
 template
 void Index::reposition_points(uint32_t old_location_start, uint32_t new_location_start,
                               uint32_t num_locations)
@@ -3099,19 +3124,13 @@ int Index::insert_point(const T *point, const TagT tag, const s
                            -1, __FUNCSIG__, __FILE__, __LINE__);
     }
 
-    std::shared_lock shared_ul(_update_lock);
-    std::unique_lock tl(_tag_lock);
-    std::unique_lock dl(_delete_lock);
-
-    auto location = reserve_location();
     if (_filtered_index)
     {
         if (labels.empty())
         {
-            release_location(location);
             std::cerr << "Error: Can't insert point with tag " + get_tag_string(tag) +
-                             " . there are no labels for the point."
-                      << std::endl;
+                " . there are no labels for the point."
+                << std::endl;
             return -1;
         }
 
@@ -3123,10 +3142,14 @@ int Index::insert_point(const T *point, const TagT tag, const s
                 return -1;
             }
         }
-
-        _location_to_labels[location] = labels;
     }
 
+    std::shared_lock shared_ul(_update_lock);
+    std::unique_lock tl(_tag_lock);
+    std::unique_lock dl(_delete_lock);
+
+    auto location = reserve_location();
+
     if (location == -1)
     {
 #if EXPAND_IF_FULL
@@ -3167,6 +3190,18 @@ int Index::insert_point(const T *point, const TagT tag, const s
     } // cant insert as active pts >= max_pts
     dl.unlock();
 
+    if (_filtered_index)
+    {
+        // _location_to_labels[location] = labels;
+        auto bitsets = _bitmask_buf.get_bitmask(location);
+        memset(bitsets, 0, _bitmask_buf._bitmask_size * sizeof(*bitsets)); // memset wants bytes, not elements
+        simple_bitmask bm(bitsets, _bitmask_buf._bitmask_size);
+        for (LabelT label : labels)
+        {
+            bm.set(label);
+        }
+    }
+
     // Insert tag and mapping to location
     if (_enable_tags)
     {
@@ -3271,7 +3306,11 @@ template int Index
     assert(_tag_to_location[tag] < _max_points);
 
     const auto location = _tag_to_location[tag];
-    _delete_set->insert(location);
+    if (!is_frozen_point(location))
+    {
+        _delete_set->insert(location);
+    }
+
     _location_to_tag.erase(location);
     _tag_to_location.erase(tag);
     return 0;
@@ -3298,7 +3337,10 @@ void Index::lazy_delete(const std::vector &tags, std::vec
         else
         {
             const auto location = _tag_to_location[tag];
-            _delete_set->insert(location);
+            if (!is_frozen_point(location))
+            {
+                _delete_set->insert(location);
+            }
             _location_to_tag.erase(location);
             _tag_to_location.erase(tag);
         }