From 54bdd93e582643d81a8c8469e6abcba08c5bd0a6 Mon Sep 17 00:00:00 2001 From: gopalrs <33950290+gopalrs@users.noreply.github.com> Date: Sun, 6 Oct 2024 23:16:20 +0530 Subject: [PATCH 1/3] Update disk-pq.yml --- .github/workflows/disk-pq.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/disk-pq.yml b/.github/workflows/disk-pq.yml index 6e71e7999..f30f1483b 100644 --- a/.github/workflows/disk-pq.yml +++ b/.github/workflows/disk-pq.yml @@ -109,7 +109,7 @@ jobs: dist/bin/search_disk_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_mips_rand_float_10D_10K_norm1.0_diskpq_sharded --result_path /tmp/res --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/mips_rand_float_10D_10K_norm1.0_10D_1K_norm1.0_gt100 --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16 - name: upload data and bin - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: disk-pq path: | From bae427e9186f98da5d63e4f0ada410af415083f8 Mon Sep 17 00:00:00 2001 From: gopalrs <33950290+gopalrs@users.noreply.github.com> Date: Sun, 6 Oct 2024 23:54:29 +0530 Subject: [PATCH 2/3] =?UTF-8?q?Fixing=20dummy=20map=20file=20name=20to=20b?= =?UTF-8?q?e=20consistent=20with=20PQFlashIndex=20and=20del=E2=80=A6=20(#5?= =?UTF-8?q?88)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fixing dummy map file name to be consistent with PQFlashIndex and deleted unnecessary files * Fixed code formatting * Upgrading upload-artifact to v4 --------- Co-authored-by: Gopal Srinivasa --- .github/workflows/build-python-pdoc.yml | 4 ++-- .github/workflows/dynamic-labels.yml | 2 +- .github/workflows/dynamic.yml | 2 +- .github/workflows/in-mem-no-pq.yml | 2 +- .github/workflows/in-mem-pq.yml | 2 +- .github/workflows/labels.yml | 2 +- .github/workflows/multi-sector-disk-pq.yml | 2 +- .github/workflows/perf.yml | 2 +- .github/workflows/push-test.yml | 2 +- 
src/disk_utils.cpp | 7 ++++--- 10 files changed, 14 insertions(+), 13 deletions(-) diff --git a/.github/workflows/build-python-pdoc.yml b/.github/workflows/build-python-pdoc.yml index 28766ad02..96c729605 100644 --- a/.github/workflows/build-python-pdoc.yml +++ b/.github/workflows/build-python-pdoc.yml @@ -43,13 +43,13 @@ jobs: EOF )" >> $GITHUB_ENV - name: Archive documentation version artifact - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: dependencies path: | dependencies_documentation.txt - name: Archive documentation artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: documentation-site path: | diff --git a/.github/workflows/dynamic-labels.yml b/.github/workflows/dynamic-labels.yml index 0f3b56eb9..43cb6e6bd 100644 --- a/.github/workflows/dynamic-labels.yml +++ b/.github/workflows/dynamic-labels.yml @@ -94,7 +94,7 @@ jobs: dist/bin/search_memory_index --data_type float --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/index_rand_ins_del.after-concurrent-delete-del2500-7500 --result_path res_stream --query_file data/rand_float_10D_1K_norm1.0.bin --gt_file data/gt100_rand_random10D_1K -K 10 -L 20 40 60 80 100 -T 64 - name: upload data and bin - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: dynamic path: | diff --git a/.github/workflows/dynamic.yml b/.github/workflows/dynamic.yml index 35eb6d42d..5be88ddf5 100644 --- a/.github/workflows/dynamic.yml +++ b/.github/workflows/dynamic.yml @@ -67,7 +67,7 @@ jobs: dist/bin/search_memory_index --data_type uint8 --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/index_ins_del.after-concurrent-delete-del2500-7500 --result_path data/res_ins_del --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/gt100_random10D_10K-conc-2500-7500 -K 10 -L 20 40 60 80 100 -T 8 --dynamic true --tags 1 - name: upload data and bin - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 
with: name: dynamic path: | diff --git a/.github/workflows/in-mem-no-pq.yml b/.github/workflows/in-mem-no-pq.yml index 0039754d2..84263e1d5 100644 --- a/.github/workflows/in-mem-no-pq.yml +++ b/.github/workflows/in-mem-no-pq.yml @@ -73,7 +73,7 @@ jobs: dist/bin/search_memory_index --data_type uint8 --dist_fn cosine --fail_if_recall_below 70 --index_path_prefix data/index_l2_rand_uint8_10D_10K_norm50.0 --query_file data/rand_uint8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/cosine_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 -L 16 32 - name: upload data and bin - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: in-memory-no-pq path: | diff --git a/.github/workflows/in-mem-pq.yml b/.github/workflows/in-mem-pq.yml index f9276adfc..8726d2c65 100644 --- a/.github/workflows/in-mem-pq.yml +++ b/.github/workflows/in-mem-pq.yml @@ -48,7 +48,7 @@ jobs: dist/bin/search_memory_index --data_type uint8 --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/index_l2_rand_uint8_10D_10K_norm50.0_buildpq5 --query_file data/rand_uint8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/l2_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100 -L 16 32 - name: upload data and bin - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: in-memory-pq path: | diff --git a/.github/workflows/labels.yml b/.github/workflows/labels.yml index 5555f7f84..e08a1571e 100644 --- a/.github/workflows/labels.yml +++ b/.github/workflows/labels.yml @@ -111,7 +111,7 @@ jobs: dist/bin/search_memory_index --num_threads 48 --data_type uint8 --dist_fn l2 --filter_label 5 --index_path_prefix data/stit_zipf_32_100_64_new --query_file data/rand_uint8_10D_1K_norm50.0.bin --result_path data/zipf_stit_96_10_90_new --gt_file data/l2_zipf_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel -K 10 -L 16 32 150 - name: upload data and bin - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: 
name: labels path: | diff --git a/.github/workflows/multi-sector-disk-pq.yml b/.github/workflows/multi-sector-disk-pq.yml index 8ea55c88d..391d80d64 100644 --- a/.github/workflows/multi-sector-disk-pq.yml +++ b/.github/workflows/multi-sector-disk-pq.yml @@ -52,7 +52,7 @@ jobs: dist/bin/search_disk_index --data_type int8 --dist_fn l2 --fail_if_recall_below 70 --index_path_prefix data/disk_index_l2_rand_int8_4096D_5K_norm1.0_diskfull_oneshot --result_path /tmp/res --query_file data/rand_int8_4096D_1K_norm1.0.bin --gt_file data/l2_rand_int8_4096D_5K_norm1.0_4096D_1K_norm1.0_gt100 --recall_at 5 -L 250 -W 2 --num_nodes_to_cache 100 -T 16 - name: upload data and bin - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: multi-sector-disk-pq path: | diff --git a/.github/workflows/perf.yml b/.github/workflows/perf.yml index 1595a4221..b399b70cb 100644 --- a/.github/workflows/perf.yml +++ b/.github/workflows/perf.yml @@ -19,7 +19,7 @@ jobs: mkdir metrics docker run -v ./metrics:/app/logs perf &> ./metrics/combined_stdouterr.log - name: Upload Metrics Logs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: metrics path: | diff --git a/.github/workflows/push-test.yml b/.github/workflows/push-test.yml index 89e6ae018..f9d63ca89 100644 --- a/.github/workflows/push-test.yml +++ b/.github/workflows/push-test.yml @@ -41,7 +41,7 @@ jobs: echo "dependencies" > dependencies_${{ matrix.os }}.txt pipdeptree >> dependencies_${{ matrix.os }}.txt - name: Archive dispannpy dependencies artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: dependencies path: | diff --git a/src/disk_utils.cpp b/src/disk_utils.cpp index 016560217..22f1e98fd 100644 --- a/src/disk_utils.cpp +++ b/src/disk_utils.cpp @@ -1188,8 +1188,8 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const std::string mem_univ_label_file = mem_index_path + "_universal_label.txt"; std::string 
disk_univ_label_file = disk_index_path + "_universal_label.txt"; std::string disk_labels_int_map_file = disk_index_path + "_labels_map.txt"; - std::string dummy_remap_file = disk_index_path + "_dummy_remap.txt"; // remap will be used if we break-up points of - // high label-density to create copies + std::string dummy_remap_file = disk_index_path + "_dummy_map.txt"; // remap will be used if we break-up points of + // high label-density to create copies std::string sample_base_prefix = index_prefix_path + "_sample"; // optional, used if disk index file must store pq data @@ -1274,7 +1274,6 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const augmented_labels_file = index_prefix_path + "_augmented_labels.txt"; if (filter_threshold != 0) { - dummy_remap_file = index_prefix_path + "_dummy_remap.txt"; breakup_dense_points(data_file_to_use, labels_file_to_use, filter_threshold, augmented_data_file, augmented_labels_file, dummy_remap_file); // RKNOTE: This has large memory footprint, @@ -1365,6 +1364,8 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const if (created_temp_file_for_processed_data) std::remove(prepped_base.c_str()); std::remove(mem_index_path.c_str()); + std::remove((mem_index_path + ".data").c_str()); + std::remove((mem_index_path + ".tags").c_str()); if (use_disk_pq) std::remove(disk_pq_compressed_vectors_path.c_str()); From 6f2691c726f911ce0d288a1d6854c9885cc52a5a Mon Sep 17 00:00:00 2001 From: gopalrs <33950290+gopalrs@users.noreply.github.com> Date: Tue, 15 Oct 2024 17:33:49 +0530 Subject: [PATCH 3/3] Fix the code hang while caching nodes if search is run with a single thread (#590) * Fixing other places with the single search thread hang issue * Fixing other places with the single search thread hang issue * Adding overwrite option to delete old versions * Add a run id to files to avoid naming conflicts --------- Co-authored-by: Gopal Srinivasa --- .github/workflows/build-python-pdoc.yml | 3 
++- src/pq_flash_index.cpp | 5 ----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-python-pdoc.yml b/.github/workflows/build-python-pdoc.yml index 96c729605..444a7ee6e 100644 --- a/.github/workflows/build-python-pdoc.yml +++ b/.github/workflows/build-python-pdoc.yml @@ -47,7 +47,8 @@ jobs: with: name: dependencies path: | - dependencies_documentation.txt + ${{ github.run_id }}-dependencies_documentation.txt + overwrite: true - name: Archive documentation artifacts uses: actions/upload-artifact@v4 with: diff --git a/src/pq_flash_index.cpp b/src/pq_flash_index.cpp index fbb81d55f..d9ad50617 100644 --- a/src/pq_flash_index.cpp +++ b/src/pq_flash_index.cpp @@ -365,11 +365,6 @@ void PQFlashIndex<T, LabelT>::cache_bfs_levels(uint64_t num_nodes_to_cache, std: } diskann::cout << "Caching " << num_nodes_to_cache << "..." << std::endl; - // borrow thread data - ScratchStoreManager<SSDThreadData<T>> manager(this->_thread_data); - auto this_thread_data = manager.scratch_space(); - IOContext &ctx = this_thread_data->ctx; - std::unique_ptr<tsl::robin_set<uint32_t>> cur_level, prev_level; cur_level = std::make_unique<tsl::robin_set<uint32_t>>(); prev_level = std::make_unique<tsl::robin_set<uint32_t>>();