Skip to content

Commit

Permalink
Delete the mem index file and sample files
Browse files (browse the repository at this point in the history)
  • Loading branch information
gopal-msr committed Jul 20, 2024
1 parent 98141ce commit 96e1751
Showing 1 changed file with 7 additions and 9 deletions.
16 changes: 7 additions & 9 deletions src/disk_utils.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -1169,7 +1169,7 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const
std::string mem_univ_label_file = mem_index_path + "_universal_label.txt";
std::string disk_univ_label_file = disk_index_path + "_universal_label.txt";
std::string disk_labels_int_map_file = disk_index_path + "_labels_map.txt";
std::string dummy_remap_file = disk_index_path + "_dummy_remap.txt"; // remap will be used if we break-up points of
std::string dummy_remap_file = disk_index_path + "_dummy_map.txt"; // remap will be used if we break-up points of
// high label-density to create copies

std::string sample_base_prefix = index_prefix_path + "_sample";
Expand Down Expand Up @@ -1239,9 +1239,6 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const
augmented_labels_file = index_prefix_path + "_augmented_labels.txt";
if (filter_threshold != 0)
{
//Changing this filename to "_disk.index_dummy_map.txt" from "_dummy_map.txt" to conform
//to the convention that index files all share the _disk.index prefix.
dummy_remap_file = index_prefix_path + "_disk.index_dummy_map.txt";
breakup_dense_points<T>(data_file_to_use, labels_file_to_use, filter_threshold, augmented_data_file,
augmented_labels_file,
dummy_remap_file); // RKNOTE: This has large memory footprint,
Expand Down Expand Up @@ -1311,11 +1308,11 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const
}
diskann::cout << timer.elapsed_seconds_for_step("generating disk layout") << std::endl;

double ten_percent_points = std::ceil(points_num * 0.1);
double num_sample_points =
ten_percent_points > MAX_SAMPLE_POINTS_FOR_WARMUP ? MAX_SAMPLE_POINTS_FOR_WARMUP : ten_percent_points;
double sample_sampling_rate = num_sample_points / points_num;
gen_random_slice<T>(data_file_to_use.c_str(), sample_base_prefix, sample_sampling_rate);
//double ten_percent_points = std::ceil(points_num * 0.1);
//double num_sample_points =
// ten_percent_points > MAX_SAMPLE_POINTS_FOR_WARMUP ? MAX_SAMPLE_POINTS_FOR_WARMUP : ten_percent_points;
//double sample_sampling_rate = num_sample_points / points_num;
//gen_random_slice<T>(data_file_to_use.c_str(), sample_base_prefix, sample_sampling_rate);
if (use_filters)
{
copy_file(labels_file_to_use, disk_labels_file);
Expand All @@ -1331,6 +1328,7 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const
}

std::remove(mem_index_path.c_str());
std::remove((mem_index_path + ".data").c_str());
if (use_disk_pq)
std::remove(disk_pq_compressed_vectors_path.c_str());

Expand Down

0 comments on commit 96e1751

Please sign in to comment.