From 90db7809c30b6997a8279eef6119c7cbcc1aaf64 Mon Sep 17 00:00:00 2001 From: hliu18 Date: Thu, 5 Oct 2023 13:08:18 -0700 Subject: [PATCH] read from MemoryMappedFile when EXEC_ENV_OLS is defined --- src/pq_flash_index.cpp | 70 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 57 insertions(+), 13 deletions(-) diff --git a/src/pq_flash_index.cpp b/src/pq_flash_index.cpp index e26df08d0..39f3e2836 100644 --- a/src/pq_flash_index.cpp +++ b/src/pq_flash_index.cpp @@ -567,10 +567,9 @@ void PQFlashIndex::generate_random_labels(std::vector &labels } template -std::unordered_map PQFlashIndex::load_label_map(const std::string &labels_map_file) +std::unordered_map PQFlashIndex::load_label_map(std::basic_istream &map_reader) { std::unordered_map string_to_int_mp; - std::ifstream map_reader(labels_map_file); std::string line, token; LabelT token_as_num; std::string label_str; @@ -647,14 +646,8 @@ inline bool PQFlashIndex::point_has_label(uint32_t point_id, LabelT l } template -void PQFlashIndex::parse_label_file(const std::string &label_file, size_t &num_points_labels) +void PQFlashIndex::parse_label_file(std::basic_istream &infile, size_t &num_points_labels) { - std::ifstream infile(label_file); - if (infile.fail()) - { - throw diskann::ANNException(std::string("Failed to open file ") + label_file, -1); - } - std::string line, token; uint32_t line_cnt = 0; @@ -777,14 +770,44 @@ int PQFlashIndex::load_from_separate_paths(uint32_t num_threads, cons this->_num_points = npts_u64; this->_n_chunks = nchunks_u64; +#ifdef EXEC_ENV_OLS + if (files.fileExists(labels_file)) + { + FileContent &content = files.getContent(labels_file); + std::stringstream infile( + std::string((const char *) content._content, content._size)); +#else if (file_exists(labels_file)) { - parse_label_file(labels_file, num_pts_in_label_file); + std::ifstream infile(labels_file); + if (infile.fail()) + { + throw diskann::ANNException(std::string("Failed to open file ") + labels_file, -1); + } +#endif + parse_label_file(infile, num_pts_in_label_file); assert(num_pts_in_label_file == this->_num_points); - _label_map = load_label_map(labels_map_file); + +#ifdef EXEC_ENV_OLS + FileContent &content = files.getContent(labels_map_file); + std::stringstream map_reader( + std::string((const char *) content._content, content._size)); +#else + std::ifstream map_reader(labels_map_file); +#endif + _label_map = load_label_map(map_reader); + +#ifdef EXEC_ENV_OLS + if (files.fileExists(labels_to_medoids)) + { + FileContent &content = files.getContent(labels_to_medoids); + std::stringstream medoid_stream( + std::string((const char *) content._content, content._size)); +#else if (file_exists(labels_to_medoids)) { std::ifstream medoid_stream(labels_to_medoids); +#endif assert(medoid_stream.is_open()); std::string line, token; @@ -814,9 +837,18 @@ int PQFlashIndex::load_from_separate_paths(uint32_t num_threads, cons } } std::string univ_label_file = std ::string(_disk_index_file) + "_universal_label.txt"; + +#ifdef EXEC_ENV_OLS + if (files.fileExists(univ_label_file)) + { + FileContent &content = files.getContent(univ_label_file); + std::stringstream universal_label_reader( + std::string((const char *) content._content, content._size)); +#else if (file_exists(univ_label_file)) { std::ifstream universal_label_reader(univ_label_file); +#endif assert(universal_label_reader.is_open()); std::string univ_label; universal_label_reader >> univ_label; @@ -824,9 +856,18 @@ int PQFlashIndex::load_from_separate_paths(uint32_t num_threads, cons LabelT label_as_num = (LabelT)std::stoul(univ_label); set_universal_label(label_as_num); } + +#ifdef EXEC_ENV_OLS + if (files.fileExists(dummy_map_file)) + { + FileContent &content = files.getContent(dummy_map_file); + std::stringstream dummy_map_stream( + std::string((const char *) content._content, content._size)); +#else if (file_exists(dummy_map_file)) { std::ifstream dummy_map_stream(dummy_map_file); +#endif assert(dummy_map_stream.is_open()); std::string line, token; @@ -878,14 +919,17 @@ int PQFlashIndex::load_from_separate_paths(uint32_t num_threads, cons } std::string disk_pq_pivots_path = this->_disk_index_file + "_pq_pivots.bin"; - if (file_exists(disk_pq_pivots_path)) +#ifdef EXEC_ENV_OLS + if (files.fileExists(disk_pq_pivots_path)) { _use_disk_index_pq = true; -#ifdef EXEC_ENV_OLS // giving 0 chunks to make the _pq_table infer from the // chunk_offsets file the correct value _disk_pq_table.load_pq_centroid_bin(files, disk_pq_pivots_path.c_str(), 0); #else + if (file_exists(disk_pq_pivots_path)) + { + _use_disk_index_pq = true; // giving 0 chunks to make the _pq_table infer from the // chunk_offsets file the correct value _disk_pq_table.load_pq_centroid_bin(disk_pq_pivots_path.c_str(), 0);