Skip to content

Commit

Permalink
clang format
Browse files: browse the repository at this point in the history
  • Loading branch information
rakri committed Jan 2, 2025
1 parent 07e1dfb commit 529e93e
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 53 deletions.
31 changes: 14 additions & 17 deletions apps/search_disk_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@

namespace po = boost::program_options;

#ifdef DISKANN_DEBUG_PRINT_RETSET
void dump_retset(uint64_t test_id, uint64_t query_num, diskann::QueryStats *stats, const std::string &result_output_prefix)
#ifdef DISKANN_DEBUG_PRINT_RETSET
void dump_retset(uint64_t test_id, uint64_t query_num, diskann::QueryStats *stats,
const std::string &result_output_prefix)
{
std::stringstream ss;
if (stats != nullptr)
Expand All @@ -44,12 +45,10 @@ void dump_retset(uint64_t test_id, uint64_t query_num, diskann::QueryStats *stat
ss << i << "\t";
for (int j = 0; j < (stats + i)->query_retset.size(); j++)
{
ss << "(" << (stats + i)->query_retset[j].id << ", " << (stats + i)->query_retset[j].distance
<< "), ";
ss << "(" << (stats + i)->query_retset[j].id << ", " << (stats + i)->query_retset[j].distance << "), ";
}
ss << std::endl;
}

}
std::string results_file = result_output_prefix + "_L" + std::to_string(test_id) + "_retset.tsv";
std::ofstream writer(results_file);
Expand Down Expand Up @@ -148,7 +147,6 @@ void write_gt_to_tsv(const std::string &cur_result_path, uint64_t query_num, uin
}
#endif


void print_stats(std::string category, std::vector<float> percentiles, std::vector<float> results)
{
diskann::cout << std::setw(20) << category << ": " << std::flush;
Expand All @@ -165,10 +163,10 @@ void print_stats(std::string category, std::vector<float> percentiles, std::vect
diskann::cout << std::endl;
}

template<typename T, typename LabelT>
template <typename T, typename LabelT>
void parse_labels_of_query(const std::string &filters_for_query,
std::unique_ptr<diskann::PQFlashIndex<T, LabelT>> &pFlashIndex,
std::vector<LabelT> &label_ids_for_query)
std::unique_ptr<diskann::PQFlashIndex<T, LabelT>> &pFlashIndex,
std::vector<LabelT> &label_ids_for_query)
{
std::vector<std::string> label_strs_for_query;
diskann::split_string(filters_for_query, FILTER_OR_SEPARATOR, label_strs_for_query);
Expand All @@ -178,10 +176,11 @@ void parse_labels_of_query(const std::string &filters_for_query,
}
}

template<typename T, typename LabelT>
template <typename T, typename LabelT>
void populate_label_ids(const std::vector<std::string> &filters_of_queries,
std::unique_ptr<diskann::PQFlashIndex<T, LabelT>> &pFlashIndex,
std::vector<std::vector<LabelT>> &label_ids_of_queries, bool apply_one_to_all, uint32_t query_count)
std::vector<std::vector<LabelT>> &label_ids_of_queries, bool apply_one_to_all,
uint32_t query_count)
{
if (apply_one_to_all)
{
Expand Down Expand Up @@ -332,11 +331,9 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre
std::vector<std::vector<LabelT>> per_query_label_ids;
if (filtered_search)
{
populate_label_ids(query_filters, _pFlashIndex, per_query_label_ids, (query_filters.size() == 1), query_num );
populate_label_ids(query_filters, _pFlashIndex, per_query_label_ids, (query_filters.size() == 1), query_num);
}



diskann::cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
diskann::cout.precision(2);

Expand Down Expand Up @@ -402,8 +399,8 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre
{
_pFlashIndex->cached_beam_search(
query + (i * query_aligned_dim), recall_at, L, query_result_ids_64.data() + (i * recall_at),
query_result_dists[test_id].data() + (i * recall_at), optimized_beamwidth, true, per_query_label_ids[i],
search_io_limit, use_reorder_data, stats + i);
query_result_dists[test_id].data() + (i * recall_at), optimized_beamwidth, true,
per_query_label_ids[i], search_io_limit, use_reorder_data, stats + i);
}
}
auto e = std::chrono::high_resolution_clock::now();
Expand Down Expand Up @@ -448,7 +445,7 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre
<< std::setw(16) << mean_io_us << std::setw(16) << mean_cpuus;
if (calc_recall_flag)
{
diskann::cout << std::setw(16) << recall << std::endl ;
diskann::cout << std::setw(16) << recall << std::endl;
}
else
{
Expand Down
2 changes: 1 addition & 1 deletion include/percentile_stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ struct QueryStats
unsigned n_hops = 0; // # search hops

#ifdef DISKANN_DEBUG_PRINT_RETSET
std::vector<Neighbor> query_retset; //copy of the retset to debug PQ distances.
std::vector<Neighbor> query_retset; // copy of the retset to debug PQ distances.
#endif
};

Expand Down
9 changes: 4 additions & 5 deletions include/pq_flash_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@

#define FULL_PRECISION_REORDER_MULTIPLIER 3
#define DEFAULT_VISITED_RESERVE_SIZE 4096
//default max filters per query is set to the same
//as what we expect Bing to provide. If this is overkill,
//it can be set by clients in the load() function
// default max filters per query is set to the same
// as what we expect Bing to provide. If this is overkill,
// it can be set by clients in the load() function
#define DEFAULT_MAX_FILTERS_PER_QUERY 4096

namespace diskann
Expand Down Expand Up @@ -51,8 +51,7 @@ template <typename T, typename LabelT = uint32_t> class PQFlashIndex
#ifdef EXEC_ENV_OLS
DISKANN_DLLEXPORT int load_from_separate_paths(diskann::MemoryMappedFiles &files, uint32_t num_threads,
const char *index_filepath, const char *pivots_filepath,
const char *compressed_filepath,
uint32_t max_filters_per_query);
const char *compressed_filepath, uint32_t max_filters_per_query);
#else
DISKANN_DLLEXPORT int load_from_separate_paths(uint32_t num_threads, const char *index_filepath,
const char *pivots_filepath, const char *compressed_filepath,
Expand Down
59 changes: 32 additions & 27 deletions src/pq_flash_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,8 @@ template <typename T, typename LabelT> inline T *PQFlashIndex<T, LabelT>::offset
}

template <typename T, typename LabelT>
void PQFlashIndex<T, LabelT>::setup_thread_data(uint64_t nthreads, uint64_t visited_reserve, uint64_t max_filters_per_query)
void PQFlashIndex<T, LabelT>::setup_thread_data(uint64_t nthreads, uint64_t visited_reserve,
uint64_t max_filters_per_query)
{
diskann::cout << "Setting up thread-specific contexts for nthreads: " << nthreads << std::endl;
// omp parallel for to generate unique thread IDs
Expand Down Expand Up @@ -561,7 +562,8 @@ void PQFlashIndex<T, LabelT>::generate_random_labels(std::vector<LabelT> &labels
}

template <typename T, typename LabelT>
void PQFlashIndex<T, LabelT>::load_label_map(std::basic_istream<char> &map_reader, std::unordered_map<std::string, LabelT>& string_to_int_map)
void PQFlashIndex<T, LabelT>::load_label_map(std::basic_istream<char> &map_reader,
std::unordered_map<std::string, LabelT> &string_to_int_map)
{
std::string line, token;
LabelT token_as_num;
Expand Down Expand Up @@ -589,8 +591,7 @@ LabelT PQFlashIndex<T, LabelT>::get_converted_label(const std::string &filter_la
return _universal_filter_label;
}
std::stringstream stream;
stream << "Unable to find label " << filter_label
<< " in the Label Map ";
stream << "Unable to find label " << filter_label << " in the Label Map ";
diskann::cerr << stream.str() << std::endl;
throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__);
}
Expand Down Expand Up @@ -678,7 +679,6 @@ bool PQFlashIndex<T, LabelT>::point_has_any_label(uint32_t point_id, const std::
return ret_val;
}


template <typename T, typename LabelT>
void PQFlashIndex<T, LabelT>::parse_label_file(std::basic_istream<char> &infile, size_t &num_points_labels)
{
Expand Down Expand Up @@ -769,7 +769,8 @@ template <typename T, typename LabelT> void PQFlashIndex<T, LabelT>::set_univers
}

template <typename T, typename LabelT>
void PQFlashIndex<T, LabelT>::load_label_medoid_map(const std::string& labels_to_medoids_filepath, std::istream& medoid_stream)
void PQFlashIndex<T, LabelT>::load_label_medoid_map(const std::string &labels_to_medoids_filepath,
std::istream &medoid_stream)
{
std::string line, token;

Expand Down Expand Up @@ -831,7 +832,7 @@ void PQFlashIndex<T, LabelT>::load_dummy_map(const std::string &dummy_map_filepa
}
catch (std::system_error &e)
{
throw FileException (dummy_map_filepath, e, __FUNCSIG__, __FILE__, __LINE__);
throw FileException(dummy_map_filepath, e, __FUNCSIG__, __FILE__, __LINE__);
}
}

Expand Down Expand Up @@ -940,10 +941,12 @@ template <typename T, typename LabelT> void PQFlashIndex<T, LabelT>::load_labels

#ifdef EXEC_ENV_OLS
template <typename T, typename LabelT>
int PQFlashIndex<T, LabelT>::load(MemoryMappedFiles &files, uint32_t num_threads, const char *index_prefix, uint32_t max_filters_per_query)
int PQFlashIndex<T, LabelT>::load(MemoryMappedFiles &files, uint32_t num_threads, const char *index_prefix,
uint32_t max_filters_per_query)
{
#else
template <typename T, typename LabelT> int PQFlashIndex<T, LabelT>::load(uint32_t num_threads, const char *index_prefix, uint32_t max_filters_per_query)
template <typename T, typename LabelT>
int PQFlashIndex<T, LabelT>::load(uint32_t num_threads, const char *index_prefix, uint32_t max_filters_per_query)
{
#endif
std::string pq_table_bin = std::string(index_prefix) + "_pq_pivots.bin";
Expand Down Expand Up @@ -1405,16 +1408,18 @@ void PQFlashIndex<T, LabelT>::cached_beam_search(const T *query1, const uint64_t
NeighborPriorityQueue &retset = query_scratch->retset;
std::vector<Neighbor> &full_retset = query_scratch->full_retset;
tsl::robin_set<location_t> full_retset_ids;
if (use_filters) {
if (use_filters)
{
uint64_t size_to_reserve = std::max(l_search, (std::min((uint64_t)filter_label_count, this->_max_degree) + 1));
retset.reserve(size_to_reserve);
full_retset.reserve(4096);
full_retset.reserve(4096);
full_retset_ids.reserve(4096);
} else {
}
else
{
retset.reserve(l_search + 1);
}


uint32_t best_medoid = 0;
uint32_t cur_list_size = 0;
float best_dist = (std::numeric_limits<float>::max)();
Expand All @@ -1437,7 +1442,9 @@ void PQFlashIndex<T, LabelT>::cached_beam_search(const T *query1, const uint64_t
#endif
visited.insert(best_medoid);
cur_list_size = 1;
} else {
}
else
{
std::vector<location_t> filter_specific_medoids;
filter_specific_medoids.reserve(filter_label_count);
location_t ctr = 0;
Expand All @@ -1455,12 +1462,12 @@ void PQFlashIndex<T, LabelT>::cached_beam_search(const T *query1, const uint64_t
for (ctr = 0; ctr < filter_specific_medoids.size(); ctr++)
{
retset.insert(Neighbor(filter_specific_medoids[ctr], dist_scratch[ctr]));
//retset[ctr].id = filter_specific_medoids[ctr];
//retset[ctr].distance = dist_scratch[ctr];
//retset[ctr].expanded = false;
// retset[ctr].id = filter_specific_medoids[ctr];
// retset[ctr].distance = dist_scratch[ctr];
// retset[ctr].expanded = false;
visited.insert(filter_specific_medoids[ctr]);
}
cur_list_size = (uint32_t) filter_specific_medoids.size();
cur_list_size = (uint32_t)filter_specific_medoids.size();
}

uint32_t cmps = 0;
Expand All @@ -1477,10 +1484,10 @@ void PQFlashIndex<T, LabelT>::cached_beam_search(const T *query1, const uint64_t
std::vector<std::pair<uint32_t, std::pair<uint32_t, uint32_t *>>> cached_nhoods;
cached_nhoods.reserve(2 * beam_width);

//if we are doing multi-filter search we don't want to restrict the number of IOs
//at present. Must revisit this decision later.
// if we are doing multi-filter search we don't want to restrict the number of IOs
// at present. Must revisit this decision later.
uint32_t max_ios_for_query = use_filters || (io_limit == 0) ? std::numeric_limits<uint32_t>::max() : io_limit;
const std::vector<LabelT>& label_ids = filter_labels; //avoid renaming.
const std::vector<LabelT> &label_ids = filter_labels; // avoid renaming.
std::vector<LabelT> lbl_vec;

while (retset.has_unexpanded_node() && num_ios < max_ios_for_query)
Expand All @@ -1494,9 +1501,8 @@ void PQFlashIndex<T, LabelT>::cached_beam_search(const T *query1, const uint64_t
// find new beam
uint32_t num_seen = 0;


for (const auto &lbl : label_ids)
{
{
uint32_t lbl_marker = 0;
while (lbl_marker < cur_list_size)
{
Expand All @@ -1522,7 +1528,8 @@ void PQFlashIndex<T, LabelT>::cached_beam_search(const T *query1, const uint64_t
retset[lbl_marker].expanded = true;
if (this->_count_visited_nodes)
{
reinterpret_cast<std::atomic<uint32_t> &>(this->_node_visit_counter[retset[lbl_marker].id].second)
reinterpret_cast<std::atomic<uint32_t> &>(
this->_node_visit_counter[retset[lbl_marker].id].second)
.fetch_add(1);
}
break;
Expand Down Expand Up @@ -1645,7 +1652,6 @@ void PQFlashIndex<T, LabelT>::cached_beam_search(const T *query1, const uint64_t
#ifdef DISKANN_DEBUG_PRINT_RETSET
stats->query_retset.push_back(nn);
#endif

}
}
}
Expand Down Expand Up @@ -1687,7 +1693,7 @@ void PQFlashIndex<T, LabelT>::cached_beam_search(const T *query1, const uint64_t
full_retset.push_back(Neighbor(real_id, cur_expanded_dist));
full_retset_ids.insert(real_id);
}

uint32_t *node_nbrs = (node_buf + 1);
// compute node_nbrs <-> query dist in PQ space
cpu_timer.reset();
Expand Down Expand Up @@ -1723,7 +1729,6 @@ void PQFlashIndex<T, LabelT>::cached_beam_search(const T *query1, const uint64_t
#ifdef DISKANN_DEBUG_PRINT_RETSET
stats->query_retset.push_back(nn);
#endif

}
}

Expand Down
6 changes: 4 additions & 2 deletions src/scratch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ template <typename T> void SSDQueryScratch<T>::reset()
full_retset.clear();
}

template <typename T> SSDQueryScratch<T>::SSDQueryScratch(size_t aligned_dim, size_t visited_reserve, size_t max_filters_per_query)
template <typename T>
SSDQueryScratch<T>::SSDQueryScratch(size_t aligned_dim, size_t visited_reserve, size_t max_filters_per_query)
{
size_t coord_alloc_size = ROUND_UP(sizeof(T) * aligned_dim, 256);

Expand Down Expand Up @@ -124,7 +125,8 @@ template <typename T> SSDQueryScratch<T>::~SSDQueryScratch()
}

template <typename T>
SSDThreadData<T>::SSDThreadData(size_t aligned_dim, size_t visited_reserve, size_t max_filters_per_query) : scratch(aligned_dim, visited_reserve, max_filters_per_query)
SSDThreadData<T>::SSDThreadData(size_t aligned_dim, size_t visited_reserve, size_t max_filters_per_query)
: scratch(aligned_dim, visited_reserve, max_filters_per_query)
{
}

Expand Down
2 changes: 1 addition & 1 deletion src/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ double calculate_range_search_recall(uint32_t num_queries, std::vector<std::vect
return total_recall / (num_queries);
}

void split_string(const std::string &string_to_split, const std::string& delimiter, std::vector<std::string> &pieces)
void split_string(const std::string &string_to_split, const std::string &delimiter, std::vector<std::string> &pieces)
{
size_t start = 0;
size_t end;
Expand Down

0 comments on commit 529e93e

Please sign in to comment.