Skip to content

Commit

Permalink
clang formatted
Browse files Browse the repository at this point in the history
  • Loading branch information
rakri committed Jan 23, 2024
1 parent 68e1dbf commit 615247a
Show file tree
Hide file tree
Showing 12 changed files with 78 additions and 82 deletions.
4 changes: 2 additions & 2 deletions apps/build_stitched_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,8 +346,8 @@ int main(int argc, char **argv)
path labels_map_file = final_index_path_prefix + "_labels_map.txt";

std::string raw_universal_label = universal_label;
diskann::InMemFilterStore<uint32_t>::convert_label_to_numeric(label_data_path, labels_file_to_use,
labels_map_file, raw_universal_label);
diskann::InMemFilterStore<uint32_t>::convert_label_to_numeric(label_data_path, labels_file_to_use, labels_map_file,
raw_universal_label);

// 2. parse label file and create necessary data structures
std::vector<label_set> point_ids_to_labels;
Expand Down
5 changes: 3 additions & 2 deletions include/abstract_filter_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,11 @@ template <typename label_type> class AbstractFilterStore
// TODO: in future we may accept a set or vector of universal labels
// DISKANN_DLLEXPORT virtual void set_universal_label(label_type universal_label) = 0;
DISKANN_DLLEXPORT virtual void set_universal_labels(const std::string &universal_labels) = 0;
DISKANN_DLLEXPORT virtual std::pair<bool,label_type> get_universal_label() = 0;
DISKANN_DLLEXPORT virtual std::pair<bool, label_type> get_universal_label() = 0;

// takes a raw label file, generates the internal mapping file, and keeps the mapping info
DISKANN_DLLEXPORT virtual size_t load_raw_labels(const std::string &raw_labels_file, const std::string &raw_universal_label) = 0;
DISKANN_DLLEXPORT virtual size_t load_raw_labels(const std::string &raw_labels_file,
const std::string &raw_universal_label) = 0;

DISKANN_DLLEXPORT virtual void save_labels(const std::string &save_path, const size_t total_points) = 0;
// For dynamic filtered build, we compact the data and hence location_to_labels, we need the compacted version of
Expand Down
2 changes: 1 addition & 1 deletion include/disk_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ DISKANN_DLLEXPORT int merge_shards(const std::string &vamana_prefix, const std::
const uint64_t nshards, uint32_t max_degree, const std::string &output_vamana,
const std::string &medoids_file, bool use_filters = false,
const std::string &labels_to_medoids_file = std::string(""),
const std::unordered_map<std::string, uint32_t>& disk_labels_map = {});
const std::unordered_map<std::string, uint32_t> &disk_labels_map = {});

DISKANN_DLLEXPORT void extract_shard_labels(const std::string &in_label_file, const std::string &shard_ids_bin,
const std::string &shard_label_file);
Expand Down
3 changes: 2 additions & 1 deletion include/filter_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ DISKANN_DLLEXPORT void generate_label_indices(path input_data_path, path final_i
DISKANN_DLLEXPORT load_label_index_return_values load_label_index(path label_index_path,
uint32_t label_number_of_points);

DISKANN_DLLEXPORT std::tuple<std::vector<std::vector<std::string>>, tsl::robin_set<std::string>> parse_raw_label_file(path label_file);
DISKANN_DLLEXPORT std::tuple<std::vector<std::vector<std::string>>, tsl::robin_set<std::string>> parse_raw_label_file(
path label_file);

DISKANN_DLLEXPORT parse_label_file_return_values parse_label_file(path label_data_path, std::string universal_label);

Expand Down
8 changes: 4 additions & 4 deletions include/in_mem_filter_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ template <typename label_type> class InMemFilterStore : public AbstractFilterSto
const FilterMatchStrategy filter_match_strategy) override;

const std::vector<label_type> &get_labels_by_location(const location_t point_id) override;
//const label_type get_universal_label
// const label_type get_universal_label

// Dynamic Index
void set_labels_to_location(const location_t location, const std::vector<std::string> &labels);
Expand All @@ -34,10 +34,10 @@ template <typename label_type> class InMemFilterStore : public AbstractFilterSto
const uint32_t &get_medoid_by_label(const label_type &label) override;
const std::unordered_map<label_type, uint32_t> &get_labels_to_medoids() override;
bool label_has_medoid(const label_type &label) override;

// takes raw universal labels and map them internally.
void set_universal_labels(const std::string &raw_universal_labels) override;
std::pair<bool,label_type> get_universal_label() override;
std::pair<bool, label_type> get_universal_label() override;

// ideally takes a raw label file, generates the internal mapping file, and keeps the mapping info
size_t load_raw_labels(const std::string &raw_labels_file, const std::string &raw_universal_label) override;
Expand Down Expand Up @@ -81,7 +81,7 @@ template <typename label_type> class InMemFilterStore : public AbstractFilterSto
// no need of storing raw universal label ?
// 1. _use_universal_label can be used to identify if universal label present or not
// 2. from _label_map and _mapped_universal_label, we can know what is raw universal label. Hence seems duplicate
//std::string _raw_universal_label;
// std::string _raw_universal_label;

// populates _location_to_labels and _labels from the given label file
size_t parse_label_file(const std::string &label_file);
Expand Down
5 changes: 3 additions & 2 deletions include/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas

// Filtered Support
DISKANN_DLLEXPORT void build_filtered_index(const char *filename, const std::string &label_file,
const size_t num_points_to_load, const std::vector<TagT> &tags = std::vector<TagT>());
const size_t num_points_to_load,
const std::vector<TagT> &tags = std::vector<TagT>());

// DISKANN_DLLEXPORT void set_universal_label(const LabelT &label);
DISKANN_DLLEXPORT void set_universal_labels(const std::string &raw_labels);
Expand Down Expand Up @@ -253,7 +254,7 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas

// Calculate best medoids for filter data
void calculate_best_medoids(const size_t num_points_to_load, const uint32_t num_candidates);

// The query to use is placed in scratch->aligned_query
std::pair<uint32_t, uint32_t> iterate_to_fixed_point(InMemQueryScratch<T> *scratch, const uint32_t Lindex,
const std::vector<uint32_t> &init_ids, bool use_filter,
Expand Down
2 changes: 1 addition & 1 deletion include/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ inline int delete_file(const std::string &fileName)
}

inline void convert_label_to_numeric(const std::string &inFileName, const std::string &outFileName,
const std::string &mapFileName, const std::string &unv_label)
const std::string &mapFileName, const std::string &unv_label)
{
std::unordered_map<std::string, uint32_t> string_int_map;
std::ofstream label_writer(outFileName);
Expand Down
54 changes: 27 additions & 27 deletions src/abstract_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,33 +235,33 @@ template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, uint64_t>(cons
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, uint64_t>(const uint8_t *point, const uint64_t tag);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, uint64_t>(const int8_t *point, const uint64_t tag);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, int32_t>(
const float *point, const int32_t tag, const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, int32_t>(
const uint8_t *point, const int32_t tag, const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, int32_t>(
const int8_t *point, const int32_t tag, const std::vector<std::string> &labels);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, uint32_t>(
const float *point, const uint32_t tag, const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, uint32_t>(
const uint8_t *point, const uint32_t tag, const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, uint32_t>(
const int8_t *point, const uint32_t tag, const std::vector<std::string> &labels);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, int64_t>(
const float *point, const int64_t tag, const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, int64_t>(
const uint8_t *point, const int64_t tag, const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, int64_t>(
const int8_t *point, const int64_t tag, const std::vector<std::string> &labels);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, uint64_t>(
const float *point, const uint64_t tag, const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, uint64_t>(
const uint8_t *point, const uint64_t tag, const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, uint64_t>(
const int8_t *point, const uint64_t tag, const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, int32_t>(const float *point, const int32_t tag,
const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, int32_t>(const uint8_t *point, const int32_t tag,
const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, int32_t>(const int8_t *point, const int32_t tag,
const std::vector<std::string> &labels);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, uint32_t>(const float *point, const uint32_t tag,
const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, uint32_t>(const uint8_t *point, const uint32_t tag,
const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, uint32_t>(const int8_t *point, const uint32_t tag,
const std::vector<std::string> &labels);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, int64_t>(const float *point, const int64_t tag,
const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, int64_t>(const uint8_t *point, const int64_t tag,
const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, int64_t>(const int8_t *point, const int64_t tag,
const std::vector<std::string> &labels);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, uint64_t>(const float *point, const uint64_t tag,
const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, uint64_t>(const uint8_t *point, const uint64_t tag,
const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, uint64_t>(const int8_t *point, const uint64_t tag,
const std::vector<std::string> &labels);

template DISKANN_DLLEXPORT int AbstractIndex::lazy_delete<int32_t>(const int32_t &tag);
template DISKANN_DLLEXPORT int AbstractIndex::lazy_delete<uint32_t>(const uint32_t &tag);
Expand Down
31 changes: 14 additions & 17 deletions src/disk_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,8 @@ void read_idmap(const std::string &fname, std::vector<uint32_t> &ivecs)
int merge_shards(const std::string &vamana_prefix, const std::string &vamana_suffix, const std::string &idmaps_prefix,
const std::string &idmaps_suffix, const uint64_t nshards, uint32_t max_degree,
const std::string &output_vamana, const std::string &medoids_file, bool use_filters,
const std::string &disk_labels_to_medoids_file, const std::unordered_map<std::string, uint32_t>& disk_labels_map)
const std::string &disk_labels_to_medoids_file,
const std::unordered_map<std::string, uint32_t> &disk_labels_map)
{
// Read ID maps
std::vector<std::string> vamana_names(nshards);
Expand Down Expand Up @@ -641,7 +642,8 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr
double sampling_rate, double ram_budget, std::string mem_index_path,
std::string medoids_file, std::string centroids_file, size_t build_pq_bytes, bool use_opq,
uint32_t num_threads, bool use_filters, const std::string &label_file,
const std::string &disk_labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf)
const std::string &disk_labels_to_medoids_file, const std::string &universal_label,
const uint32_t Lf)
{
size_t base_num, base_dim;
diskann::get_bin_metadata(base_file, base_num, base_dim);
Expand Down Expand Up @@ -769,8 +771,9 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr
const std::string disk_index_path = mem_index_path.substr(0, mem_index_path.size() - 9) + "disk.index";
const std::string disk_labels_file = disk_index_path + "_labels.txt";
const std::string disk_labels_map_file = disk_index_path + "_labels_map.txt";
const std::unordered_map<std::string, uint32_t> disk_labels_map = diskann::InMemFilterStore<uint32_t>::convert_label_to_numeric(
label_file, disk_labels_file, disk_labels_map_file, universal_label);
const std::unordered_map<std::string, uint32_t> disk_labels_map =
diskann::InMemFilterStore<uint32_t>::convert_label_to_numeric(label_file, disk_labels_file,
disk_labels_map_file, universal_label);

diskann::merge_shards(merged_index_prefix + "_subshard-", "_mem.index", merged_index_prefix + "_subshard-",
"_ids_uint32.bin", num_parts, R, mem_index_path, medoids_file, use_filters,
Expand Down Expand Up @@ -1271,7 +1274,7 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const
std::string augmented_data_file, augmented_labels_file;
if (use_filters)
{

// convert_label_to_numeric(labels_file_original, labels_file_to_use, disk_labels_int_map_file,
// universal_label);
augmented_data_file = index_prefix_path + "_augmented_data.bin";
Expand Down Expand Up @@ -1477,37 +1480,31 @@ template DISKANN_DLLEXPORT int build_merged_vamana_index<int8_t, uint32_t>(
std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate,
double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file,
size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file,
const std::string &labels_to_medoids_file,
const std::string &universal_label, const uint32_t Lf);
const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf);
template DISKANN_DLLEXPORT int build_merged_vamana_index<float, uint32_t>(
std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate,
double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file,
size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file,
const std::string &labels_to_medoids_file,
const std::string &universal_label, const uint32_t Lf);
const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf);
template DISKANN_DLLEXPORT int build_merged_vamana_index<uint8_t, uint32_t>(
std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate,
double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file,
size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file,
const std::string &labels_to_medoids_file,
const std::string &universal_label, const uint32_t Lf);
const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf);
// LabelT = uint16_t
template DISKANN_DLLEXPORT int build_merged_vamana_index<int8_t, uint16_t>(
std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate,
double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file,
size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file,
const std::string &labels_to_medoids_file,
const std::string &universal_label, const uint32_t Lf);
const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf);
template DISKANN_DLLEXPORT int build_merged_vamana_index<float, uint16_t>(
std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate,
double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file,
size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file,
const std::string &labels_to_medoids_file,
const std::string &universal_label, const uint32_t Lf);
const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf);
template DISKANN_DLLEXPORT int build_merged_vamana_index<uint8_t, uint16_t>(
std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate,
double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file,
size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file,
const std::string &labels_to_medoids_file,
const std::string &universal_label, const uint32_t Lf);
const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf);
}; // namespace diskann
3 changes: 2 additions & 1 deletion src/filter_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,8 @@ parse_label_file_return_values parse_label_file(path label_data_path, std::strin
* 1. a vector of vectors of labels, where the outer vector is indexed by point id
* 2. a set of all labels
*/
std::tuple<std::vector<std::vector<std::string>>, tsl::robin_set<std::string>> parse_raw_label_file(std::string label_file)
std::tuple<std::vector<std::vector<std::string>>, tsl::robin_set<std::string>> parse_raw_label_file(
std::string label_file)
{
std::vector<std::vector<std::string>> pts_to_labels;
tsl::robin_set<std::string> labels;
Expand Down
Loading

0 comments on commit 615247a

Please sign in to comment.