Skip to content

Commit

Permalink
clang-formatted
Browse files Browse the repository at this point in the history
  • Loading branch information
rakri committed Oct 7, 2023
1 parent 3ba680d commit 84f1ee4
Show file tree
Hide file tree
Showing 12 changed files with 78 additions and 72 deletions.
3 changes: 2 additions & 1 deletion apps/build_stitched_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,8 @@ void prune_and_save(path final_index_path_prefix, path full_index_path_prefix, p

diskann::get_bin_metadata(input_data_path, number_of_label_points, dimension);

diskann::Index<T> index(diskann::Metric::L2, dimension, number_of_label_points, nullptr, nullptr, 0, false, false, false, false, 0, false);
diskann::Index<T> index(diskann::Metric::L2, dimension, number_of_label_points, nullptr, nullptr, 0, false, false,
false, false, 0, false);

// not searching this index, set search_l to 0
index.load(full_index_path_prefix.c_str(), num_threads, 1);
Expand Down
3 changes: 2 additions & 1 deletion apps/utils/count_bfs_levels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ template <typename T> void bfs_count(const std::string &index_path, uint32_t dat
{
using TagT = uint32_t;
using LabelT = uint32_t;
diskann::Index<T, TagT, LabelT> index(diskann::Metric::L2, data_dims, 0, nullptr, nullptr, 0, false, false, false, false, 0, false);
diskann::Index<T, TagT, LabelT> index(diskann::Metric::L2, data_dims, 0, nullptr, nullptr, 0, false, false, false,
false, 0, false);
std::cout << "Index class instantiated" << std::endl;
index.load(index_path.c_str(), 1, 100);
std::cout << "Index loaded" << std::endl;
Expand Down
17 changes: 9 additions & 8 deletions include/abstract_data_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,11 @@ template <typename data_t> class AbstractDataStore
// num_points) to zero
virtual void copy_vectors(const location_t from_loc, const location_t to_loc, const location_t num_points) = 0;

//With the PQ Data Store PR, we have also changed iterate_to_fixed_point to NOT take the query
//from the scratch object. Therefore every data store has to implement preprocess_query which
//at the least will be to copy the query into the scratch object. So making this pure virtual.
virtual void preprocess_query(const data_t *aligned_query, AbstractScratch<data_t> *query_scratch = nullptr) const = 0;
// With the PQ Data Store PR, we have also changed iterate_to_fixed_point to NOT take the query
// from the scratch object. Therefore every data store has to implement preprocess_query which
// at the least will be to copy the query into the scratch object. So making this pure virtual.
virtual void preprocess_query(const data_t *aligned_query,
AbstractScratch<data_t> *query_scratch = nullptr) const = 0;
// distance functions.
virtual float get_distance(const data_t *query, const location_t loc) const = 0;
virtual void get_distance(const data_t *query, const location_t *locations, const uint32_t location_count,
Expand All @@ -98,10 +99,10 @@ template <typename data_t> class AbstractDataStore
// in the dataset
virtual location_t calculate_medoid() const = 0;

//REFACTOR PQ TODO: Each data store knows about its distance function, so this is
//redundant. However, we don't have an OptimizedDataStore yet, and to preserve code
//compatibility, we are exposing this function.
virtual Distance<data_t>* get_dist_fn() const = 0;
// REFACTOR PQ TODO: Each data store knows about its distance function, so this is
// redundant. However, we don't have an OptimizedDataStore yet, and to preserve code
// compatibility, we are exposing this function.
virtual Distance<data_t> *get_dist_fn() const = 0;

// search helpers
// if the base data is aligned per the request of the metric, this will tell
Expand Down
7 changes: 4 additions & 3 deletions include/in_mem_data_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,15 @@ template <typename data_t> class InMemDataStore : public AbstractDataStore<data_
virtual float get_distance(const data_t *preprocessed_query, const location_t loc) const override;
virtual float get_distance(const location_t loc1, const location_t loc2) const override;

virtual void get_distance(const data_t *preprocessed_query, const location_t *locations, const uint32_t location_count,
float *distances, AbstractScratch<data_t> *scratch) const override;
virtual void get_distance(const data_t *preprocessed_query, const location_t *locations,
const uint32_t location_count, float *distances,
AbstractScratch<data_t> *scratch) const override;
virtual void get_distance(const data_t *preprocessed_query, const std::vector<location_t> &ids,
std::vector<float> &distances, AbstractScratch<data_t> *scratch_space) const override;

virtual location_t calculate_medoid() const override;

virtual Distance<data_t>* get_dist_fn() const override;
virtual Distance<data_t> *get_dist_fn() const override;

virtual size_t get_alignment_factor() const override;

Expand Down
8 changes: 2 additions & 6 deletions include/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,6 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas
const bool pq_dist_build = false, const size_t num_pq_chunks = 0,
const bool use_opq = false, const bool filtered_index = false);



DISKANN_DLLEXPORT ~Index();

// Saves graph, data, metadata and associated tags.
Expand Down Expand Up @@ -255,10 +253,9 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas
// with iterate_to_fixed_point.
std::vector<uint32_t> get_init_ids();

//The query to use is placed in scratch->aligned_query
// The query to use is placed in scratch->aligned_query
std::pair<uint32_t, uint32_t> iterate_to_fixed_point(InMemQueryScratch<T> *scratch, const uint32_t Lindex,
const std::vector<uint32_t> &init_ids,
bool use_filter,
const std::vector<uint32_t> &init_ids, bool use_filter,
const std::vector<LabelT> &filters, bool search_invocation);

void search_for_point_and_prune(int location, uint32_t Lindex, std::vector<uint32_t> &pruned_list,
Expand Down Expand Up @@ -340,7 +337,6 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas
// Data
std::shared_ptr<AbstractDataStore<T>> _data_store;


// Graph related data structures
std::unique_ptr<AbstractGraphStore> _graph_store;

Expand Down
8 changes: 2 additions & 6 deletions include/index_factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
#include "in_mem_graph_store.h"
#include "pq_data_store.h"



namespace diskann
{
class IndexFactory
Expand All @@ -13,15 +11,13 @@ class IndexFactory
DISKANN_DLLEXPORT explicit IndexFactory(const IndexConfig &config);
DISKANN_DLLEXPORT std::unique_ptr<AbstractIndex> create_instance();


DISKANN_DLLEXPORT static std::unique_ptr<AbstractGraphStore> construct_graphstore(
const GraphStoreStrategy stratagy, const size_t size, const size_t reserve_graph_degree);

template <typename T>
DISKANN_DLLEXPORT static std::shared_ptr<AbstractDataStore<T>> construct_datastore(DataStoreStrategy stratagy,
size_t num_points,
size_t dimension,
Metric m);
size_t dimension, Metric m);
// For now PQDataStore incorporates within itself all variants of quantization that we support. In the
// future it may be necessary to introduce an AbstractPQDataStore class to separate various quantization
// flavours.
Expand All @@ -33,7 +29,7 @@ class IndexFactory
template <typename T> static Distance<T> *construct_inmem_distance_fn(Metric m);

private:
void check_config();
void check_config();

template <typename data_type, typename tag_type, typename label_type>
std::unique_ptr<AbstractIndex> create_instance();
Expand Down
10 changes: 5 additions & 5 deletions include/pq_data_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,16 @@

namespace diskann
{
//REFACTOR TODO: By default, the PQDataStore is an in-memory datastore because both Vamana and
//DiskANN treat it the same way. But with DiskPQ, that may need to change.
// REFACTOR TODO: By default, the PQDataStore is an in-memory datastore because both Vamana and
// DiskANN treat it the same way. But with DiskPQ, that may need to change.
template <typename data_t> class PQDataStore : public AbstractDataStore<data_t>
{

public:
PQDataStore(size_t dim, location_t num_points, size_t num_pq_chunks, std::unique_ptr<Distance<data_t>> distance_fn,
std::unique_ptr<QuantizedDistance<data_t>> pq_distance_fn);
PQDataStore(const PQDataStore&) = delete;
PQDataStore &operator=(const PQDataStore&) = delete;
PQDataStore(const PQDataStore &) = delete;
PQDataStore &operator=(const PQDataStore &) = delete;
~PQDataStore();

// Load quantized vectors from a set of files. Here filename is treated
Expand Down Expand Up @@ -67,7 +67,7 @@ template <typename data_t> class PQDataStore : public AbstractDataStore<data_t>
// We are returning the distance function that is used for full precision
// vectors here, not the PQ distance function. This is because the callers
// all are expecting a Distance<T> not QuantizedDistance<T>.
virtual Distance<data_t>* get_dist_fn() const override;
virtual Distance<data_t> *get_dist_fn() const override;

virtual location_t calculate_medoid() const override;

Expand Down
1 change: 0 additions & 1 deletion src/abstract_data_store.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ template <typename data_t> location_t AbstractDataStore<data_t>::resize(const lo
}
}


template DISKANN_DLLEXPORT class AbstractDataStore<float>;
template DISKANN_DLLEXPORT class AbstractDataStore<int8_t>;
template DISKANN_DLLEXPORT class AbstractDataStore<uint8_t>;
Expand Down
6 changes: 3 additions & 3 deletions src/in_mem_data_store.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ template <typename data_t> void InMemDataStore<data_t>::prefetch_vector(const lo
template <typename data_t>
void InMemDataStore<data_t>::preprocess_query(const data_t *query, AbstractScratch<data_t> *query_scratch) const
{
if (query_scratch != nullptr )
if (query_scratch != nullptr)
{
memcpy(query_scratch->aligned_query_T(), query, sizeof(data_t) * this->get_dims());
}
Expand Down Expand Up @@ -218,7 +218,7 @@ float InMemDataStore<data_t>::get_distance(const location_t loc1, const location
(uint32_t)this->_aligned_dim);
}

template<typename data_t>
template <typename data_t>
void InMemDataStore<data_t>::get_distance(const data_t *preprocessed_query, const std::vector<location_t> &ids,
std::vector<float> &distances, AbstractScratch<data_t> *scratch_space) const
{
Expand Down Expand Up @@ -389,7 +389,7 @@ template <typename data_t> location_t InMemDataStore<data_t>::calculate_medoid()
return min_idx;
}

template <typename data_t> Distance<data_t>* InMemDataStore<data_t>::get_dist_fn() const
template <typename data_t> Distance<data_t> *InMemDataStore<data_t>::get_dist_fn() const
{
return this->_distance_fn.get();
}
Expand Down
34 changes: 18 additions & 16 deletions src/index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ namespace diskann
// (bin), and initialize max_points
template <typename T, typename TagT, typename LabelT>
Index<T, TagT, LabelT>::Index(const IndexConfig &index_config, std::shared_ptr<AbstractDataStore<T>> data_store,
std::unique_ptr<AbstractGraphStore> graph_store,
std::shared_ptr<AbstractDataStore<T>> pq_data_store)
std::unique_ptr<AbstractGraphStore> graph_store,
std::shared_ptr<AbstractDataStore<T>> pq_data_store)
: _dist_metric(index_config.metric), _dim(index_config.dimension), _max_points(index_config.max_points),
_num_frozen_pts(index_config.num_frozen_pts), _dynamic_index(index_config.dynamic_index),
_enable_tags(index_config.enable_tags), _indexingMaxC(DEFAULT_MAXC), _query_scratch(nullptr),
Expand Down Expand Up @@ -142,10 +142,13 @@ Index<T, TagT, LabelT>::Index(Metric m, const size_t dim, const size_t max_point
(size_t)((index_parameters == nullptr ? 0 : index_parameters->max_degree) *
defaults::GRAPH_SLACK_FACTOR * 1.05)))
{
if (_pq_dist) {
_pq_data_store =
IndexFactory::construct_pq_datastore<T>(DataStoreStrategy::MEMORY, max_points + num_frozen_pts, dim, m, num_pq_chunks, use_opq);
} else {
if (_pq_dist)
{
_pq_data_store = IndexFactory::construct_pq_datastore<T>(DataStoreStrategy::MEMORY, max_points + num_frozen_pts,
dim, m, num_pq_chunks, use_opq);
}
else
{
_pq_data_store = _data_store;
}
}
Expand Down Expand Up @@ -784,8 +787,8 @@ bool Index<T, TagT, LabelT>::detect_common_filters(uint32_t point_id, bool searc

template <typename T, typename TagT, typename LabelT>
std::pair<uint32_t, uint32_t> Index<T, TagT, LabelT>::iterate_to_fixed_point(
InMemQueryScratch<T> *scratch, const uint32_t Lsize, const std::vector<uint32_t> &init_ids,
bool use_filter, const std::vector<LabelT> &filter_labels, bool search_invocation)
InMemQueryScratch<T> *scratch, const uint32_t Lsize, const std::vector<uint32_t> &init_ids, bool use_filter,
const std::vector<LabelT> &filter_labels, bool search_invocation)
{
std::vector<Neighbor> &expanded_nodes = scratch->pool();
NeighborPriorityQueue &best_L_nodes = scratch->best_l_nodes();
Expand Down Expand Up @@ -1143,7 +1146,7 @@ void Index<T, TagT, LabelT>::prune_neighbors(const uint32_t location, std::vecto
}

// If using _pq_build, over-write the PQ distances with actual distances
//REFACTOR PQ: TODO: How to get rid of this!?
// REFACTOR PQ: TODO: How to get rid of this!?
if (_pq_dist)
{
for (auto &ngh : pool)
Expand Down Expand Up @@ -1629,15 +1632,15 @@ void Index<T, TagT, LabelT>::build(const char *filename, const size_t num_points
throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__);
}

//REFACTOR PQ TODO: We can remove this if and add a check in the InMemDataStore
//to not populate_data if it has been called once.
// REFACTOR PQ TODO: We can remove this if and add a check in the InMemDataStore
// to not populate_data if it has been called once.
if (_pq_dist)
{
#ifdef EXEC_ENV_OLS
std::stringstream ss;
std::stringstream ss;
ss << "PQ Build is not supported in DLVS environment (i.e. if EXEC_ENV_OLS is defined)" << std::endl;
diskann::cerr << ss.str() << std::endl;
throw ANNException(ss.str(),-1, __FUNCSIG__, __FILE__, __LINE__);
throw ANNException(ss.str(), -1, __FUNCSIG__, __FILE__, __LINE__);
#else
// REFACTOR TODO: Both in the previous code and in the current PQDataStore,
// we are writing the PQ files in the same path as the input file. Now we
Expand Down Expand Up @@ -1957,8 +1960,7 @@ std::pair<uint32_t, uint32_t> Index<T, TagT, LabelT>::search(const T *query, con

_data_store->preprocess_query(query, scratch);

auto retval =
iterate_to_fixed_point(scratch, L, init_ids, false, unused_filter_label, true);
auto retval = iterate_to_fixed_point(scratch, L, init_ids, false, unused_filter_label, true);

NeighborPriorityQueue &best_L_nodes = scratch->best_l_nodes();

Expand Down Expand Up @@ -2228,7 +2230,7 @@ template <typename T, typename TagT, typename LabelT> void Index<T, TagT, LabelT
}
size_t res = calculate_entry_point();

//REFACTOR PQ: Not sure if we should do this for both stores.
// REFACTOR PQ: Not sure if we should do this for both stores.
if (_pq_dist)
{
// copy the PQ data corresponding to the point returned by
Expand Down
Loading

0 comments on commit 84f1ee4

Please sign in to comment.