-
Notifications
You must be signed in to change notification settings - Fork 237
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adding a new PQ Distance Metric and PQ Data Store (#384)
* Added PQ distance hierarchy Changes to CMakelists PQDataStore version that builds correctly Clang-format
* Fixing compile issues after rebase to main
* minor renaming functions
* fixed small bug post rebasing with index factory
* Changes to index factory to support PQDataStore
* Merged graph_store and pq_data_store
* Implementing preprocessing for inmemdatastore
* Incorporating code review comments
* minor bugfix for PQ data allocation
* clang-formatted
* Incorporating CR comments
* Fixing compile error
* minor bug fix + clang-format
* Update pq.h
* Fixing warnings about struct/class incompatibility

---------

Co-authored-by: Gopal Srinivasa <[email protected]>
Co-authored-by: ravishankar <[email protected]>
Co-authored-by: gopalrs <[email protected]>
- Loading branch information
1 parent
03abc71
commit 5744060
Showing
28 changed files
with
1,179 additions
and
276 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#pragma once | ||
namespace diskann | ||
{ | ||
|
||
// Forward declaration only: this header stores and returns pointers to
// PQScratch, so the complete type is not needed here.
template <typename data_t> class PQScratch;

// By somewhat more than a coincidence, it seems that both InMemQueryScratch
// and SSDQueryScratch have the aligned query and PQScratch objects. So we
// can put them in a neat hierarchy and keep PQScratch as a standalone class.
//
// AbstractScratch is the common base of that hierarchy. It only stores two
// raw pointers (both initialised to nullptr) and exposes them through
// accessors; it never allocates or frees what they point to.
template <typename data_t> class AbstractScratch
{
  public:
    AbstractScratch() = default;

    // This class does not take any responsibility for memory management of
    // its members. It is the responsibility of the derived classes to do so.
    // The destructor is virtual so that derived scratch objects can be
    // destroyed through a pointer to this base.
    virtual ~AbstractScratch() = default;

    // Scratch objects should not be copied
    AbstractScratch(const AbstractScratch &) = delete;
    AbstractScratch &operator=(const AbstractScratch &) = delete;

    // Returns the stored query pointer, or nullptr if a derived class has
    // not set one.
    data_t *aligned_query_T()
    {
        return _aligned_query_T;
    }

    // Returns the stored PQScratch pointer, or nullptr if a derived class
    // has not set one.
    PQScratch<data_t> *pq_scratch()
    {
        return _pq_scratch;
    }

  protected:
    // Both pointers are assigned (and released) by derived classes.
    data_t *_aligned_query_T = nullptr;
    PQScratch<data_t> *_pq_scratch = nullptr;
};
} // namespace diskann |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
#pragma once | ||
|
||
#include <string> | ||
#include <sstream> | ||
|
||
// Compile-time constants for product quantization (PQ / OPQ).
// NOTE(review): the per-constant descriptions below are inferred from the
// macro names only; the code that consumes them is not part of this header.

// Bits per PQ code; NUM_PQ_CENTROIDS is derived from it as 2^NUM_PQ_BITS (256).
#define NUM_PQ_BITS 8
#define NUM_PQ_CENTROIDS (1 << NUM_PQ_BITS)
// Presumably the iteration cap for OPQ training - confirm against the trainer.
#define MAX_OPQ_ITERS 20
// Presumably the number of k-means repetitions used when training PQ pivots.
#define NUM_KMEANS_REPS_PQ 12
// Presumably the upper bound on the number of training points sampled for PQ.
#define MAX_PQ_TRAINING_SET_SIZE 256000
// Presumably the upper bound on the number of PQ chunks per vector.
#define MAX_PQ_CHUNKS 512
|
||
namespace diskann | ||
{ | ||
// Builds the file name for the compressed (quantized) vectors.
//
// The result is: prefix + tag + num_chunks + "_compressed.bin", where tag is
// "_opq" when use_opq is true and "pq" otherwise.
//
// NOTE(review): the non-OPQ tag has no leading underscore, unlike "_opq".
// This is kept as-is because files already written to disk use this exact
// name; confirm with callers (e.g. whether prefix ends in '_') before
// changing it.
inline std::string get_quantized_vectors_filename(const std::string &prefix, bool use_opq, uint32_t num_chunks)
{
    return prefix + (use_opq ? "_opq" : "pq") + std::to_string(num_chunks) + "_compressed.bin";
}
|
||
// Builds the file name for the PQ pivot data.
//
// The result is: prefix + tag + num_chunks + "_pivots.bin", where tag is
// "_opq" when use_opq is true and "pq" otherwise.
//
// NOTE(review): the non-OPQ tag has no leading underscore, unlike "_opq".
// This is kept as-is because files already written to disk use this exact
// name; confirm with callers (e.g. whether prefix ends in '_') before
// changing it.
inline std::string get_pivot_data_filename(const std::string &prefix, bool use_opq, uint32_t num_chunks)
{
    return prefix + (use_opq ? "_opq" : "pq") + std::to_string(num_chunks) + "_pivots.bin";
}
|
||
// Derives the rotation-matrix file name from a pivot-data file name.
//
// Despite the "suffix" in its name, this returns the whole input string with
// "_rotation_matrix.bin" appended, not just the suffix.
inline std::string get_rotation_matrix_suffix(const std::string &pivot_data_filename)
{
    return pivot_data_filename + "_rotation_matrix.bin";
}
|
||
} // namespace diskann |
Oops, something went wrong.