From d9eaf13aa4d32405a7855f2bf7165d412c5f139d Mon Sep 17 00:00:00 2001 From: Gopal Srinivasa Date: Mon, 4 Nov 2024 15:06:54 +0530 Subject: [PATCH] Fixing compile errors and some formatting issues --- include/abstract_filter_store.h | 38 ++-- include/distance.h | 1 + include/in_mem_filter_store.h | 27 --- include/index_build_params.h | 3 +- include/index_config.h | 2 + include/multi_filter/abstract_predicate.h | 14 -- include/multi_filter/filter_matcher.h | 9 - .../multi_filter/simple_boolean_predicate.h | 44 ---- include/restapi/common.h | 12 +- include/restapi/search_wrapper.h | 205 ++++++++---------- include/restapi/server.h | 76 ++++--- src/in_mem_filter_store.cpp | 19 -- src/index.cpp | 8 +- 13 files changed, 160 insertions(+), 298 deletions(-) delete mode 100644 include/multi_filter/abstract_predicate.h delete mode 100644 include/multi_filter/filter_matcher.h delete mode 100644 include/multi_filter/simple_boolean_predicate.h diff --git a/include/abstract_filter_store.h b/include/abstract_filter_store.h index 6b9fe24e3..7afd3490e 100644 --- a/include/abstract_filter_store.h +++ b/include/abstract_filter_store.h @@ -1,35 +1,25 @@ #pragma once -#include "multi_filter/abstract_predicate.h" #include "types.h" +#include "windows_customizations.h" #include + namespace diskann { template class AbstractFilterStore { public: - /// - /// Returns the filters for a data point. Only valid for base points - /// - /// base point id - /// list of filters of the base point - virtual const std::vector & - get_filters_for_point(location_t point) const = 0; + DISKANN_DLLEXPORT virtual bool has_filter_support() const = 0; + + DISKANN_DLLEXPORT virtual bool + point_has_label(location_t point_id, const LabelT label_id) const = 0; - /// - /// Adds filters for a point. - /// - /// - /// - virtual void add_filters_for_point(location_t point, - const std::vector &filters) = 0; + // Returns true if the index is filter-enabled and all files were loaded + // correctly. false otherwise. Note that "false" can mean that the index + // does not have filter support, or that some index files do not exist, or + // that they exist and could not be opened. + DISKANN_DLLEXPORT virtual bool load(const std::string &disk_index_file) = 0; - /// - /// Returns a score between [0,1] indicating how many points in the dataset - /// matched the predicate - /// - /// Predicate to match - /// Score between [0,1] indicate %age of points matching - /// pred - virtual float - get_predicate_selectivity(const AbstractPredicate &pred) const = 0; + DISKANN_DLLEXPORT virtual void + generate_random_labels(std::vector &labels, const uint32_t num_labels, + const uint32_t nthreads) = 0; }; } // namespace diskann diff --git a/include/distance.h b/include/distance.h index 2f261a62f..ae0f1d452 100644 --- a/include/distance.h +++ b/include/distance.h @@ -1,5 +1,6 @@ #pragma once #include "windows_customizations.h" +#include #include namespace diskann { diff --git a/include/in_mem_filter_store.h b/include/in_mem_filter_store.h index bf366ef5f..ec3362b32 100644 --- a/include/in_mem_filter_store.h +++ b/include/in_mem_filter_store.h @@ -26,33 +26,6 @@ class InMemFilterStore : public AbstractFilterStore { DISKANN_DLLEXPORT virtual bool has_filter_support() const; - /// - /// Returns the filters for a data point. Only valid for base points - /// - /// base point id - /// list of filters of the base point - DISKANN_DLLEXPORT virtual const std::vector & - get_filters_for_point(location_t point) const override; - - /// - /// Adds filters for a point. - /// - /// - /// - DISKANN_DLLEXPORT virtual void - add_filters_for_point(location_t point, - const std::vector &filters) override; - - /// - /// Returns a score between [0,1] indicating how many points in the dataset - /// matched the predicate - /// - /// Predicate to match - /// Score between [0,1] indicate %age of points matching - /// pred - DISKANN_DLLEXPORT virtual float - get_predicate_selectivity(const AbstractPredicate &pred) const override; - DISKANN_DLLEXPORT virtual const std::unordered_map> & get_label_to_medoids() const; diff --git a/include/index_build_params.h b/include/index_build_params.h index 100b7f734..d0ce9f4b1 100644 --- a/include/index_build_params.h +++ b/include/index_build_params.h @@ -1,5 +1,6 @@ #pragma once +#include "ann_exception.h" #include "common_includes.h" #include "parameters.h" @@ -29,7 +30,7 @@ class IndexFilterParamsBuilder { IndexFilterParamsBuilder & with_save_path_prefix(const std::string &save_path_prefix) { if (save_path_prefix.empty() || save_path_prefix == "") - throw ANNException("Error: save_path_prefix can't be empty", -1); + throw diskann::ANNException("Error: save_path_prefix can't be empty", -1); this->_save_path_prefix = save_path_prefix; return *this; } diff --git a/include/index_config.h b/include/index_config.h index 3baf2f416..73c7133d1 100644 --- a/include/index_config.h +++ b/include/index_config.h @@ -1,6 +1,8 @@ #pragma once +#include "ann_exception.h" #include "common_includes.h" +#include "logger.h" #include "parameters.h" namespace diskann { diff --git a/include/multi_filter/abstract_predicate.h b/include/multi_filter/abstract_predicate.h deleted file mode 100644 index e905f7d72..000000000 --- a/include/multi_filter/abstract_predicate.h +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once -#include - -namespace diskann -{ - -class AbstractPredicate -{ - public: - virtual ~AbstractPredicate() = 0; - -}; - -} // namespace diskann \ No newline at end of file diff --git a/include/multi_filter/filter_matcher.h b/include/multi_filter/filter_matcher.h deleted file mode 100644 index 4cd06e7c7..000000000 --- a/include/multi_filter/filter_matcher.h +++ /dev/null @@ -1,9 +0,0 @@ -#pragma once -namespace diskann -{ -class AbstractFilterMatcher -{ - public: - uint32_t get_approximate_match_count(const AbstractFilter& filter) -}; -} diff --git a/include/multi_filter/simple_boolean_predicate.h b/include/multi_filter/simple_boolean_predicate.h deleted file mode 100644 index d86e9d628..000000000 --- a/include/multi_filter/simple_boolean_predicate.h +++ /dev/null @@ -1,44 +0,0 @@ -#pragma once -#include - -namespace diskann { - -enum BooleanOperator -{ - AND, OR -}; - -/// -/// Represents a simple boolean filter condition with only -/// one kind of operator. The operator can be either AND or -/// OR. The NOT operator is not supported. The predicates -/// are expected to be integers representing predicates -/// provided by the user. -/// -/// -template -class SimpleBooleanPredicate : public AbstractPredicate -{ - public: - SimpleBooleanPredicate(BooleanOperator op) - { - _op = op; - } - void add_predicate(const T &predicate) - { - _predicates.push_back(predicate); - } - const std::vector &get_predicates() const - { - return _predicates; - } - const BooleanOperator get_op() const - { - return _op; - } - -private: - BooleanOperator _op; - std::vector _predicates; -}; -} \ No newline at end of file diff --git a/include/restapi/common.h b/include/restapi/common.h index b8339635a..ec321ec9a 100644 --- a/include/restapi/common.h +++ b/include/restapi/common.h @@ -6,12 +6,14 @@ #include #include -namespace diskann -{ +namespace diskann { // Constants -static const std::string VECTOR_KEY = "query", K_KEY = "k", INDICES_KEY = "indices", DISTANCES_KEY = "distances", - TAGS_KEY = "tags", QUERY_ID_KEY = "query_id", ERROR_MESSAGE_KEY = "error", L_KEY = "Ls", - TIME_TAKEN_KEY = "time_taken_in_us", PARTITION_KEY = "partition", +static const std::string VECTOR_KEY = "query", K_KEY = "k", + INDICES_KEY = "indices", DISTANCES_KEY = "distances", + TAGS_KEY = "tags", QUERY_ID_KEY = "query_id", + ERROR_MESSAGE_KEY = "error", L_KEY = "Ls", + TIME_TAKEN_KEY = "time_taken_in_us", + PARTITION_KEY = "partition", UNKNOWN_ERROR = "unknown_error"; const unsigned int DEFAULT_L = 100; diff --git a/include/restapi/search_wrapper.h b/include/restapi/search_wrapper.h index ebd067d8a..e7ed1725e 100644 --- a/include/restapi/search_wrapper.h +++ b/include/restapi/search_wrapper.h @@ -3,138 +3,113 @@ #pragma once +#include #include #include -#include #include #include -namespace diskann -{ -class SearchResult -{ - public: - SearchResult(unsigned int K, unsigned int elapsed_time_in_ms, const unsigned *const indices, - const float *const distances, const std::string *const tags = nullptr, - const unsigned *const partitions = nullptr); - - const std::vector &get_indices() const - { - return _indices; - } - const std::vector &get_distances() const - { - return _distances; - } - bool tags_enabled() const - { - return _tags_enabled; - } - const std::vector &get_tags() const - { - return _tags; - } - bool partitions_enabled() const - { - return _partitions_enabled; - } - const std::vector &get_partitions() const - { - return _partitions; - } - unsigned get_time() const - { - return _search_time_in_ms; - } - - private: - unsigned int _K; - unsigned int _search_time_in_ms; - std::vector _indices; - std::vector _distances; - - bool _tags_enabled; - std::vector _tags; - - bool _partitions_enabled; - std::vector _partitions; +namespace diskann { +class SearchResult { +public: + SearchResult(unsigned int K, unsigned int elapsed_time_in_ms, + const unsigned *const indices, const float *const distances, + const std::string *const tags = nullptr, + const unsigned *const partitions = nullptr); + + const std::vector &get_indices() const { return _indices; } + const std::vector &get_distances() const { return _distances; } + bool tags_enabled() const { return _tags_enabled; } + const std::vector &get_tags() const { return _tags; } + bool partitions_enabled() const { return _partitions_enabled; } + const std::vector &get_partitions() const { return _partitions; } + unsigned get_time() const { return _search_time_in_ms; } + +private: + unsigned int _K; + unsigned int _search_time_in_ms; + std::vector _indices; + std::vector _distances; + + bool _tags_enabled; + std::vector _tags; + + bool _partitions_enabled; + std::vector _partitions; }; -class SearchNotImplementedException : public std::logic_error -{ - private: - std::string _errormsg; - - public: - SearchNotImplementedException(const char *type) : std::logic_error("Not Implemented") - { - _errormsg = "Search with data type "; - _errormsg += std::string(type); - _errormsg += " not implemented : "; - _errormsg += __FUNCTION__; - } - - virtual const char *what() const throw() - { - return _errormsg.c_str(); - } +class SearchNotImplementedException : public std::logic_error { +private: + std::string _errormsg; + +public: + SearchNotImplementedException(const char *type) + : std::logic_error("Not Implemented") { + _errormsg = "Search with data type "; + _errormsg += std::string(type); + _errormsg += " not implemented : "; + _errormsg += __FUNCTION__; + } + + virtual const char *what() const throw() { return _errormsg.c_str(); } }; -class BaseSearch -{ - public: - BaseSearch(const std::string &tagsFile = nullptr); - virtual SearchResult search(const float *query, const unsigned int dimensions, const unsigned int K, - const unsigned int Ls) - { - throw SearchNotImplementedException("float"); - } - virtual SearchResult search(const int8_t *query, const unsigned int dimensions, const unsigned int K, - const unsigned int Ls) - { - throw SearchNotImplementedException("int8_t"); - } - - virtual SearchResult search(const uint8_t *query, const unsigned int dimensions, const unsigned int K, - const unsigned int Ls) - { - throw SearchNotImplementedException("uint8_t"); - } - - void lookup_tags(const unsigned K, const unsigned *indices, std::string *ret_tags); - - protected: - bool _tags_enabled; - std::vector _tags_str; +class BaseSearch { +public: + BaseSearch(const std::string &tagsFile = nullptr); + virtual SearchResult search(const float *query, const unsigned int dimensions, + const unsigned int K, const unsigned int Ls) { + throw SearchNotImplementedException("float"); + } + virtual SearchResult search(const int8_t *query, + const unsigned int dimensions, + const unsigned int K, const unsigned int Ls) { + throw SearchNotImplementedException("int8_t"); + } + + virtual SearchResult search(const uint8_t *query, + const unsigned int dimensions, + const unsigned int K, const unsigned int Ls) { + throw SearchNotImplementedException("uint8_t"); + } + + void lookup_tags(const unsigned K, const unsigned *indices, + std::string *ret_tags); + +protected: + bool _tags_enabled; + std::vector _tags_str; }; -template class InMemorySearch : public BaseSearch -{ - public: - InMemorySearch(const std::string &baseFile, const std::string &indexFile, const std::string &tagsFile, Metric m, - uint32_t num_threads, uint32_t search_l); - virtual ~InMemorySearch(); +template class InMemorySearch : public BaseSearch { +public: + InMemorySearch(const std::string &baseFile, const std::string &indexFile, + const std::string &tagsFile, Metric m, uint32_t num_threads, + uint32_t search_l); + virtual ~InMemorySearch(); - SearchResult search(const T *query, const unsigned int dimensions, const unsigned int K, const unsigned int Ls); + SearchResult search(const T *query, const unsigned int dimensions, + const unsigned int K, const unsigned int Ls); - private: - unsigned int _dimensions, _numPoints; - std::unique_ptr> _index; +private: + unsigned int _dimensions, _numPoints; + std::unique_ptr> _index; }; -template class PQFlashSearch : public BaseSearch -{ - public: - PQFlashSearch(const std::string &indexPrefix, const unsigned num_nodes_to_cache, const unsigned num_threads, - const std::string &tagsFile, Metric m); - virtual ~PQFlashSearch(); +template class PQFlashSearch : public BaseSearch { +public: + PQFlashSearch(const std::string &indexPrefix, + const unsigned num_nodes_to_cache, const unsigned num_threads, + const std::string &tagsFile, Metric m); + virtual ~PQFlashSearch(); - SearchResult search(const T *query, const unsigned int dimensions, const unsigned int K, const unsigned int Ls); + SearchResult search(const T *query, const unsigned int dimensions, + const unsigned int K, const unsigned int Ls); - private: - unsigned int _dimensions, _numPoints; - std::unique_ptr> _index; - std::shared_ptr reader; +private: + unsigned int _dimensions, _numPoints; + std::unique_ptr> _index; + std::shared_ptr reader; }; } // namespace diskann diff --git a/include/restapi/server.h b/include/restapi/server.h index 1d75847a2..9cb9449da 100644 --- a/include/restapi/server.h +++ b/include/restapi/server.h @@ -3,43 +3,47 @@ #pragma once -#include #include +#include -namespace diskann -{ -class Server -{ - public: - Server(web::uri &url, std::vector> &multi_searcher, - const std::string &typestring); - virtual ~Server(); - - pplx::task open(); - pplx::task close(); - - protected: - template void handle_post(web::http::http_request message); - - template - web::json::value toJsonArray(const std::vector &v, std::function valConverter); - web::json::value prepareResponse(const int64_t &queryId, const int k); - - template - void parseJson(const utility::string_t &body, unsigned int &k, int64_t &queryId, T *&queryVector, - unsigned int &dimensions, unsigned &Ls); - - web::json::value idsToJsonArray(const diskann::SearchResult &result); - web::json::value distancesToJsonArray(const diskann::SearchResult &result); - web::json::value tagsToJsonArray(const diskann::SearchResult &result); - web::json::value partitionsToJsonArray(const diskann::SearchResult &result); - - SearchResult aggregate_results(const unsigned K, const std::vector &results); - - private: - bool _isDebug; - std::unique_ptr _listener; - const bool _multi_search; - std::vector> _multi_searcher; +namespace diskann { +class Server { +public: + Server(web::uri &url, + std::vector> &multi_searcher, + const std::string &typestring); + virtual ~Server(); + + pplx::task open(); + pplx::task close(); + +protected: + template void handle_post(web::http::http_request message); + + template + web::json::value + toJsonArray(const std::vector &v, + std::function valConverter); + web::json::value prepareResponse(const int64_t &queryId, const int k); + + template + void parseJson(const utility::string_t &body, unsigned int &k, + int64_t &queryId, T *&queryVector, unsigned int &dimensions, + unsigned &Ls); + + web::json::value idsToJsonArray(const diskann::SearchResult &result); + web::json::value distancesToJsonArray(const diskann::SearchResult &result); + web::json::value tagsToJsonArray(const diskann::SearchResult &result); + web::json::value partitionsToJsonArray(const diskann::SearchResult &result); + + SearchResult + aggregate_results(const unsigned K, + const std::vector &results); + +private: + bool _isDebug; + std::unique_ptr _listener; + const bool _multi_search; + std::vector> _multi_searcher; }; } // namespace diskann diff --git a/src/in_mem_filter_store.cpp b/src/in_mem_filter_store.cpp index d4f60b826..d5f299063 100644 --- a/src/in_mem_filter_store.cpp +++ b/src/in_mem_filter_store.cpp @@ -1,7 +1,5 @@ #include "in_mem_filter_store.h" #include "ann_exception.h" -#include "multi_filter/abstract_predicate.h" -#include "multi_filter/simple_boolean_predicate.h" #include "tsl/robin_map.h" #include "tsl/robin_set.h" #include "utils.h" @@ -33,23 +31,6 @@ template InMemFilterStore::~InMemFilterStore() { _pts_to_labels = nullptr; } } -template -const std::vector & -InMemFilterStore::get_filters_for_point(location_t point) const { - throw ANNException("Not implemented!", -1); -} - -template -void InMemFilterStore::add_filters_for_point( - location_t point, const std::vector &filters) { - throw ANNException("Not implemented!", -1); -} - -template -float InMemFilterStore::get_predicate_selectivity( - const AbstractPredicate &pred) const { - return 0.0f; -} template const std::unordered_map> & diff --git a/src/index.cpp b/src/index.cpp index 593b86250..dd08bdc16 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -1,10 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT license. -#include - -#include - +#include "ann_exception.h" #include "boost/dynamic_bitset.hpp" #include "index_factory.h" #include "memory_mapper.h" @@ -12,7 +9,10 @@ #include "timer.h" #include "tsl/robin_map.h" #include "tsl/robin_set.h" +#include "utils.h" #include "windows_customizations.h" +#include +#include #if defined(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && \ defined(DISKANN_BUILD) #include "gperftools/malloc_extension.h"