Skip to content

Commit

Permalink
Jegao/streaming ann fix (#571)
Browse files Browse the repository at this point in the history
* add 128 bits tag type

* fix link error

* fix some issue

* Fix graph store reserve space
  • Loading branch information
Sanhaoji2 authored Jul 24, 2024
1 parent 80061da commit 4d473e5
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 4 deletions.
5 changes: 5 additions & 0 deletions include/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -1193,6 +1193,11 @@ template <> inline const char *diskann_type_to_name<int64_t>()
return "int64";
}

// Specialization of diskann_type_to_name for diskann::tag_uint128 (the 128-bit tag type):
// yields the literal name "tag_uint128".
template <> inline const char *diskann_type_to_name<diskann::tag_uint128>()
{
    return "tag_uint128";
}

#ifdef _WINDOWS
#include <intrin.h>
#include <Psapi.h>
Expand Down
47 changes: 47 additions & 0 deletions src/abstract_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,15 @@ template DISKANN_DLLEXPORT void AbstractIndex::build<int8_t, uint64_t>(const int
// Explicit instantiations of AbstractIndex::build: uint8_t data with uint64_t tags,
// then float / int8_t / uint8_t data with tag_uint128 tags.
template DISKANN_DLLEXPORT void AbstractIndex::build<uint8_t, uint64_t>(
    const uint8_t *data, const size_t num_points_to_load, const std::vector<uint64_t> &tags);
template DISKANN_DLLEXPORT void AbstractIndex::build<float, tag_uint128>(
    const float *data, const size_t num_points_to_load, const std::vector<tag_uint128> &tags);
template DISKANN_DLLEXPORT void AbstractIndex::build<int8_t, tag_uint128>(
    const int8_t *data, const size_t num_points_to_load, const std::vector<tag_uint128> &tags);
template DISKANN_DLLEXPORT void AbstractIndex::build<uint8_t, tag_uint128>(
    const uint8_t *data, const size_t num_points_to_load, const std::vector<tag_uint128> &tags);

// Explicit instantiation of AbstractIndex::search for float queries with uint32_t indices.
template DISKANN_DLLEXPORT std::pair<uint32_t, uint32_t> AbstractIndex::search<float, uint32_t>(
const float *query, const size_t K, const uint32_t L, uint32_t *indices, float *distances);
Expand Down Expand Up @@ -211,6 +220,18 @@ template DISKANN_DLLEXPORT size_t AbstractIndex::search_with_tags<int8_t, uint64
const int8_t *query, const uint64_t K, const uint32_t L, uint64_t *tags, float *distances,
std::vector<int8_t *> &res_vectors, bool use_filters, const std::string filter_label);

// Explicit instantiations of AbstractIndex::search_with_tags using tag_uint128 tags,
// for float, uint8_t and int8_t data.
template DISKANN_DLLEXPORT size_t AbstractIndex::search_with_tags<float, tag_uint128>(
    const float *query, const uint64_t K, const uint32_t L, tag_uint128 *tags, float *distances,
    std::vector<float *> &res_vectors, bool use_filters, const std::string filter_label);

template DISKANN_DLLEXPORT size_t AbstractIndex::search_with_tags<uint8_t, tag_uint128>(
    const uint8_t *query, const uint64_t K, const uint32_t L, tag_uint128 *tags, float *distances,
    std::vector<uint8_t *> &res_vectors, bool use_filters, const std::string filter_label);

template DISKANN_DLLEXPORT size_t AbstractIndex::search_with_tags<int8_t, tag_uint128>(
    const int8_t *query, const uint64_t K, const uint32_t L, tag_uint128 *tags, float *distances,
    std::vector<int8_t *> &res_vectors, bool use_filters, const std::string filter_label);

// Explicit instantiation of AbstractIndex::search_with_optimized_layout for float queries.
template DISKANN_DLLEXPORT void AbstractIndex::search_with_optimized_layout<float>(const float *query, size_t K,
size_t L, uint32_t *indices);
template DISKANN_DLLEXPORT void AbstractIndex::search_with_optimized_layout<uint8_t>(const uint8_t *query, size_t K,
Expand All @@ -234,6 +255,10 @@ template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, uint64_t>(cons
// Explicit instantiations of the two-argument AbstractIndex::insert_point (point, tag)
// for uint64_t tags (uint8_t / int8_t data) and tag_uint128 tags (float / uint8_t / int8_t data).
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, uint64_t>(
    const uint8_t *point, const uint64_t tag);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, uint64_t>(
    const int8_t *point, const uint64_t tag);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, tag_uint128>(
    const float *point, const tag_uint128 tag);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, tag_uint128>(
    const uint8_t *point, const tag_uint128 tag);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, tag_uint128>(
    const int8_t *point, const tag_uint128 tag);

// Labeled overload (point, tag, labels): float data, int32_t tag, uint16_t labels.
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, int32_t, uint16_t>(
    const float *point, const int32_t tag, const std::vector<uint16_t> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, int32_t, uint16_t>(
Expand Down Expand Up @@ -262,6 +287,13 @@ template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, uint64_t, ui
// Explicit instantiations of the labeled AbstractIndex::insert_point (point, tag, labels)
// with uint16_t labels: int8_t data / uint64_t tag, then all three data types with tag_uint128 tags.
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, uint64_t, uint16_t>(
    const int8_t *point, const uint64_t tag, const std::vector<uint16_t> &labels);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, tag_uint128, uint16_t>(
    const float *point, const tag_uint128 tag, const std::vector<uint16_t> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, tag_uint128, uint16_t>(
    const uint8_t *point, const tag_uint128 tag, const std::vector<uint16_t> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, tag_uint128, uint16_t>(
    const int8_t *point, const tag_uint128 tag, const std::vector<uint16_t> &labels);

// Same overload with uint32_t labels: float data, int32_t tag.
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, int32_t, uint32_t>(
    const float *point, const int32_t tag, const std::vector<uint32_t> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, int32_t, uint32_t>(
Expand Down Expand Up @@ -290,10 +322,18 @@ template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, uint64_t, ui
// Explicit instantiations of the labeled AbstractIndex::insert_point (point, tag, labels)
// with uint32_t labels: int8_t data / uint64_t tag, then all three data types with tag_uint128 tags.
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, uint64_t, uint32_t>(
    const int8_t *point, const uint64_t tag, const std::vector<uint32_t> &labels);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, tag_uint128, uint32_t>(
    const float *point, const tag_uint128 tag, const std::vector<uint32_t> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, tag_uint128, uint32_t>(
    const uint8_t *point, const tag_uint128 tag, const std::vector<uint32_t> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, tag_uint128, uint32_t>(
    const int8_t *point, const tag_uint128 tag, const std::vector<uint32_t> &labels);

// Explicit instantiations of the single-tag AbstractIndex::lazy_delete for every tag type
// listed here: int32_t, uint32_t, int64_t, uint64_t and tag_uint128.
template DISKANN_DLLEXPORT int AbstractIndex::lazy_delete<int32_t>(
    const int32_t &tag);
template DISKANN_DLLEXPORT int AbstractIndex::lazy_delete<uint32_t>(
    const uint32_t &tag);
template DISKANN_DLLEXPORT int AbstractIndex::lazy_delete<int64_t>(
    const int64_t &tag);
template DISKANN_DLLEXPORT int AbstractIndex::lazy_delete<uint64_t>(
    const uint64_t &tag);
template DISKANN_DLLEXPORT int AbstractIndex::lazy_delete<tag_uint128>(
    const tag_uint128 &tag);

// Batch overload (tags, failed_tags) for int32_t tags.
template DISKANN_DLLEXPORT void AbstractIndex::lazy_delete<int32_t>(
    const std::vector<int32_t> &tags, std::vector<int32_t> &failed_tags);
Expand All @@ -303,11 +343,14 @@ template DISKANN_DLLEXPORT void AbstractIndex::lazy_delete<int64_t>(const std::v
std::vector<int64_t> &failed_tags);
// Explicit instantiations of the batch AbstractIndex::lazy_delete (tags, failed_tags)
// for uint64_t and tag_uint128 tags.
template DISKANN_DLLEXPORT void AbstractIndex::lazy_delete<uint64_t>(
    const std::vector<uint64_t> &tags, std::vector<uint64_t> &failed_tags);
template DISKANN_DLLEXPORT void AbstractIndex::lazy_delete<tag_uint128>(
    const std::vector<tag_uint128> &tags, std::vector<tag_uint128> &failed_tags);

// Explicit instantiations of AbstractIndex::get_active_tags, one per tag type listed here
// (int32_t, uint32_t, int64_t, uint64_t, tag_uint128); each takes a tsl::robin_set of that type.
template DISKANN_DLLEXPORT void AbstractIndex::get_active_tags<int32_t>(
    tsl::robin_set<int32_t> &active_tags);
template DISKANN_DLLEXPORT void AbstractIndex::get_active_tags<uint32_t>(
    tsl::robin_set<uint32_t> &active_tags);
template DISKANN_DLLEXPORT void AbstractIndex::get_active_tags<int64_t>(
    tsl::robin_set<int64_t> &active_tags);
template DISKANN_DLLEXPORT void AbstractIndex::get_active_tags<uint64_t>(
    tsl::robin_set<uint64_t> &active_tags);
template DISKANN_DLLEXPORT void AbstractIndex::get_active_tags<tag_uint128>(
    tsl::robin_set<tag_uint128> &active_tags);

// Explicit instantiation of AbstractIndex::set_start_points_at_random for float (radius, random_seed).
template DISKANN_DLLEXPORT void AbstractIndex::set_start_points_at_random<float>(float radius, uint32_t random_seed);
template DISKANN_DLLEXPORT void AbstractIndex::set_start_points_at_random<uint8_t>(uint8_t radius,
Expand All @@ -328,6 +371,10 @@ template DISKANN_DLLEXPORT int AbstractIndex::get_vector_by_tag<uint64_t, float>
// Explicit instantiations of AbstractIndex::get_vector_by_tag (tag, vec) for uint64_t tags
// (uint8_t / int8_t vectors) and tag_uint128 tags (float / uint8_t / int8_t vectors).
template DISKANN_DLLEXPORT int AbstractIndex::get_vector_by_tag<uint64_t, uint8_t>(
    uint64_t &tag, uint8_t *vec);
template DISKANN_DLLEXPORT int AbstractIndex::get_vector_by_tag<uint64_t, int8_t>(
    uint64_t &tag, int8_t *vec);

template DISKANN_DLLEXPORT int AbstractIndex::get_vector_by_tag<tag_uint128, float>(
    tag_uint128 &tag, float *vec);
template DISKANN_DLLEXPORT int AbstractIndex::get_vector_by_tag<tag_uint128, uint8_t>(
    tag_uint128 &tag, uint8_t *vec);
template DISKANN_DLLEXPORT int AbstractIndex::get_vector_by_tag<tag_uint128, int8_t>(
    tag_uint128 &tag, int8_t *vec);

// Explicit instantiations of AbstractIndex::set_universal_label for uint16_t and uint32_t labels.
template DISKANN_DLLEXPORT void AbstractIndex::set_universal_label<uint16_t>(const uint16_t label);
template DISKANN_DLLEXPORT void AbstractIndex::set_universal_label<uint32_t>(const uint32_t label);

Expand Down
13 changes: 9 additions & 4 deletions src/index_factory.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "index_factory.h"
#include "tag_uint128.h"
#include "pq_l2_distance.h"

namespace diskann
Expand Down Expand Up @@ -42,10 +43,10 @@ void IndexFactory::check_config()
}

if (_config->tag_type != "int32" && _config->tag_type != "uint32" && _config->tag_type != "int64" &&
_config->tag_type != "uint64")
_config->tag_type != "uint64" && _config->tag_type != "tag_uint128")
{
throw ANNException("ERROR: invalid data type : + " + _config->tag_type +
" is not supported. please select from [int32, uint32, int64, uint64]",
" is not supported. please select from [int32, uint32, int64, uint64, tag_uint128]",
-1);
}
}
Expand Down Expand Up @@ -127,7 +128,7 @@ std::unique_ptr<AbstractIndex> IndexFactory::create_instance()
if (_config->data_strategy == DataStoreStrategy::MEMORY && _config->pq_dist_build)
{
pq_data_store =
construct_pq_datastore<data_type>(_config->data_strategy, num_points + _config->num_frozen_pts, dim,
construct_pq_datastore<data_type>(_config->data_strategy, num_points, dim,
_config->metric, _config->num_pq_chunks, _config->use_opq);
}
else
Expand All @@ -138,7 +139,7 @@ std::unique_ptr<AbstractIndex> IndexFactory::create_instance()
(size_t)(defaults::GRAPH_SLACK_FACTOR * 1.05 *
(_config->index_write_params == nullptr ? 0 : _config->index_write_params->max_degree));
std::unique_ptr<AbstractGraphStore> graph_store =
construct_graphstore(_config->graph_strategy, num_points + _config->num_frozen_pts, max_reserve_degree);
construct_graphstore(_config->graph_strategy, num_points, max_reserve_degree);

// REFACTOR TODO: Must construct in-memory PQDatastore if strategy == ONDISK and must construct
// in-mem and on-disk PQDataStore if strategy == ONDISK and diskPQ is required.
Expand Down Expand Up @@ -184,6 +185,10 @@ std::unique_ptr<AbstractIndex> IndexFactory::create_instance(const std::string &
{
return create_instance<data_type, uint64_t>(label_type);
}
else if (tag_type == std::string("tag_uint128"))
{
    // Dispatch to the tag_uint128 instantiation when the tag type string names it.
    return create_instance<data_type, tag_uint128>(label_type);
}
else
{
    // No branch matched. The list in the message must name every tag type handled above,
    // including tag_uint128, so callers are told about all accepted values.
    throw ANNException(
        "Error: unsupported tag_type please choose from [int32/uint32/int64/uint64/tag_uint128]", -1);
}
}
Expand Down

0 comments on commit 4d473e5

Please sign in to comment.