Skip to content

Commit f2380c1

Browse files
yuejiaointel and mihaic authored Mar 28, 2025
feature!: verify and set default VamanaBuildParameters (#96)
BREAKING CHANGE: Removed the deprecated `num_threads` argument and added a `use_full_search_history` argument in `VamanaBuildParameters` in Python bindings. This change ensures consistency between the Python API and the C++ implementation in `include/svs/index/vamana/build_params.h`. Added default value setting and checking based on doc requirement, and added additional tests. Pin CMake < 4 as a workaround until our dependencies require CMake >= 3.5. --------- Co-authored-by: Mihai Capotă <[email protected]>
1 parent 1e59bf6 commit f2380c1

File tree

10 files changed

+365
-59
lines changed

10 files changed

+365
-59
lines changed
 

‎bindings/python/pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
requires = [
1717
"setuptools>=42",
1818
"scikit-build",
19-
"cmake>=3.21", # Keep in-sync with `CMakeLists.txt`
19+
"cmake>=3.21, <4", # Keep in-sync with `CMakeLists.txt`
2020
"numpy>=1.10.0, <2", # Keep in-sync with `setup.py`
2121
"archspec>=0.2.0", # Keep in-sync with `setup.py`
2222
"toml>=0.10.2", # Keep in-sync with `setup.py` required for the tests

‎bindings/python/src/vamana.cpp

+19-30
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "svs/lib/dispatcher.h"
3131
#include "svs/lib/float16.h"
3232
#include "svs/lib/meta.h"
33+
#include "svs/lib/preprocessor.h"
3334
#include "svs/orchestrators/vamana.h"
3435

3536
// pybind
@@ -420,40 +421,22 @@ void wrap(py::module& m) {
420421
size_t window_size,
421422
size_t max_candidate_pool_size,
422423
size_t prune_to,
423-
size_t num_threads) {
424-
if (num_threads != std::numeric_limits<size_t>::max()) {
425-
PyErr_WarnEx(
426-
PyExc_DeprecationWarning,
427-
"Constructing VamanaBuildParameters with the \"num_threads\" "
428-
"keyword "
429-
"argument is deprecated, no longer has any effect, and will be "
430-
"removed "
431-
"from future versions of the library. Use the \"num_threads\" "
432-
"keyword "
433-
"argument of \"svs.Vamana.build\" instead!",
434-
1
435-
);
436-
}
437-
438-
// Default the `prune_to` argument appropriately.
439-
if (prune_to == std::numeric_limits<size_t>::max()) {
440-
prune_to = graph_max_degree;
441-
}
442-
424+
bool use_full_search_history) {
443425
return svs::index::vamana::VamanaBuildParameters{
444426
alpha,
445427
graph_max_degree,
446428
window_size,
447429
max_candidate_pool_size,
448430
prune_to,
449-
true};
431+
use_full_search_history};
450432
}),
451-
py::arg("alpha") = 1.2,
452-
py::arg("graph_max_degree") = 32,
453-
py::arg("window_size") = 64,
454-
py::arg("max_candidate_pool_size") = 80,
455-
py::arg("prune_to") = std::numeric_limits<size_t>::max(),
456-
py::arg("num_threads") = std::numeric_limits<size_t>::max(),
433+
py::arg("alpha") = svs::FLOAT_PLACEHOLDER,
434+
py::arg("graph_max_degree") = svs::VAMANA_GRAPH_MAX_DEGREE_DEFAULT,
435+
py::arg("window_size") = svs::VAMANA_WINDOW_SIZE_DEFAULT,
436+
py::arg("max_candidate_pool_size") = svs::UNSIGNED_INTEGER_PLACEHOLDER,
437+
py::arg("prune_to") = svs::UNSIGNED_INTEGER_PLACEHOLDER,
438+
py::arg("use_full_search_history") =
439+
svs::VAMANA_USE_FULL_SEARCH_HISTORY_DEFAULT,
457440
R"(
458441
Construct a new instance from keyword arguments.
459442
@@ -462,6 +445,7 @@ void wrap(py::module& m) {
462445
For distance types favoring minimization, set this to a number
463446
greater than 1.0 (typically, 1.2 is sufficient). For distance types
464447
preferring maximization, set to a value less than 1.0 (such as 0.95).
448+
The default value is 1.2 for L2 distance type and 0.95 for MIP/Cosine.
465449
graph_max_degree: The maximum out-degree in the final graph. Graphs with
466450
a higher degree tend to yield better accuracy and performance at the cost
467451
of a larger memory footprint.
@@ -470,10 +454,15 @@ void wrap(py::module& m) {
470454
longer construction time. Should be larger than `graph_max_degree`.
471455
max_candidate_pool_size: Limit on the number of candidates to consider
472456
for neighbor updates. Should be larger than `window_size`.
457+
The default value is ``graph_max_degree`` * 2.
473458
prune_to: Amount candidate lists will be pruned to when exceeding the
474459
target max degree. In general, setting this to slightly less than
475-
`graph_max_degree` will yield faster index building times. Default:
476-
`graph_max_degree`.
460+
``graph_max_degree`` will yield faster index building times. Default:
461+
``graph_max_degree`` - 4 if
462+
``graph_max_degree`` is at least 16, otherwise ``graph_max_degree``.
463+
use_full_search_history: When true, uses the full search history during
464+
graph construction, which can improve graph quality at the expense of
465+
additional memory and potentially longer build times.
477466
)"
478467
)
479468
.def_readwrite("alpha", &svs::index::vamana::VamanaBuildParameters::alpha)
@@ -557,4 +546,4 @@ overwritten when saving the index to this directory.
557546
)"
558547
);
559548
}
560-
} // namespace svs::python::vamana
549+
} // namespace svs::python::vamana

‎bindings/python/tests/test_dynamic_vamana.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ def test_loop(self):
9898
# here, we set an expected mid-point for the recall and allow it to wander up and
9999
# down by a little.
100100
expected_recall = 0.845
101-
expected_recall_delta = 0.03
101+
expected_recall_delta = 0.05
102102

103103
reference = ReferenceDataset(num_threads = num_threads)
104104
data, ids = reference.new_ids(5000)

‎bindings/python/tests/test_vamana.py

-7
Original file line numberDiff line numberDiff line change
@@ -281,13 +281,6 @@ def test_basic(self):
281281
self._test_basic(loader, matcher, first_iter = first_iter)
282282
first_iter = False
283283

284-
def test_deprecation(self):
285-
with warnings.catch_warnings(record = True) as w:
286-
p = svs.VamanaBuildParameters(num_threads = 1)
287-
self.assertTrue(len(w) == 1)
288-
self.assertTrue(issubclass(w[0].category, DeprecationWarning))
289-
self.assertTrue("VamanaBuildParameters" in str(w[0].message))
290-
291284
def _groundtruth_map(self):
292285
return {
293286
svs.DistanceType.L2: test_groundtruth_l2,

‎include/svs/index/vamana/build_params.h

+8-7
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#pragma once
1818

1919
// svs
20+
#include "svs/lib/preprocessor.h"
2021
#include "svs/lib/saveload.h"
2122

2223
// stl
@@ -44,33 +45,33 @@ struct VamanaBuildParameters {
4445
, use_full_search_history{use_full_search_history_} {}
4546

4647
/// The pruning parameter.
47-
float alpha;
48+
float alpha = svs::FLOAT_PLACEHOLDER;
4849

4950
/// The maximum degree in the graph. A higher max degree may yield a higher quality
5051
/// graph in terms of recall for performance, but the memory footprint of the graph is
5152
/// directly proportional to the maximum degree.
52-
size_t graph_max_degree;
53+
size_t graph_max_degree = svs::VAMANA_GRAPH_MAX_DEGREE_DEFAULT;
5354

5455
/// The search window size to use during graph construction. A higher search window
5556
/// size will yield a higher quality graph since more overall vertices are considered,
5657
/// but will increase construction time.
57-
size_t window_size;
58+
size_t window_size = svs::VAMANA_WINDOW_SIZE_DEFAULT;
5859

5960
/// Set a limit on the number of neighbors considered during pruning. In practice, set
6061
/// this to a high number (at least 5 times greater than the window_size) and forget
6162
/// about it.
62-
size_t max_candidate_pool_size;
63+
size_t max_candidate_pool_size = svs::UNSIGNED_INTEGER_PLACEHOLDER;
6364

6465
/// This is the amount that candidates will be pruned to after certain pruning
6566
/// procedures. Setting this to less than ``graph_max_degree`` can result in significant
6667
/// speedups in index building.
67-
size_t prune_to;
68+
size_t prune_to = svs::UNSIGNED_INTEGER_PLACEHOLDER;
6869

6970
/// When building, either the contents of the search buffer can be used or the entire
7071
/// search history can be used.
7172
///
7273
/// The latter case may yield a slightly better graph at the cost of more search time.
73-
bool use_full_search_history = true;
74+
bool use_full_search_history = svs::VAMANA_USE_FULL_SEARCH_HISTORY_DEFAULT;
7475

7576
///// Comparison
7677
friend bool
@@ -129,4 +130,4 @@ struct VamanaBuildParameters {
129130
);
130131
}
131132
};
132-
} // namespace svs::index::vamana
133+
} // namespace svs::index::vamana

‎include/svs/index/vamana/dynamic_index.h

+21-8
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include "svs/index/vamana/index.h"
3939
#include "svs/index/vamana/vamana_build.h"
4040
#include "svs/lib/boundscheck.h"
41+
#include "svs/lib/preprocessor.h"
4142
#include "svs/lib/threads.h"
4243

4344
namespace svs::index::vamana {
@@ -157,6 +158,9 @@ class MutableVamanaIndex {
157158
float alpha_ = 1.2;
158159
bool use_full_search_history_ = true;
159160

161+
// Construction parameters
162+
VamanaBuildParameters build_parameters_{};
163+
160164
// SVS logger for per index logging
161165
svs::logging::logger_ptr logger_;
162166

@@ -210,12 +214,19 @@ class MutableVamanaIndex {
210214
, distance_(std::move(distance_function))
211215
, threadpool_(threads::as_threadpool(std::move(threadpool_proto)))
212216
, search_parameters_(vamana::construct_default_search_parameters(data_))
213-
, construction_window_size_(parameters.window_size)
214-
, max_candidates_(parameters.max_candidate_pool_size)
215-
, prune_to_(parameters.prune_to)
216-
, alpha_(parameters.alpha)
217-
, use_full_search_history_{parameters.use_full_search_history}
217+
, build_parameters_(parameters)
218218
, logger_{std::move(logger)} {
219+
// Verify and set defaults on the stored copy of the build parameters
220+
verify_and_set_default_index_parameters(build_parameters_, distance_function);
221+
222+
// Set graph again as verify function might change graph_max_degree parameter
223+
graph_ = Graph{data_.size(), build_parameters_.graph_max_degree};
224+
construction_window_size_ = build_parameters_.window_size;
225+
max_candidates_ = build_parameters_.max_candidate_pool_size;
226+
prune_to_ = build_parameters_.prune_to;
227+
alpha_ = build_parameters_.alpha;
228+
use_full_search_history_ = build_parameters_.use_full_search_history;
229+
219230
// Setup the initial translation of external to internal ids.
220231
translator_.insert(external_ids, threads::UnitRange<Idx>(0, external_ids.size()));
221232

@@ -227,10 +238,12 @@ class MutableVamanaIndex {
227238
auto prefetch_parameters =
228239
GreedySearchPrefetchParameters{sp.prefetch_lookahead_, sp.prefetch_step_};
229240
auto builder = VamanaBuilder(
230-
graph_, data_, distance_, parameters, threadpool_, prefetch_parameters
241+
graph_, data_, distance_, build_parameters_, threadpool_, prefetch_parameters
231242
);
232243
builder.construct(1.0f, entry_point_[0], logging::Level::Info, logger_);
233-
builder.construct(parameters.alpha, entry_point_[0], logging::Level::Info, logger_);
244+
builder.construct(
245+
build_parameters_.alpha, entry_point_[0], logging::Level::Info, logger_
246+
);
234247
}
235248

236249
/// @brief Post re-load constructor.
@@ -1346,4 +1359,4 @@ auto auto_dynamic_assemble(
13461359
std::move(logger)};
13471360
}
13481361

1349-
} // namespace svs::index::vamana
1362+
} // namespace svs::index::vamana

‎include/svs/index/vamana/index.h

+64-5
Original file line numberDiff line numberDiff line change
@@ -404,19 +404,22 @@ class VamanaIndex {
404404
if (graph_.n_nodes() != data_.size()) {
405405
throw ANNEXCEPTION("Wrong sizes!");
406406
}
407-
408407
build_parameters_ = parameters;
408+
// verify the parameters before set local var
409+
verify_and_set_default_index_parameters(build_parameters_, distance_function);
409410
auto builder = VamanaBuilder(
410411
graph_,
411412
data_,
412413
distance_,
413-
parameters,
414+
build_parameters_,
414415
threadpool_,
415416
extensions::estimate_prefetch_parameters(data_)
416417
);
417418

418419
builder.construct(1.0F, entry_point_[0], logging::Level::Info, logger);
419-
builder.construct(parameters.alpha, entry_point_[0], logging::Level::Info, logger);
420+
builder.construct(
421+
build_parameters_.alpha, entry_point_[0], logging::Level::Info, logger
422+
);
420423
}
421424

422425
/// @brief Getter method for logger
@@ -896,10 +899,13 @@ auto auto_build(
896899
auto entry_point = extensions::compute_entry_point(data, threadpool);
897900

898901
// Default graph.
899-
auto graph = default_graph(data.size(), parameters.graph_max_degree, graph_allocator);
902+
auto verified_parameters = parameters;
903+
verify_and_set_default_index_parameters(verified_parameters, distance);
904+
auto graph =
905+
default_graph(data.size(), verified_parameters.graph_max_degree, graph_allocator);
900906
using I = typename decltype(graph)::index_type;
901907
return VamanaIndex{
902-
parameters,
908+
verified_parameters,
903909
std::move(graph),
904910
std::move(data),
905911
lib::narrow<I>(entry_point),
@@ -959,4 +965,57 @@ auto auto_assemble(
959965
index.apply(config);
960966
return index;
961967
}
968+
969+
/// @brief Verify parameters and set defaults if needed
970+
template <typename Dist>
971+
void verify_and_set_default_index_parameters(
972+
VamanaBuildParameters& parameters, Dist distance_function
973+
) {
974+
// Set default values
975+
if (parameters.max_candidate_pool_size == svs::UNSIGNED_INTEGER_PLACEHOLDER) {
976+
parameters.max_candidate_pool_size = 2 * parameters.graph_max_degree;
977+
}
978+
979+
if (parameters.prune_to == svs::UNSIGNED_INTEGER_PLACEHOLDER) {
980+
if (parameters.graph_max_degree >= 16) {
981+
parameters.prune_to = parameters.graph_max_degree - 4;
982+
} else {
983+
parameters.prune_to = parameters.graph_max_degree;
984+
}
985+
}
986+
987+
// Check supported distance type using std::is_same type trait
988+
using dist_type = std::decay_t<decltype(distance_function)>;
989+
// Create type flags for each distance type
990+
constexpr bool is_L2 = std::is_same_v<dist_type, svs::distance::DistanceL2>;
991+
constexpr bool is_IP = std::is_same_v<dist_type, svs::distance::DistanceIP>;
992+
constexpr bool is_Cosine =
993+
std::is_same_v<dist_type, svs::distance::DistanceCosineSimilarity>;
994+
995+
// Handle alpha based on distance type
996+
if constexpr (is_L2) {
997+
if (parameters.alpha == svs::FLOAT_PLACEHOLDER) {
998+
parameters.alpha = svs::VAMANA_ALPHA_MINIMIZE_DEFAULT;
999+
} else if (parameters.alpha < 1.0f) {
1000+
// Check User set values
1001+
throw std::invalid_argument("For L2 distance, alpha must be >= 1.0");
1002+
}
1003+
} else if constexpr (is_IP || is_Cosine) {
1004+
if (parameters.alpha == svs::FLOAT_PLACEHOLDER) {
1005+
parameters.alpha = svs::VAMANA_ALPHA_MAXIMIZE_DEFAULT;
1006+
} else if (parameters.alpha > 1.0f) {
1007+
// Check User set values
1008+
throw std::invalid_argument("For MIP/Cosine distance, alpha must be <= 1.0");
1009+
} else if (parameters.alpha <= 0.0f) {
1010+
throw std::invalid_argument("alpha must be > 0");
1011+
}
1012+
} else {
1013+
throw std::invalid_argument("Unsupported distance type");
1014+
}
1015+
1016+
// Check prune_to <= graph_max_degree
1017+
if (parameters.prune_to > parameters.graph_max_degree) {
1018+
throw std::invalid_argument("prune_to must be <= graph_max_degree");
1019+
}
1020+
}
9621021
} // namespace svs::index::vamana

‎include/svs/lib/preprocessor.h

+14
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616

1717
#pragma once
1818

19+
#include <cstddef>
20+
#include <limits>
21+
1922
namespace svs::preprocessor::detail {
2023

2124
// consteval functions for working with preprocessor defines.
@@ -159,3 +162,14 @@ inline constexpr bool have_avx512_avx2 = true;
159162
#endif
160163

161164
} // namespace svs::arch
165+
166+
namespace svs {
// Sentinel values marking a parameter as "not set by the caller", so that a
// context-dependent default can be substituted later.
inline constexpr size_t UNSIGNED_INTEGER_PLACEHOLDER = std::numeric_limits<size_t>::max();
inline constexpr float FLOAT_PLACEHOLDER = std::numeric_limits<float>::max();

// Default Vamana build parameters. The degree and window size are counts that
// initialize `size_t` fields, so they are declared as `size_t` rather than `float`.
inline constexpr size_t VAMANA_GRAPH_MAX_DEGREE_DEFAULT = 32;
inline constexpr size_t VAMANA_WINDOW_SIZE_DEFAULT = 64;
inline constexpr bool VAMANA_USE_FULL_SEARCH_HISTORY_DEFAULT = true;

// Default pruning `alpha` for distances that are minimized / maximized.
inline constexpr float VAMANA_ALPHA_MINIMIZE_DEFAULT = 1.2F;
inline constexpr float VAMANA_ALPHA_MAXIMIZE_DEFAULT = 0.95F;
} // namespace svs

0 commit comments

Comments
 (0)
Please sign in to comment.