Skip to content

Commit 1b668dc

Browse files
authored
Merge branch 'chroma-core:master' into master
2 parents 28691a3 + 1aaa5e1 commit 1b668dc

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+4894
-3830
lines changed

.github/workflows/release.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,14 @@ jobs:
2727
with:
2828
python-version: '3.10'
2929
- name: Install cibuildwheel
30-
run: python -m pip install cibuildwheel==2.15.0
30+
run: python -m pip install cibuildwheel==2.19.1
3131
- name: Build wheels
3232
run: python -m cibuildwheel --output-dir dist
3333
env:
3434
CIBW_ENVIRONMENT: HNSWLIB_NO_NATIVE=true CFLAGS='-O2' CXXFLAGS='-O2'
3535
CIBW_ENVIRONMENT_PASS_LINUX: HNSWLIB_NO_NATIVE
3636
CIBW_PROJECT_REQUIRES_PYTHON: ">=3.7"
37-
CIBW_SKIP: "cp312-* pp* *musllinux*"
37+
CIBW_SKIP: "pp* *musllinux* cp312-win*"
3838
CIBW_ARCHS_MACOS: "x86_64 arm64"
3939
CIBW_ARCHS_WINDOWS: "AMD64"
4040
CIBW_ARCHS_LINUX: "x86_64 aarch64"

examples/cpp/example_filter.cpp

+28-20
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,46 @@
11
#include "../../hnswlib/hnswlib.h"
22

3-
43
// Filter that allows labels divisible by divisor
5-
class PickDivisibleIds: public hnswlib::BaseFilterFunctor {
6-
unsigned int divisor = 1;
7-
public:
8-
PickDivisibleIds(unsigned int divisor): divisor(divisor) {
4+
class PickDivisibleIds : public hnswlib::BaseFilterFunctor
5+
{
6+
unsigned int divisor = 1;
7+
8+
public:
9+
PickDivisibleIds(unsigned int divisor) : divisor(divisor)
10+
{
911
assert(divisor != 0);
1012
}
11-
bool operator()(hnswlib::labeltype label_id) {
13+
bool operator()(hnswlib::labeltype label_id)
14+
{
1215
return label_id % divisor == 0;
1316
}
1417
};
1518

16-
17-
int main() {
18-
int dim = 16; // Dimension of the elements
19-
int max_elements = 10000; // Maximum number of elements, should be known beforehand
20-
int M = 16; // Tightly connected with internal dimensionality of the data
21-
// strongly affects the memory consumption
22-
int ef_construction = 200; // Controls index search speed/build speed tradeoff
19+
int main()
20+
{
21+
int dim = 16; // Dimension of the elements
22+
int max_elements = 10000; // Maximum number of elements, should be known beforehand
23+
int M = 16; // Tightly connected with internal dimensionality of the data
24+
// strongly affects the memory consumption
25+
int ef_construction = 200; // Controls index search speed/build speed tradeoff
2326

2427
// Initing index
2528
hnswlib::L2Space space(dim);
26-
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction);
29+
hnswlib::HierarchicalNSW<float> *alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction);
2730

2831
// Generate random data
2932
std::mt19937 rng;
3033
rng.seed(47);
3134
std::uniform_real_distribution<> distrib_real;
32-
float* data = new float[dim * max_elements];
33-
for (int i = 0; i < dim * max_elements; i++) {
35+
float *data = new float[dim * max_elements];
36+
for (int i = 0; i < dim * max_elements; i++)
37+
{
3438
data[i] = distrib_real(rng);
3539
}
3640

3741
// Add data to index
38-
for (int i = 0; i < max_elements; i++) {
42+
for (int i = 0; i < max_elements; i++)
43+
{
3944
alg_hnsw->addPoint(data + i * dim, i);
4045
}
4146

@@ -44,10 +49,13 @@ int main() {
4449

4550
// Query the elements for themselves with filter and check returned labels
4651
int k = 10;
47-
for (int i = 0; i < max_elements; i++) {
52+
for (int i = 0; i < max_elements; i++)
53+
{
4854
std::vector<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnnCloserFirst(data + i * dim, k, &pickIdsDivisibleByTwo);
49-
for (auto item: result) {
50-
if (item.second % 2 == 1) std::cout << "Error: found odd label\n";
55+
for (auto item : result)
56+
{
57+
if (item.second % 2 == 1)
58+
std::cout << "Error: found odd label\n";
5159
}
5260
}
5361

examples/cpp/example_mt_filter.cpp

+51-38
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,26 @@
11
#include "../../hnswlib/hnswlib.h"
22
#include <thread>
33

4-
54
// Multithreaded executor
65
// The helper function copied from python_bindings/bindings.cpp (and that itself is copied from nmslib)
76
// An alternative is using #pragma omp parallel for or any other C++ threading
8-
template<class Function>
9-
inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn) {
10-
if (numThreads <= 0) {
7+
template <class Function>
8+
inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn)
9+
{
10+
if (numThreads <= 0)
11+
{
1112
numThreads = std::thread::hardware_concurrency();
1213
}
1314

14-
if (numThreads == 1) {
15-
for (size_t id = start; id < end; id++) {
15+
if (numThreads == 1)
16+
{
17+
for (size_t id = start; id < end; id++)
18+
{
1619
fn(id, 0);
1720
}
18-
} else {
21+
}
22+
else
23+
{
1924
std::vector<std::thread> threads;
2025
std::atomic<size_t> current(start);
2126

@@ -24,8 +29,10 @@ inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn
2429
std::exception_ptr lastException = nullptr;
2530
std::mutex lastExceptMutex;
2631

27-
for (size_t threadId = 0; threadId < numThreads; ++threadId) {
28-
threads.push_back(std::thread([&, threadId] {
32+
for (size_t threadId = 0; threadId < numThreads; ++threadId)
33+
{
34+
threads.push_back(std::thread([&, threadId]
35+
{
2936
while (true) {
3037
size_t id = current.fetch_add(1);
3138

@@ -47,75 +54,81 @@ inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn
4754
current = end;
4855
break;
4956
}
50-
}
51-
}));
57+
} }));
5258
}
53-
for (auto &thread : threads) {
59+
for (auto &thread : threads)
60+
{
5461
thread.join();
5562
}
56-
if (lastException) {
63+
if (lastException)
64+
{
5765
std::rethrow_exception(lastException);
5866
}
5967
}
6068
}
6169

62-
6370
// Filter that allows labels divisible by divisor
64-
class PickDivisibleIds: public hnswlib::BaseFilterFunctor {
65-
unsigned int divisor = 1;
66-
public:
67-
PickDivisibleIds(unsigned int divisor): divisor(divisor) {
71+
class PickDivisibleIds : public hnswlib::BaseFilterFunctor
72+
{
73+
unsigned int divisor = 1;
74+
75+
public:
76+
PickDivisibleIds(unsigned int divisor) : divisor(divisor)
77+
{
6878
assert(divisor != 0);
6979
}
70-
bool operator()(hnswlib::labeltype label_id) {
80+
bool operator()(hnswlib::labeltype label_id)
81+
{
7182
return label_id % divisor == 0;
7283
}
7384
};
7485

75-
76-
int main() {
77-
int dim = 16; // Dimension of the elements
78-
int max_elements = 10000; // Maximum number of elements, should be known beforehand
79-
int M = 16; // Tightly connected with internal dimensionality of the data
80-
// strongly affects the memory consumption
81-
int ef_construction = 200; // Controls index search speed/build speed tradeoff
82-
int num_threads = 20; // Number of threads for operations with index
86+
int main()
87+
{
88+
int dim = 16; // Dimension of the elements
89+
int max_elements = 10000; // Maximum number of elements, should be known beforehand
90+
int M = 16; // Tightly connected with internal dimensionality of the data
91+
// strongly affects the memory consumption
92+
int ef_construction = 200; // Controls index search speed/build speed tradeoff
93+
int num_threads = 20; // Number of threads for operations with index
8394

8495
// Initing index
8596
hnswlib::L2Space space(dim);
86-
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction);
97+
hnswlib::HierarchicalNSW<float> *alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction);
8798

8899
// Generate random data
89100
std::mt19937 rng;
90101
rng.seed(47);
91102
std::uniform_real_distribution<> distrib_real;
92-
float* data = new float[dim * max_elements];
93-
for (int i = 0; i < dim * max_elements; i++) {
103+
float *data = new float[dim * max_elements];
104+
for (int i = 0; i < dim * max_elements; i++)
105+
{
94106
data[i] = distrib_real(rng);
95107
}
96108

97109
// Add data to index
98-
ParallelFor(0, max_elements, num_threads, [&](size_t row, size_t threadId) {
99-
alg_hnsw->addPoint((void*)(data + dim * row), row);
100-
});
110+
ParallelFor(0, max_elements, num_threads, [&](size_t row, size_t threadId)
111+
{ alg_hnsw->addPoint((void *)(data + dim * row), row); });
101112

102113
// Create filter that allows only even labels
103114
PickDivisibleIds pickIdsDivisibleByTwo(2);
104115

105116
// Query the elements for themselves with filter and check returned labels
106117
int k = 10;
107118
std::vector<hnswlib::labeltype> neighbors(max_elements * k);
108-
ParallelFor(0, max_elements, num_threads, [&](size_t row, size_t threadId) {
119+
ParallelFor(0, max_elements, num_threads, [&](size_t row, size_t threadId)
120+
{
109121
std::priority_queue<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnn(data + dim * row, k, &pickIdsDivisibleByTwo);
110122
for (int i = 0; i < k; i++) {
111123
hnswlib::labeltype label = result.top().second;
112124
result.pop();
113125
neighbors[row * k + i] = label;
114-
}
115-
});
126+
} });
116127

117-
for (hnswlib::labeltype label: neighbors) {
118-
if (label % 2 == 1) std::cout << "Error: found odd label\n";
128+
for (hnswlib::labeltype label : neighbors)
129+
{
130+
if (label % 2 == 1)
131+
std::cout << "Error: found odd label\n";
119132
}
120133

121134
delete[] data;

examples/cpp/example_mt_replace_deleted.cpp

+44-36
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,26 @@
11
#include "../../hnswlib/hnswlib.h"
22
#include <thread>
33

4-
54
// Multithreaded executor
65
// The helper function copied from python_bindings/bindings.cpp (and that itself is copied from nmslib)
76
// An alternative is using #pragma omp parallel for or any other C++ threading
8-
template<class Function>
9-
inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn) {
10-
if (numThreads <= 0) {
7+
template <class Function>
8+
inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn)
9+
{
10+
if (numThreads <= 0)
11+
{
1112
numThreads = std::thread::hardware_concurrency();
1213
}
1314

14-
if (numThreads == 1) {
15-
for (size_t id = start; id < end; id++) {
15+
if (numThreads == 1)
16+
{
17+
for (size_t id = start; id < end; id++)
18+
{
1619
fn(id, 0);
1720
}
18-
} else {
21+
}
22+
else
23+
{
1924
std::vector<std::thread> threads;
2025
std::atomic<size_t> current(start);
2126

@@ -24,8 +29,10 @@ inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn
2429
std::exception_ptr lastException = nullptr;
2530
std::mutex lastExceptMutex;
2631

27-
for (size_t threadId = 0; threadId < numThreads; ++threadId) {
28-
threads.push_back(std::thread([&, threadId] {
32+
for (size_t threadId = 0; threadId < numThreads; ++threadId)
33+
{
34+
threads.push_back(std::thread([&, threadId]
35+
{
2936
while (true) {
3037
size_t id = current.fetch_add(1);
3138

@@ -47,65 +54,66 @@ inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn
4754
current = end;
4855
break;
4956
}
50-
}
51-
}));
57+
} }));
5258
}
53-
for (auto &thread : threads) {
59+
for (auto &thread : threads)
60+
{
5461
thread.join();
5562
}
56-
if (lastException) {
63+
if (lastException)
64+
{
5765
std::rethrow_exception(lastException);
5866
}
5967
}
6068
}
6169

62-
63-
int main() {
64-
int dim = 16; // Dimension of the elements
65-
int max_elements = 10000; // Maximum number of elements, should be known beforehand
66-
int M = 16; // Tightly connected with internal dimensionality of the data
67-
// strongly affects the memory consumption
68-
int ef_construction = 200; // Controls index search speed/build speed tradeoff
69-
int num_threads = 20; // Number of threads for operations with index
70+
int main()
71+
{
72+
int dim = 16; // Dimension of the elements
73+
int max_elements = 10000; // Maximum number of elements, should be known beforehand
74+
int M = 16; // Tightly connected with internal dimensionality of the data
75+
// strongly affects the memory consumption
76+
int ef_construction = 200; // Controls index search speed/build speed tradeoff
77+
int num_threads = 20; // Number of threads for operations with index
7078

7179
// Initing index with allow_replace_deleted=true
72-
int seed = 100;
80+
int seed = 100;
7381
hnswlib::L2Space space(dim);
74-
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction, seed, true);
82+
hnswlib::HierarchicalNSW<float> *alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction, seed, true);
7583

7684
// Generate random data
7785
std::mt19937 rng;
7886
rng.seed(47);
7987
std::uniform_real_distribution<> distrib_real;
80-
float* data = new float[dim * max_elements];
81-
for (int i = 0; i < dim * max_elements; i++) {
88+
float *data = new float[dim * max_elements];
89+
for (int i = 0; i < dim * max_elements; i++)
90+
{
8291
data[i] = distrib_real(rng);
8392
}
8493

8594
// Add data to index
86-
ParallelFor(0, max_elements, num_threads, [&](size_t row, size_t threadId) {
87-
alg_hnsw->addPoint((void*)(data + dim * row), row);
88-
});
95+
ParallelFor(0, max_elements, num_threads, [&](size_t row, size_t threadId)
96+
{ alg_hnsw->addPoint((void *)(data + dim * row), row); });
8997

9098
// Mark first half of elements as deleted
9199
int num_deleted = max_elements / 2;
92-
ParallelFor(0, num_deleted, num_threads, [&](size_t row, size_t threadId) {
93-
alg_hnsw->markDelete(row);
94-
});
100+
ParallelFor(0, num_deleted, num_threads, [&](size_t row, size_t threadId)
101+
{ alg_hnsw->markDelete(row); });
95102

96103
// Generate additional random data
97-
float* add_data = new float[dim * num_deleted];
98-
for (int i = 0; i < dim * num_deleted; i++) {
104+
float *add_data = new float[dim * num_deleted];
105+
for (int i = 0; i < dim * num_deleted; i++)
106+
{
99107
add_data[i] = distrib_real(rng);
100108
}
101109

102110
// Replace deleted data with new elements
103111
// Maximum number of elements is reached therefore we cannot add new items,
104112
// but we can replace the deleted ones by using replace_deleted=true
105-
ParallelFor(0, num_deleted, num_threads, [&](size_t row, size_t threadId) {
113+
ParallelFor(0, num_deleted, num_threads, [&](size_t row, size_t threadId)
114+
{
106115
hnswlib::labeltype label = max_elements + row;
107-
alg_hnsw->addPoint((void*)(add_data + dim * row), label, true);
108-
});
116+
alg_hnsw->addPoint((void*)(add_data + dim * row), label, true); });
109117

110118
delete[] data;
111119
delete[] add_data;

0 commit comments

Comments
 (0)