Skip to content

Commit d86f8f9

Browse files
authored
Refactoring of project structure (#432)
* Refactor file structure, update readme and examples
* Update Makefile
* Update git tester
* Remove redundant updates_test.cpp, apply suggested changes to example file
* Return back python3 in Makefile
1 parent 978f713 commit d86f8f9

35 files changed, with 412 additions and 136 deletions.

Diff for: .github/workflows/build.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ jobs:
2020

2121
- name: Test
2222
timeout-minutes: 15
23-
run: python -m unittest discover -v --start-directory python_bindings/tests --pattern "*_test*.py"
23+
run: python -m unittest discover -v --start-directory tests/python --pattern "bindings_test*.py"
2424

2525
test_cpp:
2626
runs-on: ${{matrix.os}}
@@ -48,7 +48,7 @@ jobs:
4848
- name: Prepare test data
4949
run: |
5050
pip install numpy
51-
cd examples
51+
cd tests/cpp/
5252
python update_gen_data.py
5353
shell: bash
5454

Diff for: CMakeLists.txt

+6-6
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,21 @@ if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
1616
SET( CMAKE_CXX_FLAGS "-Ofast -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -openmp -march=native -fpic -w -fopenmp -ftree-vectorize" )
1717
endif()
1818

19-
add_executable(test_updates examples/updates_test.cpp)
19+
add_executable(test_updates tests/cpp/updates_test.cpp)
2020
target_link_libraries(test_updates hnswlib)
2121

22-
add_executable(searchKnnCloserFirst_test examples/searchKnnCloserFirst_test.cpp)
22+
add_executable(searchKnnCloserFirst_test tests/cpp/searchKnnCloserFirst_test.cpp)
2323
target_link_libraries(searchKnnCloserFirst_test hnswlib)
2424

25-
add_executable(searchKnnWithFilter_test examples/searchKnnWithFilter_test.cpp)
25+
add_executable(searchKnnWithFilter_test tests/cpp/searchKnnWithFilter_test.cpp)
2626
target_link_libraries(searchKnnWithFilter_test hnswlib)
2727

28-
add_executable(multiThreadLoad_test examples/multiThreadLoad_test.cpp)
28+
add_executable(multiThreadLoad_test tests/cpp/multiThreadLoad_test.cpp)
2929
target_link_libraries(multiThreadLoad_test hnswlib)
3030

31-
add_executable(multiThread_replace_test examples/multiThread_replace_test.cpp)
31+
add_executable(multiThread_replace_test tests/cpp/multiThread_replace_test.cpp)
3232
target_link_libraries(multiThread_replace_test hnswlib)
3333

34-
add_executable(main main.cpp sift_1b.cpp)
34+
add_executable(main tests/cpp/main.cpp tests/cpp/sift_1b.cpp)
3535
target_link_libraries(main hnswlib)
3636
endif()

Diff for: Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ dist:
77
python3 -m build --sdist
88

99
test:
10-
python3 -m unittest discover --start-directory python_bindings/tests --pattern "*_test*.py"
10+
python3 -m unittest discover --start-directory tests/python --pattern "bindings_test*.py"
1111

1212
clean:
1313
rm -rf *.egg-info build dist tmp var tests/__pycache__ hnswlib.cpython*.so

Diff for: README.md

+5-102
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ Properties of `hnswlib.Index` that support reading and writing:
123123
124124

125125
#### Python bindings examples
126+
[See more examples here](examples/EXAMPLES.md)
126127
```python
127128
import hnswlib
128129
import numpy as np
@@ -229,104 +230,6 @@ labels, distances = p.knn_query(data, k=1)
229230
print("Recall for two batches:", np.mean(labels.reshape(-1) == np.arange(len(data))), "\n")
230231
```
231232

232-
An example with a filter:
233-
```python
234-
import hnswlib
235-
import numpy as np
236-
237-
dim = 16
238-
num_elements = 10000
239-
240-
# Generating sample data
241-
data = np.float32(np.random.random((num_elements, dim)))
242-
243-
# Declaring index
244-
hnsw_index = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip
245-
246-
# Initiating index
247-
# max_elements - the maximum number of elements, should be known beforehand
248-
# (probably will be made optional in the future)
249-
#
250-
# ef_construction - controls index search speed/build speed tradeoff
251-
# M - is tightly connected with internal dimensionality of the data
252-
# strongly affects the memory consumption
253-
254-
hnsw_index.init_index(max_elements=num_elements, ef_construction=100, M=16)
255-
256-
# Controlling the recall by setting ef:
257-
# higher ef leads to better accuracy, but slower search
258-
hnsw_index.set_ef(10)
259-
260-
# Set number of threads used during batch search/construction
261-
# By default using all available cores
262-
hnsw_index.set_num_threads(4)
263-
264-
print("Adding %d elements" % (len(data)))
265-
# Added elements will have consecutive ids
266-
hnsw_index.add_items(data, ids=np.arange(num_elements))
267-
268-
print("Querying only even elements")
269-
# Define filter function that allows only even ids
270-
filter_function = lambda idx: idx%2 == 0
271-
# Query the elements for themselves and search only for even elements:
272-
labels, distances = hnsw_index.knn_query(data, k=1, filter=filter_function)
273-
# labels contain only elements with even id
274-
```
275-
276-
An example with replacing of deleted elements:
277-
```python
278-
import hnswlib
279-
import numpy as np
280-
281-
dim = 16
282-
num_elements = 1_000
283-
max_num_elements = 2 * num_elements
284-
285-
# Generating sample data
286-
labels1 = np.arange(0, num_elements)
287-
data1 = np.float32(np.random.random((num_elements, dim))) # batch 1
288-
labels2 = np.arange(num_elements, 2 * num_elements)
289-
data2 = np.float32(np.random.random((num_elements, dim))) # batch 2
290-
labels3 = np.arange(2 * num_elements, 3 * num_elements)
291-
data3 = np.float32(np.random.random((num_elements, dim))) # batch 3
292-
293-
# Declaring index
294-
hnsw_index = hnswlib.Index(space='l2', dim=dim)
295-
296-
# Initiating index
297-
# max_elements - the maximum number of elements, should be known beforehand
298-
# (probably will be made optional in the future)
299-
#
300-
# ef_construction - controls index search speed/build speed tradeoff
301-
# M - is tightly connected with internal dimensionality of the data
302-
# strongly affects the memory consumption
303-
304-
# Enable replacing of deleted elements
305-
hnsw_index.init_index(max_elements=max_num_elements, ef_construction=200, M=16, allow_replace_deleted=True)
306-
307-
# Controlling the recall by setting ef:
308-
# higher ef leads to better accuracy, but slower search
309-
hnsw_index.set_ef(10)
310-
311-
# Set number of threads used during batch search/construction
312-
# By default using all available cores
313-
hnsw_index.set_num_threads(4)
314-
315-
# Add batch 1 and 2 data
316-
hnsw_index.add_items(data1, labels1)
317-
hnsw_index.add_items(data2, labels2) # Note: maximum number of elements is reached
318-
319-
# Delete data of batch 2
320-
for label in labels2:
321-
hnsw_index.mark_deleted(label)
322-
323-
# Replace deleted elements
324-
# Maximum number of elements is reached therefore we cannot add new items,
325-
# but we can replace the deleted ones by using replace_deleted=True
326-
hnsw_index.add_items(data3, labels3, replace_deleted=True)
327-
# hnsw_index contains the data of batch 1 and batch 3 only
328-
```
329-
330233
### Bindings installation
331234

332235
You can install from sources:
@@ -346,9 +249,9 @@ Contributions are highly welcome!
346249

347250
Please make pull requests against the `develop` branch.
348251

349-
When making changes please run tests (and please add a test to `python_bindings/tests` in case there is new functionality):
252+
When making changes please run tests (and please add a test to `tests/python` in case there is new functionality):
350253
```bash
351-
python -m unittest discover --start-directory python_bindings/tests --pattern "*_test*.py"
254+
python -m unittest discover --start-directory tests/python --pattern "bindings_test*.py"
352255
```
353256

354257

@@ -373,7 +276,7 @@ https://github.com/dbaranchuk/ivf-hnsw
373276
### 200M SIFT test reproduction
374277
To download and extract the bigann dataset (from root directory):
375278
```bash
376-
python3 download_bigann.py
279+
python tests/cpp/download_bigann.py
377280
```
378281
To compile:
379282
```bash
@@ -393,7 +296,7 @@ The size of the BigANN subset (in millions) is controlled by the variable **subs
393296
### Updates test
394297
To generate testing data (from root directory):
395298
```bash
396-
cd examples
299+
cd tests/cpp
397300
python update_gen_data.py
398301
```
399302
To compile (from root directory):

0 commit comments

Comments
 (0)