diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9fa5b20a..38e114ea 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -34,7 +34,7 @@ jobs: CIBW_ENVIRONMENT: HNSWLIB_NO_NATIVE=true CFLAGS='-O2' CXXFLAGS='-O2' CIBW_ENVIRONMENT_PASS_LINUX: HNSWLIB_NO_NATIVE CIBW_PROJECT_REQUIRES_PYTHON: ">=3.7" - CIBW_SKIP: "cp312-* pp* *musllinux*" + CIBW_SKIP: "pp* *musllinux*" CIBW_ARCHS_MACOS: "x86_64 arm64" CIBW_ARCHS_WINDOWS: "AMD64" CIBW_ARCHS_LINUX: "x86_64 aarch64" @@ -43,30 +43,29 @@ jobs: with: name: python-package-distributions path: dist - - # upload: - # runs-on: ubuntu-latest - # needs: build - # steps: - # - uses: actions/checkout@v3 - # - uses: actions/setup-python@v4 - # with: - # python-version: "3.10" - # - name: Build sdist - # run: | - # python -m pip install . - # make dist - # - name: Download wheels - # uses: actions/download-artifact@v3 - # with: - # name: python-package-distributions - # path: dist/ - # - name: Publish to Test PyPI - # uses: pypa/gh-action-pypi-publish@release/v1 - # with: - # password: ${{ secrets.TEST_PYPI_API_TOKEN }} - # repository-url: https://test.pypi.org/legacy/ - # - name: Publish to PyPI - # uses: pypa/gh-action-pypi-publish@release/v1 - # with: - # password: ${{ secrets.PYPI_API_TOKEN }} +# upload: +# runs-on: ubuntu-latest +# needs: build +# steps: +# - uses: actions/checkout@v3 +# - uses: actions/setup-python@v4 +# with: +# python-version: "3.10" +# - name: Build sdist +# run: | +# python -m pip install . +# make dist +# - name: Download wheels +# uses: actions/download-artifact@v3 +# with: +# name: python-package-distributions +# path: dist/ +# - name: Publish to Test PyPI +# uses: pypa/gh-action-pypi-publish@release/v1 +# with: +# password: ${{ secrets.TEST_PYPI_API_TOKEN }} +# repository-url: https://test.pypi.org/legacy/ +# - name: Publish to PyPI +# uses: pypa/gh-action-pypi-publish@release/v1 +# with: +# password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9674abb7..1f669113 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -2,6 +2,7 @@ name: Test on: workflow_call: + workflow_dispatch: {} push: branches: - master @@ -15,7 +16,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, windows-latest] - python-version: ["3.7", "3.8", "3.9", "3.10"] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 @@ -26,10 +27,16 @@ jobs: run: python -m pip install . - name: Test + if: matrix.python-version != '3.12' timeout-minutes: 15 run: | python -m unittest discover -v --start-directory examples/python --pattern "example*.py" python -m unittest discover -v --start-directory tests/python --pattern "bindings_test*.py" + - name: Test Py312 + if: matrix.python-version == '3.12' + timeout-minutes: 15 + run: | + python -m unittest discover -v --start-directory tests/python --pattern "bindings_test*.py" test_cpp: runs-on: ${{matrix.os}} diff --git a/hnswlib/bruteforce.h b/hnswlib/bruteforce.h index 30b33ae9..26445d82 100644 --- a/hnswlib/bruteforce.h +++ b/hnswlib/bruteforce.h @@ -98,7 +98,7 @@ class BruteforceSearch : public AlgorithmInterface { std::priority_queue> - searchKnn(const void *query_data, size_t k, BaseFilterFunctor* isIdAllowed = nullptr) const { + searchKnn(const void *query_data, size_t k, BaseFilterFunctor* isIdAllowed = nullptr, size_t ef = 0) const { assert(k <= cur_element_count); std::priority_queue> topResults; if (cur_element_count == 0) return topResults; diff --git a/hnswlib/hnswalg.h b/hnswlib/hnswalg.h index 3b4ea8c0..6daa4a35 100644 --- a/hnswlib/hnswalg.h +++ b/hnswlib/hnswalg.h @@ -1558,7 +1558,7 @@ class HierarchicalNSW : public AlgorithmInterface { std::priority_queue> - searchKnn(const void *query_data, size_t k, BaseFilterFunctor* isIdAllowed = nullptr) const { + searchKnn(const void *query_data, size_t k, BaseFilterFunctor* isIdAllowed = nullptr, size_t ef = 0) const { std::priority_queue> result; if (cur_element_count == 0) return result; @@ -1591,13 +1591,15 @@ class HierarchicalNSW : public AlgorithmInterface { } } + size_t effective_ef = ef > 0 ? std::max(ef, ef_) : ef_; + std::priority_queue, std::vector>, CompareByFirst> top_candidates; if (num_deleted_) { top_candidates = searchBaseLayerST( - currObj, query_data, std::max(ef_, k), isIdAllowed); + currObj, query_data, std::max(effective_ef, k), isIdAllowed); } else { top_candidates = searchBaseLayerST( - currObj, query_data, std::max(ef_, k), isIdAllowed); + currObj, query_data, std::max(effective_ef, k), isIdAllowed); } while (top_candidates.size() > k) { diff --git a/hnswlib/hnswlib.h b/hnswlib/hnswlib.h index fb7118fa..a8c0b0c9 100644 --- a/hnswlib/hnswlib.h +++ b/hnswlib/hnswlib.h @@ -161,7 +161,7 @@ class AlgorithmInterface { virtual void addPoint(const void *datapoint, labeltype label, bool replace_deleted = false) = 0; virtual std::priority_queue> - searchKnn(const void*, size_t, BaseFilterFunctor* isIdAllowed = nullptr) const = 0; + searchKnn(const void*, size_t, BaseFilterFunctor* isIdAllowed = nullptr, size_t ef = 0) const = 0; // Return k nearest neighbor in the order of closer fist virtual std::vector> diff --git a/python_bindings/bindings.cpp b/python_bindings/bindings.cpp index 17c42ff7..a3be72a7 100644 --- a/python_bindings/bindings.cpp +++ b/python_bindings/bindings.cpp @@ -607,7 +607,7 @@ class Index { py::object input, size_t k = 1, int num_threads = -1, - const std::function& filter = nullptr) { + const std::function& filter = nullptr, size_t ef = 0) { py::array_t < dist_t, py::array::c_style | py::array::forcecast > items(input); auto buffer = items.request(); hnswlib::labeltype* data_numpy_l; @@ -636,7 +636,7 @@ class Index { if (normalize == false) { ParallelFor(0, rows, num_threads, [&](size_t row, size_t threadId) { std::priority_queue> result = appr_alg->searchKnn( - (void*)items.data(row), k, p_idFilter); + (void*)items.data(row), k, p_idFilter, ef); if (result.size() != k) throw std::runtime_error( "Cannot return the results in a contigious 2D array. Probably ef or M is too small"); @@ -656,7 +656,7 @@ class Index { normalize_vector((float*)items.data(row), (norm_array.data() + start_idx)); std::priority_queue> result = appr_alg->searchKnn( - (void*)(norm_array.data() + start_idx), k, p_idFilter); + (void*)(norm_array.data() + start_idx), k, p_idFilter, ef); if (result.size() != k) throw std::runtime_error( "Cannot return the results in a contigious 2D array. Probably ef or M is too small"); @@ -901,7 +901,8 @@ PYBIND11_PLUGIN(hnswlib) { py::arg("data"), py::arg("k") = 1, py::arg("num_threads") = -1, - py::arg("filter") = py::none()) + py::arg("filter") = py::none(), + py::arg("ef") = 0) .def("add_items", &Index::addItems, py::arg("data"),