diff --git a/.github/workflows/actions/run-engine-benchmark/action.yaml b/.github/workflows/actions/run-engine-benchmark/action.yaml
new file mode 100644
index 00000000..c1152814
--- /dev/null
+++ b/.github/workflows/actions/run-engine-benchmark/action.yaml
@@ -0,0 +1,44 @@
+# Composite action: installs project dependencies with Poetry, starts the
+# engine under test with docker compose, then runs run.py against it.
+name: Run Engine Benchmark
+description: "Run benchmark with specified params"
+inputs:
+  engine:
+    description: "engine (e.g. qdrant-default)"
+    required: true
+  dataset:
+    description: "dataset (e.g. random-100)"
+    required: true
+  compose_file:
+    description: "path to docker compose"
+    required: true
+
+runs:
+  using: "composite"
+  steps:
+    - name: Install poetry
+      shell: bash
+      run: pip install poetry
+    - uses: actions/setup-python@v5
+      with:
+        python-version: "3.10"
+        cache: "poetry"
+    - name: Install deps
+      shell: bash
+      run: poetry install
+    - uses: hoverkraft-tech/compose-action@v2.0.0
+      with:
+        compose-file: "${{ inputs.compose_file }}"
+    - name: Execution
+      shell: bash
+      # Inputs are passed through the environment rather than interpolated
+      # into the script text, so their values are never parsed as shell code.
+      env:
+        ENGINE: "${{ inputs.engine }}"
+        DATASET: "${{ inputs.dataset }}"
+      run: |
+        if [[ "$ENGINE" == *"elasticsearch"* || "$ENGINE" == *"opensearch"* ]]; then
+          ./tools/wait_for_green_status.sh
+        fi
+        source "$(poetry env info -p)/bin/activate"
+        poetry run python3 run.py --engines "$ENGINE" --datasets "$DATASET"
diff --git a/.github/workflows/manual-all-engines-benchmark.yaml b/.github/workflows/manual-all-engines-benchmark.yaml
new file mode 100644
index 00000000..38737409
--- /dev/null
+++ b/.github/workflows/manual-all-engines-benchmark.yaml
@@ -0,0 +1,164 @@
+name: Manual All Engines Default Benchmarks
+
+on:
+  push:
+    branches:
+      - "master"
+  pull_request:
+    types:
+      - opened
+      - reopened
+  workflow_dispatch:
+
+jobs:
+  # The push payload given to Actions carries no per-commit file lists, and
+  # pull_request / workflow_dispatch events have no head_commit at all, so
+  # changed paths are computed here with git and shared with the engine jobs.
+  # Output format: "|path|path|...|" (prefix-match with '|engine/...'), or
+  # "*" when no base commit is known (manual run) or it cannot be diffed.
+  detectChanges:
+    name: detect changed engine paths
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    outputs:
+      paths: "${{ steps.diff.outputs.paths }}"
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+      - id: diff
+        shell: bash
+        env:
+          BASE_SHA: "${{ github.event.pull_request.base.sha || github.event.before }}"
+        run: |
+          if [[ -n "$BASE_SHA" ]] && files="$(git diff --name-only "$BASE_SHA" HEAD)"; then
+            echo "paths=|$(tr '\n' '|' <<< "$files")" >> "$GITHUB_OUTPUT"
+          else
+            echo "paths=*" >> "$GITHUB_OUTPUT"
+          fi
+
+  elasticsearchBenchmark:
+    needs: detectChanges
+    if: >-
+      needs.detectChanges.outputs.paths == '*' ||
+      contains(needs.detectChanges.outputs.paths, '|engine/clients/elasticsearch') ||
+      contains(needs.detectChanges.outputs.paths, '|engine/servers/elasticsearch') ||
+      contains(needs.detectChanges.outputs.paths, '|engine/base_client/')
+    name: benchmark - elasticsearch-default - random-100 - against elasticsearch-single-node-ci
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v3
+      - uses: ./.github/workflows/actions/run-engine-benchmark
+        with:
+          engine: "elasticsearch-default"
+          dataset: "random-100"
+          compose_file: "engine/servers/elasticsearch-single-node-ci/docker-compose.yaml"
+
+  milvusBenchmark:
+    needs: detectChanges
+    if: >-
+      needs.detectChanges.outputs.paths == '*' ||
+      contains(needs.detectChanges.outputs.paths, '|engine/clients/milvus') ||
+      contains(needs.detectChanges.outputs.paths, '|engine/servers/milvus') ||
+      contains(needs.detectChanges.outputs.paths, '|engine/base_client/')
+    name: benchmark - milvus-default - random-100 - against milvus-single-node
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v3
+      - uses: ./.github/workflows/actions/run-engine-benchmark
+        with:
+          engine: "milvus-default"
+          dataset: "random-100"
+          compose_file: "engine/servers/milvus-single-node/docker-compose.yaml"
+
+  opensearchBenchmark:
+    needs: detectChanges
+    if: >-
+      needs.detectChanges.outputs.paths == '*' ||
+      contains(needs.detectChanges.outputs.paths, '|engine/clients/opensearch') ||
+      contains(needs.detectChanges.outputs.paths, '|engine/servers/opensearch') ||
+      contains(needs.detectChanges.outputs.paths, '|engine/base_client/')
+    name: benchmark - opensearch-default - glove-25-angular - against opensearch-single-node-ci
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v3
+      - uses: ./.github/workflows/actions/run-engine-benchmark
+        with:
+          engine: "opensearch-default"
+          dataset: "glove-25-angular"
+          compose_file: "engine/servers/opensearch-single-node-ci/docker-compose.yaml"
+
+  pgvectorBenchmark:
+    needs: detectChanges
+    if: >-
+      needs.detectChanges.outputs.paths == '*' ||
+      contains(needs.detectChanges.outputs.paths, '|engine/clients/pgvector') ||
+      contains(needs.detectChanges.outputs.paths, '|engine/servers/pgvector') ||
+      contains(needs.detectChanges.outputs.paths, '|engine/base_client/')
+    name: benchmark - pgvector-default - random-100 - against pgvector-single-node
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v3
+      - uses: ./.github/workflows/actions/run-engine-benchmark
+        with:
+          engine: "pgvector-default"
+          dataset: "random-100"
+          compose_file: "engine/servers/pgvector-single-node/docker-compose.yaml"
+
+  qdrantBenchmark:
+    needs: detectChanges
+    if: >-
+      needs.detectChanges.outputs.paths == '*' ||
+      contains(needs.detectChanges.outputs.paths, '|engine/clients/qdrant') ||
+      contains(needs.detectChanges.outputs.paths, '|engine/servers/qdrant') ||
+      contains(needs.detectChanges.outputs.paths, '|engine/base_client/')
+    name: benchmark - qdrant-default - random-100 - against qdrant-single-node
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v3
+      - uses: ./.github/workflows/actions/run-engine-benchmark
+        with:
+          engine: "qdrant-default"
+          dataset: "random-100"
+          compose_file: "engine/servers/qdrant-single-node/docker-compose.yaml"
+
+  redisBenchmark:
+    needs: detectChanges
+    if: >-
+      needs.detectChanges.outputs.paths == '*' ||
+      contains(needs.detectChanges.outputs.paths, '|engine/clients/redis') ||
+      contains(needs.detectChanges.outputs.paths, '|engine/servers/redis') ||
+      contains(needs.detectChanges.outputs.paths, '|engine/base_client/')
+    name: benchmark - redis-default - random-100 - against redis-single-node
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v3
+      - uses: ./.github/workflows/actions/run-engine-benchmark
+        with:
+          engine: "redis-default"
+          dataset: "random-100"
+          compose_file: "engine/servers/redis-single-node/docker-compose.yaml"
+
+  weaviateBenchmark:
+    needs: detectChanges
+    if: >-
+      needs.detectChanges.outputs.paths == '*' ||
+      contains(needs.detectChanges.outputs.paths, '|engine/clients/weaviate') ||
+      contains(needs.detectChanges.outputs.paths, '|engine/servers/weaviate') ||
+      contains(needs.detectChanges.outputs.paths, '|engine/base_client/')
+    name: benchmark - weaviate-default - random-100 - against weaviate-single-node
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v3
+      - uses: ./.github/workflows/actions/run-engine-benchmark
+        with:
+          engine: "weaviate-default"
+          dataset: "random-100"
+          compose_file: "engine/servers/weaviate-single-node/docker-compose.yaml"
diff --git a/engine/servers/elasticsearch-single-node-ci/docker-compose.yaml b/engine/servers/elasticsearch-single-node-ci/docker-compose.yaml
new file mode 100644
index 00000000..cdac3f65
--- /dev/null
+++ b/engine/servers/elasticsearch-single-node-ci/docker-compose.yaml
@@ -0,0 +1,23 @@
+version: '3.5'
+
+services:
+  es:
+    image: docker.elastic.co/elasticsearch/elasticsearch:8.10.2
+    environment:
+      ELASTIC_PASSWORD: "passwd"
+      KIBANA_PASSWORD: "passwd"
+      SERVER_SSL_ENABLED: "false"
+      discovery.type: "single-node"
+      xpack.security.enabled: "false"
+    ports:
+      - "9200:9200"
+      - "9300:9300"
+    logging:
+      driver: "json-file"
+      options:
+        max-file: "1"
+        max-size: "10m"
+    deploy:
+      resources:
+        limits:
+          memory: 4Gb
diff --git a/engine/servers/opensearch-single-node-ci/docker-compose.yaml b/engine/servers/opensearch-single-node-ci/docker-compose.yaml
new file mode 100644
index 00000000..18d97779
--- /dev/null
+++ b/engine/servers/opensearch-single-node-ci/docker-compose.yaml
@@ -0,0 +1,21 @@
+version: '3.5'
+
+services:
+  opensearch:
+    image: opensearchproject/opensearch:2.10.0
+    environment:
+      discovery.type: "single-node"
+      plugins.security.disabled: "true"
+      OPENSEARCH_JAVA_OPTS: "-Xms2g -Xmx2g"
+    ports:
+      - "9200:9200"
+      - "9300:9300"
+    logging:
+      driver: "json-file"
+      options:
+        max-file: "1"
+        max-size: "10m"
+    deploy:
+      resources:
+        limits:
+          memory: 4Gb
diff --git a/poetry.lock b/poetry.lock
index 7ca44280..d58e98b2 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
[[package]] name = "annotated-types" @@ -11,6 +11,9 @@ files = [ {file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"}, ] +[package.dependencies] +typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.9\""} + [[package]] name = "anyio" version = "4.3.0" @@ -194,6 +197,34 @@ files = [ {file = "backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e"}, ] +[[package]] +name = "backports-zoneinfo" +version = "0.2.1" +description = "Backport of the standard library zoneinfo module" +optional = false +python-versions = ">=3.6" +files = [ + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:da6013fd84a690242c310d77ddb8441a559e9cb3d3d59ebac9aca1a57b2e18bc"}, + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:89a48c0d158a3cc3f654da4c2de1ceba85263fafb861b98b59040a5086259722"}, + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:1c5742112073a563c81f786e77514969acb58649bcdf6cdf0b4ed31a348d4546"}, + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-win32.whl", hash = "sha256:e8236383a20872c0cdf5a62b554b27538db7fa1bbec52429d8d106effbaeca08"}, + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-win_amd64.whl", hash = "sha256:8439c030a11780786a2002261569bdf362264f605dfa4d65090b64b05c9f79a7"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:f04e857b59d9d1ccc39ce2da1021d196e47234873820cbeaad210724b1ee28ac"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:17746bd546106fa389c51dbea67c8b7c8f0d14b5526a579ca6ccf5ed72c526cf"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:5c144945a7752ca544b4b78c8c41544cdfaf9786f25fe5ffb10e838e19a27570"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-win32.whl", hash = 
"sha256:e55b384612d93be96506932a786bbcde5a2db7a9e6a4bb4bffe8b733f5b9036b"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a76b38c52400b762e48131494ba26be363491ac4f9a04c1b7e92483d169f6582"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:8961c0f32cd0336fb8e8ead11a1f8cd99ec07145ec2931122faaac1c8f7fd987"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:e81b76cace8eda1fca50e345242ba977f9be6ae3945af8d46326d776b4cf78d1"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7b0a64cda4145548fed9efc10322770f929b944ce5cee6c0dfe0c87bf4c0c8c9"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-win32.whl", hash = "sha256:1b13e654a55cd45672cb54ed12148cd33628f672548f373963b0bff67b217328"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-win_amd64.whl", hash = "sha256:4a0f800587060bf8880f954dbef70de6c11bbe59c673c3d818921f042f9954a6"}, + {file = "backports.zoneinfo-0.2.1.tar.gz", hash = "sha256:fadbfe37f74051d024037f223b8e001611eac868b5c5b06144ef4d8b799862f2"}, +] + +[package.extras] +tzdata = ["tzdata"] + [[package]] name = "certifi" version = "2024.2.2" @@ -1372,6 +1403,7 @@ files = [ ] [package.dependencies] +"backports.zoneinfo" = {version = ">=0.2.0", markers = "python_version < \"3.9\""} psycopg-binary = {version = "3.1.18", optional = true, markers = "implementation_name != \"pypy\" and extra == \"binary\""} typing-extensions = ">=4.1" tzdata = {version = "*", markers = "sys_platform == \"win32\""} @@ -1724,6 +1756,7 @@ azure-storage-blob = "*" environs = "<=9.5.0" grpcio = ">=1.49.1,<=1.60.0" minio = ">=7.0.0" +numpy = {version = "<1.25.0", markers = "python_version <= \"3.8\""} pandas = ">=1.2.4" protobuf = ">=3.20.0" pyarrow = ">=12.0.0" @@ -1937,48 +1970,6 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] -[[package]] -name = "scipy" -version = "1.12.0" -description = 
"Fundamental algorithms for scientific computing in Python" -optional = false -python-versions = ">=3.9" -files = [ - {file = "scipy-1.12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:78e4402e140879387187f7f25d91cc592b3501a2e51dfb320f48dfb73565f10b"}, - {file = "scipy-1.12.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:f5f00ebaf8de24d14b8449981a2842d404152774c1a1d880c901bf454cb8e2a1"}, - {file = "scipy-1.12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e53958531a7c695ff66c2e7bb7b79560ffdc562e2051644c5576c39ff8efb563"}, - {file = "scipy-1.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e32847e08da8d895ce09d108a494d9eb78974cf6de23063f93306a3e419960c"}, - {file = "scipy-1.12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4c1020cad92772bf44b8e4cdabc1df5d87376cb219742549ef69fc9fd86282dd"}, - {file = "scipy-1.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:75ea2a144096b5e39402e2ff53a36fecfd3b960d786b7efd3c180e29c39e53f2"}, - {file = "scipy-1.12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:408c68423f9de16cb9e602528be4ce0d6312b05001f3de61fe9ec8b1263cad08"}, - {file = "scipy-1.12.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5adfad5dbf0163397beb4aca679187d24aec085343755fcdbdeb32b3679f254c"}, - {file = "scipy-1.12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3003652496f6e7c387b1cf63f4bb720951cfa18907e998ea551e6de51a04467"}, - {file = "scipy-1.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b8066bce124ee5531d12a74b617d9ac0ea59245246410e19bca549656d9a40a"}, - {file = "scipy-1.12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8bee4993817e204d761dba10dbab0774ba5a8612e57e81319ea04d84945375ba"}, - {file = "scipy-1.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:a24024d45ce9a675c1fb8494e8e5244efea1c7a09c60beb1eeb80373d0fecc70"}, - {file = "scipy-1.12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:e7e76cc48638228212c747ada851ef355c2bb5e7f939e10952bc504c11f4e372"}, - {file = "scipy-1.12.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:f7ce148dffcd64ade37b2df9315541f9adad6efcaa86866ee7dd5db0c8f041c3"}, - {file = "scipy-1.12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c39f92041f490422924dfdb782527a4abddf4707616e07b021de33467f917bc"}, - {file = "scipy-1.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7ebda398f86e56178c2fa94cad15bf457a218a54a35c2a7b4490b9f9cb2676c"}, - {file = "scipy-1.12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:95e5c750d55cf518c398a8240571b0e0782c2d5a703250872f36eaf737751338"}, - {file = "scipy-1.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:e646d8571804a304e1da01040d21577685ce8e2db08ac58e543eaca063453e1c"}, - {file = "scipy-1.12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:913d6e7956c3a671de3b05ccb66b11bc293f56bfdef040583a7221d9e22a2e35"}, - {file = "scipy-1.12.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:bba1b0c7256ad75401c73e4b3cf09d1f176e9bd4248f0d3112170fb2ec4db067"}, - {file = "scipy-1.12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:730badef9b827b368f351eacae2e82da414e13cf8bd5051b4bdfd720271a5371"}, - {file = "scipy-1.12.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6546dc2c11a9df6926afcbdd8a3edec28566e4e785b915e849348c6dd9f3f490"}, - {file = "scipy-1.12.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:196ebad3a4882081f62a5bf4aeb7326aa34b110e533aab23e4374fcccb0890dc"}, - {file = "scipy-1.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:b360f1b6b2f742781299514e99ff560d1fe9bd1bff2712894b52abe528d1fd1e"}, - {file = "scipy-1.12.0.tar.gz", hash = "sha256:4bf5abab8a36d20193c698b0f1fc282c1d083c94723902c447e5d2f1780936a3"}, -] - -[package.dependencies] -numpy = ">=1.22.4,<1.29.0" - -[package.extras] -dev = ["click", "cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy", 
"pycodestyle", "pydevtool", "rich-click", "ruff", "types-psutil", "typing_extensions"] -doc = ["jupytext", "matplotlib (>2)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-design (>=0.2.0)"] -test = ["asv", "gmpy2", "hypothesis", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] - [[package]] name = "setuptools" version = "69.2.0" @@ -2314,5 +2305,5 @@ validators = "0.22.0" [metadata] lock-version = "2.0" -python-versions = ">=3.9,<3.12" -content-hash = "badb7b46af420d7b474a7b6e2aa8dc926d45ab8631342eee8ab8ab808d86c90c" +python-versions = ">=3.8,<3.12" +content-hash = "66b915f6915c79f83165dc5fb39f363ca53c493668ff87bb5b4953fb712cd4cc" diff --git a/tools/wait_for_green_status.sh b/tools/wait_for_green_status.sh new file mode 100755 index 00000000..229a9527 --- /dev/null +++ b/tools/wait_for_green_status.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# This scripts helps to wait for Opensearch|Elasticsearch status to become Green + +set -e + +SEARCH_CLUSTER_HOST=${1:-"localhost:9200"} + +# Wait until the search cluster host is available +until $(curl --output /dev/null --silent --head --fail "$SEARCH_CLUSTER_HOST"); do + printf '.' 
+  sleep 1 # Wait for 1 second
+done
+
+# Wait for ES/OS to answer with HTTP 200; a refused connection prints 000 and is retried
+http_status() {
+  curl --write-out '%{http_code}' --silent --output /dev/null "$SEARCH_CLUSTER_HOST" || true
+}
+
+until [ "$(http_status)" = "200" ]; do
+  >&2 echo "Search cluster is unavailable - sleep 1s"
+  sleep 1
+done
+
+# Wait for ES/OS status to turn green; a failed health request (e.g. 503) counts as "not green yet"
+cluster_health() {
+  curl -fsSL "$SEARCH_CLUSTER_HOST/_cat/health?h=status" | tr -d '[:space:]' || true
+}
+
+until [ "$(cluster_health)" = 'green' ]; do
+  >&2 echo "Search cluster status is not green yet - sleep 1s"
+  sleep 1
+done
+
+>&2 echo "Search cluster is up"