From 571b10e8592f961b0eb93861c1ee55229a99f068 Mon Sep 17 00:00:00 2001 From: tellet-q Date: Wed, 17 Apr 2024 13:41:32 +0200 Subject: [PATCH 01/12] ci: Run *-default benchmarks for all engines --- .../manual-all-engines-benchmark.yaml | 189 ++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 .github/workflows/manual-all-engines-benchmark.yaml diff --git a/.github/workflows/manual-all-engines-benchmark.yaml b/.github/workflows/manual-all-engines-benchmark.yaml new file mode 100644 index 00000000..26b1a354 --- /dev/null +++ b/.github/workflows/manual-all-engines-benchmark.yaml @@ -0,0 +1,189 @@ +name: Manual All Engines Default Benchmarks + +on: + push: + branches: + - "ci/benchmark-all-engines" + +jobs: + elasticsearchBenchmark: + name: benchmark - elasticsearch-default - ${{ inputs.dataset }} against elasticsearch-single-node + timeout-minutes: 60 + runs-on: [self-hosted, linux, x64] + steps: + - uses: actions/checkout@v3 + + - name: Install poetry + run: pip install poetry + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "poetry" + + - name: Install deps + run: poetry install --no-root + + - uses: hoverkraft-tech/compose-action@v2.0.0 + with: + compose-file: "engine/servers/elasticsearch-single-node/docker-compose.yaml" + - name: Execution + run: | + source $(poetry env info -p)/bin/activate + poetry run python3 run.py --engines "elasticsearch-default" --datasets "glove-100-angular" + + milvusBenchmark: + name: benchmark - milvus-default - ${{ inputs.dataset }} against milvus-single-node + timeout-minutes: 60 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Install poetry + run: pip install poetry + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "poetry" + + - name: Install deps + run: poetry install + + - uses: hoverkraft-tech/compose-action@v2.0.0 + with: + compose-file: "engine/servers/milvus-single-node/docker-compose.yaml" + - name: Execution + run: | + source $(poetry env info -p)/bin/activate + poetry run python3 run.py --engines "milvus-default" --datasets "glove-100-angular" + + opensearchBenchmark: + name: benchmark - opensearch-default - opensearch-default against opensearch-single-node + timeout-minutes: 60 + runs-on: [self-hosted, linux, x64] + steps: + - uses: actions/checkout@v3 + + - name: Install poetry + run: pip install poetry + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "poetry" + + - name: Install deps + run: poetry install + + - uses: hoverkraft-tech/compose-action@v2.0.0 + with: + compose-file: "engine/servers/opensearch-single-node/docker-compose.yaml" + - name: Execution + run: | + source $(poetry env info -p)/bin/activate + poetry run python3 run.py --engines "opensearch-default" --datasets "glove-100-angular" + + pgvectorBenchmark: + name: benchmark - pgvector - pgvector-default against pgvector-single-node + timeout-minutes: 60 + runs-on: [self-hosted, linux, x64] + steps: + - uses: actions/checkout@v3 + + - name: Install poetry + run: pip install poetry + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "poetry" + + - name: Install deps + run: poetry install + + - uses: hoverkraft-tech/compose-action@v2.0.0 + with: + compose-file: "engine/servers/pgvector-single-node/docker-compose.yaml" + - name: Execution + run: | + source $(poetry env info -p)/bin/activate + poetry run python3 run.py --engines "pgvector-default" --datasets "glove-100-angular" + + qdrantBenchmark: + name: benchmark - qdrant - qdrant-default against qdrant-single-node + timeout-minutes: 60 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Install poetry + run: pip install poetry + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "poetry" + + - name: Install deps + run: poetry install + + - uses: hoverkraft-tech/compose-action@v2.0.0 + with: + compose-file: "engine/servers/qdrant-single-node/docker-compose.yaml" + - name: Execution + run: | + source $(poetry env info -p)/bin/activate + poetry run python3 run.py --engines "qdrant-default" --datasets "glove-100-angular" + + redisBenchmark: + name: benchmark - redis - redis-default against redis-single-node + timeout-minutes: 60 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Install poetry + run: pip install poetry + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "poetry" + + - name: Install deps + run: poetry install + + - uses: hoverkraft-tech/compose-action@v2.0.0 + with: + compose-file: "engine/servers/redis-single-node/docker-compose.yaml" + - name: Execution + run: | + source $(poetry env info -p)/bin/activate + poetry run python3 run.py --engines "redis-default" --datasets "glove-100-angular" + + weaviateBenchmark: + name: benchmark - weaviate - weaviate-default against weaviate-single-node + timeout-minutes: 60 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Install poetry + run: pip install poetry + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "poetry" + + - name: Install deps + run: poetry install + + - uses: hoverkraft-tech/compose-action@v2.0.0 + with: + compose-file: "engine/servers/weaviate-single-node/docker-compose.yaml" + - name: Execution + run: | + source $(poetry env info -p)/bin/activate + poetry run python3 run.py --engines "weaviate-default" --datasets "glove-100-angular" \ No newline at end of file From 6f5a08750a9d8c1ffc43405069b38813468fea90 Mon Sep 17 00:00:00 2001 From: tellet-q Date: Wed, 17 Apr 2024 13:57:13 +0200 Subject: [PATCH 02/12] Update poetry.lock --- poetry.lock | 81 ++++++++++++++++++++++++----------------------------- 1 file changed, 36 insertions(+), 45 deletions(-) diff --git a/poetry.lock b/poetry.lock index 7ca44280..d58e98b2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "annotated-types" @@ -11,6 +11,9 @@ files = [ {file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"}, ] +[package.dependencies] +typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.9\""} + [[package]] name = "anyio" version = "4.3.0" @@ -194,6 +197,34 @@ files = [ {file = "backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e"}, ] +[[package]] +name = "backports-zoneinfo" +version = "0.2.1" +description = "Backport of the standard library zoneinfo module" +optional = false +python-versions = ">=3.6" +files = [ + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:da6013fd84a690242c310d77ddb8441a559e9cb3d3d59ebac9aca1a57b2e18bc"}, + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:89a48c0d158a3cc3f654da4c2de1ceba85263fafb861b98b59040a5086259722"}, + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:1c5742112073a563c81f786e77514969acb58649bcdf6cdf0b4ed31a348d4546"}, + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-win32.whl", hash = "sha256:e8236383a20872c0cdf5a62b554b27538db7fa1bbec52429d8d106effbaeca08"}, + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-win_amd64.whl", hash = "sha256:8439c030a11780786a2002261569bdf362264f605dfa4d65090b64b05c9f79a7"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:f04e857b59d9d1ccc39ce2da1021d196e47234873820cbeaad210724b1ee28ac"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:17746bd546106fa389c51dbea67c8b7c8f0d14b5526a579ca6ccf5ed72c526cf"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:5c144945a7752ca544b4b78c8c41544cdfaf9786f25fe5ffb10e838e19a27570"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-win32.whl", hash = "sha256:e55b384612d93be96506932a786bbcde5a2db7a9e6a4bb4bffe8b733f5b9036b"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a76b38c52400b762e48131494ba26be363491ac4f9a04c1b7e92483d169f6582"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:8961c0f32cd0336fb8e8ead11a1f8cd99ec07145ec2931122faaac1c8f7fd987"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:e81b76cace8eda1fca50e345242ba977f9be6ae3945af8d46326d776b4cf78d1"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7b0a64cda4145548fed9efc10322770f929b944ce5cee6c0dfe0c87bf4c0c8c9"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-win32.whl", hash = "sha256:1b13e654a55cd45672cb54ed12148cd33628f672548f373963b0bff67b217328"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-win_amd64.whl", hash = "sha256:4a0f800587060bf8880f954dbef70de6c11bbe59c673c3d818921f042f9954a6"}, + {file = "backports.zoneinfo-0.2.1.tar.gz", hash = "sha256:fadbfe37f74051d024037f223b8e001611eac868b5c5b06144ef4d8b799862f2"}, +] + +[package.extras] +tzdata = ["tzdata"] + [[package]] name = "certifi" version = "2024.2.2" @@ -1372,6 +1403,7 @@ files = [ ] [package.dependencies] +"backports.zoneinfo" = {version = ">=0.2.0", markers = "python_version < \"3.9\""} psycopg-binary = {version = "3.1.18", optional = true, markers = "implementation_name != \"pypy\" and extra == \"binary\""} typing-extensions = ">=4.1" tzdata = {version = "*", markers = "sys_platform == \"win32\""} @@ -1724,6 +1756,7 @@ azure-storage-blob = "*" environs = "<=9.5.0" grpcio = ">=1.49.1,<=1.60.0" minio = ">=7.0.0" +numpy = {version = "<1.25.0", markers = "python_version <= \"3.8\""} pandas = ">=1.2.4" protobuf = ">=3.20.0" pyarrow = ">=12.0.0" @@ -1937,48 +1970,6 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] -[[package]] -name = "scipy" -version = "1.12.0" -description = "Fundamental algorithms for scientific computing in Python" -optional = false -python-versions = ">=3.9" -files = [ - {file = "scipy-1.12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:78e4402e140879387187f7f25d91cc592b3501a2e51dfb320f48dfb73565f10b"}, - {file = "scipy-1.12.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:f5f00ebaf8de24d14b8449981a2842d404152774c1a1d880c901bf454cb8e2a1"}, - {file = "scipy-1.12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e53958531a7c695ff66c2e7bb7b79560ffdc562e2051644c5576c39ff8efb563"}, - {file = "scipy-1.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e32847e08da8d895ce09d108a494d9eb78974cf6de23063f93306a3e419960c"}, - {file = "scipy-1.12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4c1020cad92772bf44b8e4cdabc1df5d87376cb219742549ef69fc9fd86282dd"}, - {file = "scipy-1.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:75ea2a144096b5e39402e2ff53a36fecfd3b960d786b7efd3c180e29c39e53f2"}, - {file = "scipy-1.12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:408c68423f9de16cb9e602528be4ce0d6312b05001f3de61fe9ec8b1263cad08"}, - {file = "scipy-1.12.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5adfad5dbf0163397beb4aca679187d24aec085343755fcdbdeb32b3679f254c"}, - {file = "scipy-1.12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3003652496f6e7c387b1cf63f4bb720951cfa18907e998ea551e6de51a04467"}, - {file = "scipy-1.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b8066bce124ee5531d12a74b617d9ac0ea59245246410e19bca549656d9a40a"}, - {file = "scipy-1.12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8bee4993817e204d761dba10dbab0774ba5a8612e57e81319ea04d84945375ba"}, - {file = "scipy-1.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:a24024d45ce9a675c1fb8494e8e5244efea1c7a09c60beb1eeb80373d0fecc70"}, - {file = "scipy-1.12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e7e76cc48638228212c747ada851ef355c2bb5e7f939e10952bc504c11f4e372"}, - {file = "scipy-1.12.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:f7ce148dffcd64ade37b2df9315541f9adad6efcaa86866ee7dd5db0c8f041c3"}, - {file = "scipy-1.12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c39f92041f490422924dfdb782527a4abddf4707616e07b021de33467f917bc"}, - {file = "scipy-1.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7ebda398f86e56178c2fa94cad15bf457a218a54a35c2a7b4490b9f9cb2676c"}, - {file = "scipy-1.12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:95e5c750d55cf518c398a8240571b0e0782c2d5a703250872f36eaf737751338"}, - {file = "scipy-1.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:e646d8571804a304e1da01040d21577685ce8e2db08ac58e543eaca063453e1c"}, - {file = "scipy-1.12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:913d6e7956c3a671de3b05ccb66b11bc293f56bfdef040583a7221d9e22a2e35"}, - {file = "scipy-1.12.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:bba1b0c7256ad75401c73e4b3cf09d1f176e9bd4248f0d3112170fb2ec4db067"}, - {file = "scipy-1.12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:730badef9b827b368f351eacae2e82da414e13cf8bd5051b4bdfd720271a5371"}, - {file = "scipy-1.12.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6546dc2c11a9df6926afcbdd8a3edec28566e4e785b915e849348c6dd9f3f490"}, - {file = "scipy-1.12.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:196ebad3a4882081f62a5bf4aeb7326aa34b110e533aab23e4374fcccb0890dc"}, - {file = "scipy-1.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:b360f1b6b2f742781299514e99ff560d1fe9bd1bff2712894b52abe528d1fd1e"}, - {file = "scipy-1.12.0.tar.gz", hash = "sha256:4bf5abab8a36d20193c698b0f1fc282c1d083c94723902c447e5d2f1780936a3"}, -] - -[package.dependencies] -numpy = ">=1.22.4,<1.29.0" - -[package.extras] -dev = ["click", "cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy", "pycodestyle", "pydevtool", "rich-click", "ruff", "types-psutil", "typing_extensions"] -doc = ["jupytext", "matplotlib (>2)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-design (>=0.2.0)"] -test = ["asv", "gmpy2", "hypothesis", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] - [[package]] name = "setuptools" version = "69.2.0" @@ -2314,5 +2305,5 @@ validators = "0.22.0" [metadata] lock-version = "2.0" -python-versions = ">=3.9,<3.12" -content-hash = "badb7b46af420d7b474a7b6e2aa8dc926d45ab8631342eee8ab8ab808d86c90c" +python-versions = ">=3.8,<3.12" +content-hash = "66b915f6915c79f83165dc5fb39f363ca53c493668ff87bb5b4953fb712cd4cc" From 3ff2be0290b0e8d8697edcd43daf203b8fff032c Mon Sep 17 00:00:00 2001 From: tellet-q Date: Wed, 17 Apr 2024 15:07:22 +0200 Subject: [PATCH 03/12] Use random-100 dataset --- .../manual-all-engines-benchmark.yaml | 39 +++++++++++-------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/.github/workflows/manual-all-engines-benchmark.yaml b/.github/workflows/manual-all-engines-benchmark.yaml index 26b1a354..994d6e49 100644 --- a/.github/workflows/manual-all-engines-benchmark.yaml +++ b/.github/workflows/manual-all-engines-benchmark.yaml @@ -4,12 +4,17 @@ on: push: branches: - "ci/benchmark-all-engines" + workflow_dispatch: + inputs: + dataset: + description: "Dataset to benchmark" + default: random-100 jobs: elasticsearchBenchmark: - name: benchmark - elasticsearch-default - ${{ inputs.dataset }} against elasticsearch-single-node + name: benchmark - elasticsearch-default against elasticsearch-single-node timeout-minutes: 60 - runs-on: [self-hosted, linux, x64] + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 @@ -30,10 +35,10 @@ jobs: - name: Execution run: | source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "elasticsearch-default" --datasets "glove-100-angular" + poetry run python3 run.py --engines "elasticsearch-default" --datasets "random-100" milvusBenchmark: - name: benchmark - milvus-default - ${{ inputs.dataset }} against milvus-single-node + name: benchmark - milvus-default against milvus-single-node timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -56,12 +61,12 @@ jobs: - name: Execution run: | source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "milvus-default" --datasets "glove-100-angular" + poetry run python3 run.py --engines "milvus-default" --datasets "random-100" opensearchBenchmark: - name: benchmark - opensearch-default - opensearch-default against opensearch-single-node + name: benchmark - opensearch-default against opensearch-single-node timeout-minutes: 60 - runs-on: [self-hosted, linux, x64] + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 @@ -82,12 +87,12 @@ jobs: - name: Execution run: | source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "opensearch-default" --datasets "glove-100-angular" + poetry run python3 run.py --engines "opensearch-default" --datasets "random-100" pgvectorBenchmark: - name: benchmark - pgvector - pgvector-default against pgvector-single-node + name: benchmark - pgvector-default against pgvector-single-node timeout-minutes: 60 - runs-on: [self-hosted, linux, x64] + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 @@ -108,10 +113,10 @@ jobs: - name: Execution run: | source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "pgvector-default" --datasets "glove-100-angular" + poetry run python3 run.py --engines "pgvector-default" --datasets "random-100" qdrantBenchmark: - name: benchmark - qdrant - qdrant-default against qdrant-single-node + name: benchmark - qdrant-default against qdrant-single-node timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -134,10 +139,10 @@ jobs: - name: Execution run: | source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "qdrant-default" --datasets "glove-100-angular" + poetry run python3 run.py --engines "qdrant-default" --datasets "random-100" redisBenchmark: - name: benchmark - redis - redis-default against redis-single-node + name: benchmark - redis-default against redis-single-node timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -160,10 +165,10 @@ jobs: - name: Execution run: | source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "redis-default" --datasets "glove-100-angular" + poetry run python3 run.py --engines "redis-default" --datasets "random-100" weaviateBenchmark: - name: benchmark - weaviate - weaviate-default against weaviate-single-node + name: benchmark - weaviate-default against weaviate-single-node timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -186,4 +191,4 @@ jobs: - name: Execution run: | source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "weaviate-default" --datasets "glove-100-angular" \ No newline at end of file + poetry run python3 run.py --engines "weaviate-default" --datasets "random-100" \ No newline at end of file From f7a47391e6bb5d418c59060b62cdd6907f7e0a10 Mon Sep 17 00:00:00 2001 From: tellet-q Date: Wed, 17 Apr 2024 15:47:30 +0200 Subject: [PATCH 04/12] Introduce waits --- .../manual-all-engines-benchmark.yaml | 2 ++ tools/wait_for_elasticsearch_green_status.sh | 31 +++++++++++++++++++ tools/wait_for_opensearch.sh | 10 ++++++ 3 files changed, 43 insertions(+) create mode 100755 tools/wait_for_elasticsearch_green_status.sh create mode 100755 tools/wait_for_opensearch.sh diff --git a/.github/workflows/manual-all-engines-benchmark.yaml b/.github/workflows/manual-all-engines-benchmark.yaml index 994d6e49..4c9ebad6 100644 --- a/.github/workflows/manual-all-engines-benchmark.yaml +++ b/.github/workflows/manual-all-engines-benchmark.yaml @@ -34,6 +34,7 @@ jobs: compose-file: "engine/servers/elasticsearch-single-node/docker-compose.yaml" - name: Execution run: | + bash -x tools/wait_for_elasticsearch_green_status.sh source $(poetry env info -p)/bin/activate poetry run python3 run.py --engines "elasticsearch-default" --datasets "random-100" @@ -86,6 +87,7 @@ jobs: compose-file: "engine/servers/opensearch-single-node/docker-compose.yaml" - name: Execution run: | + bash -x tools/wait_for_opensearch.sh source $(poetry env info -p)/bin/activate poetry run python3 run.py --engines "opensearch-default" --datasets "random-100" diff --git a/tools/wait_for_elasticsearch_green_status.sh b/tools/wait_for_elasticsearch_green_status.sh new file mode 100755 index 00000000..e028eee7 --- /dev/null +++ b/tools/wait_for_elasticsearch_green_status.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +set -e + +ES_HOST=${1:-"localhost:9200"} + +until $(curl --output /dev/null --silent --head --fail "$ES_HOST"); do + printf '.' + sleep 1 +done + +# Wait for ES to start... +response=$(curl "$ES_HOST") +until [ "$response" = "200" ]; do + response=$(curl --write-out %{http_code} --silent --output /dev/null "$ES_HOST") + >&2 echo "Elastic Search is unavailable - sleeping" + sleep 1 +done + +# Wait for ES status to turn to Green +health="$(curl -fsSL "$ES_HOST/_cat/health?h=status")" +health="$(echo "$health" | sed -r 's/^[[:space:]]+|[[:space:]]+$//g')" # trim whitespace (otherwise we'll have "green ") + +until [ "$health" = 'green' ]; do + health="$(curl -fsSL "$ES_HOST/_cat/health?h=status")" + health="$(echo "$health" | sed -r 's/^[[:space:]]+|[[:space:]]+$//g')" # trim whitespace (otherwise we'll have "green ") + >&2 echo "Elastic Search is unavailable - sleeping" + sleep 1 +done + +>&2 echo "Elastic Search is up" \ No newline at end of file diff --git a/tools/wait_for_opensearch.sh b/tools/wait_for_opensearch.sh new file mode 100755 index 00000000..f531b74e --- /dev/null +++ b/tools/wait_for_opensearch.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -e + +ES_HOST=${1:-"localhost:9200"} + +until $(curl --output /dev/null --silent --head --fail "$ES_HOST"); do + printf '.' + sleep 1 +done \ No newline at end of file From c734044beed1641a6894a306343dcf5593452d02 Mon Sep 17 00:00:00 2001 From: tellet-q Date: Wed, 17 Apr 2024 16:08:39 +0200 Subject: [PATCH 05/12] Reduce mem in OS and ES --- .../manual-all-engines-benchmark.yaml | 12 +++++----- .../docker-compose.yaml | 23 +++++++++++++++++++ .../docker-compose.yaml | 21 +++++++++++++++++ tools/wait_for_elasticsearch_green_status.sh | 6 ++--- tools/wait_for_opensearch.sh | 4 +++- 5 files changed, 56 insertions(+), 10 deletions(-) create mode 100644 engine/servers/elasticsearch-single-node-ci/docker-compose.yaml create mode 100644 engine/servers/opensearch-single-node-ci/docker-compose.yaml diff --git a/.github/workflows/manual-all-engines-benchmark.yaml b/.github/workflows/manual-all-engines-benchmark.yaml index 4c9ebad6..f3ac3d64 100644 --- a/.github/workflows/manual-all-engines-benchmark.yaml +++ b/.github/workflows/manual-all-engines-benchmark.yaml @@ -12,7 +12,7 @@ on: jobs: elasticsearchBenchmark: - name: benchmark - elasticsearch-default against elasticsearch-single-node + name: benchmark - elasticsearch-default against elasticsearch-single-node-ci timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -31,10 +31,10 @@ jobs: - uses: hoverkraft-tech/compose-action@v2.0.0 with: - compose-file: "engine/servers/elasticsearch-single-node/docker-compose.yaml" + compose-file: "engine/servers/elasticsearch-single-node-ci/docker-compose.yaml" - name: Execution run: | - bash -x tools/wait_for_elasticsearch_green_status.sh + ./tools/wait_for_elasticsearch_green_status.sh source $(poetry env info -p)/bin/activate poetry run python3 run.py --engines "elasticsearch-default" --datasets "random-100" @@ -65,7 +65,7 @@ jobs: poetry run python3 run.py --engines "milvus-default" --datasets "random-100" opensearchBenchmark: - name: benchmark - opensearch-default against opensearch-single-node + name: benchmark - opensearch-default against opensearch-single-node-ci timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -84,10 +84,10 @@ jobs: - uses: hoverkraft-tech/compose-action@v2.0.0 with: - compose-file: "engine/servers/opensearch-single-node/docker-compose.yaml" + compose-file: "engine/servers/opensearch-single-node-ci/docker-compose.yaml" - name: Execution run: | - bash -x tools/wait_for_opensearch.sh + ./tools/wait_for_opensearch.sh source $(poetry env info -p)/bin/activate poetry run python3 run.py --engines "opensearch-default" --datasets "random-100" diff --git a/engine/servers/elasticsearch-single-node-ci/docker-compose.yaml b/engine/servers/elasticsearch-single-node-ci/docker-compose.yaml new file mode 100644 index 00000000..cdac3f65 --- /dev/null +++ b/engine/servers/elasticsearch-single-node-ci/docker-compose.yaml @@ -0,0 +1,23 @@ +version: '3.5' + +services: + es: + image: docker.elastic.co/elasticsearch/elasticsearch:8.10.2 + environment: + ELASTIC_PASSWORD: "passwd" + KIBANA_PASSWORD: "passwd" + SERVER_SSL_ENABLED: "false" + discovery.type: "single-node" + xpack.security.enabled: "false" + ports: + - "9200:9200" + - "9300:9300" + logging: + driver: "json-file" + options: + max-file: 1 + max-size: 10m + deploy: + resources: + limits: + memory: 4Gb diff --git a/engine/servers/opensearch-single-node-ci/docker-compose.yaml b/engine/servers/opensearch-single-node-ci/docker-compose.yaml new file mode 100644 index 00000000..1e5205d8 --- /dev/null +++ b/engine/servers/opensearch-single-node-ci/docker-compose.yaml @@ -0,0 +1,21 @@ +version: '3.5' + +services: + opensearch: + image: opensearchproject/opensearch:2.10.0 + environment: + discovery.type: "single-node" + plugins.security.disabled: true + OPENSEARCH_JAVA_OPTS: "-Xms4g -Xmx4g" + ports: + - "9200:9200" + - "9300:9300" + logging: + driver: "json-file" + options: + max-file: 1 + max-size: 10m + deploy: + resources: + limits: + memory: 8Gb diff --git a/tools/wait_for_elasticsearch_green_status.sh b/tools/wait_for_elasticsearch_green_status.sh index e028eee7..c3051921 100755 --- a/tools/wait_for_elasticsearch_green_status.sh +++ b/tools/wait_for_elasticsearch_green_status.sh @@ -19,12 +19,12 @@ done # Wait for ES status to turn to Green health="$(curl -fsSL "$ES_HOST/_cat/health?h=status")" -health="$(echo "$health" | sed -r 's/^[[:space:]]+|[[:space:]]+$//g')" # trim whitespace (otherwise we'll have "green ") +health="$(echo "$health" | sed -r 's/^[[:space:]]+|[[:space:]]+$//g')" until [ "$health" = 'green' ]; do health="$(curl -fsSL "$ES_HOST/_cat/health?h=status")" - health="$(echo "$health" | sed -r 's/^[[:space:]]+|[[:space:]]+$//g')" # trim whitespace (otherwise we'll have "green ") - >&2 echo "Elastic Search is unavailable - sleeping" + health="$(echo "$health" | sed -r 's/^[[:space:]]+|[[:space:]]+$//g')" + >&2 echo "Elastic Search is yet unavailable, sleep 1" sleep 1 done diff --git a/tools/wait_for_opensearch.sh b/tools/wait_for_opensearch.sh index f531b74e..7f725b46 100755 --- a/tools/wait_for_opensearch.sh +++ b/tools/wait_for_opensearch.sh @@ -7,4 +7,6 @@ ES_HOST=${1:-"localhost:9200"} until $(curl --output /dev/null --silent --head --fail "$ES_HOST"); do printf '.' sleep 1 -done \ No newline at end of file +done + +>&2 echo "Open Search is up" \ No newline at end of file From cdf802ca2160e2efb6498d191ee03e4f1c4c7da7 Mon Sep 17 00:00:00 2001 From: tellet-q Date: Wed, 17 Apr 2024 16:21:50 +0200 Subject: [PATCH 06/12] Use glove-25 for OS --- .../workflows/manual-all-engines-benchmark.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/manual-all-engines-benchmark.yaml b/.github/workflows/manual-all-engines-benchmark.yaml index f3ac3d64..e064285c 100644 --- a/.github/workflows/manual-all-engines-benchmark.yaml +++ b/.github/workflows/manual-all-engines-benchmark.yaml @@ -12,7 +12,7 @@ on: jobs: elasticsearchBenchmark: - name: benchmark - elasticsearch-default against elasticsearch-single-node-ci + name: benchmark - elasticsearch-default - random-100 - against elasticsearch-single-node-ci timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -39,7 +39,7 @@ jobs: poetry run python3 run.py --engines "elasticsearch-default" --datasets "random-100" milvusBenchmark: - name: benchmark - milvus-default against milvus-single-node + name: benchmark - milvus-default - random-100 - against milvus-single-node timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -65,7 +65,7 @@ jobs: poetry run python3 run.py --engines "milvus-default" --datasets "random-100" opensearchBenchmark: - name: benchmark - opensearch-default against opensearch-single-node-ci + name: benchmark - opensearch-default - glove-25-angular - against opensearch-single-node-ci timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -89,10 +89,10 @@ jobs: run: | ./tools/wait_for_opensearch.sh source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "opensearch-default" --datasets "random-100" + poetry run python3 run.py --engines "opensearch-default" --datasets "glove-25-angular" pgvectorBenchmark: - name: benchmark - pgvector-default against pgvector-single-node + name: benchmark - pgvector-default - random-100 - against pgvector-single-node timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -118,7 +118,7 @@ jobs: poetry run python3 run.py --engines "pgvector-default" --datasets "random-100" qdrantBenchmark: - name: benchmark - qdrant-default against qdrant-single-node + name: benchmark - qdrant-default - random-100 - against qdrant-single-node timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -144,7 +144,7 @@ jobs: poetry run python3 run.py --engines "qdrant-default" --datasets "random-100" redisBenchmark: - name: benchmark - redis-default against redis-single-node + name: benchmark - redis-default - random-100 - against redis-single-node timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -170,7 +170,7 @@ jobs: poetry run python3 run.py --engines "redis-default" --datasets "random-100" weaviateBenchmark: - name: benchmark - weaviate-default against weaviate-single-node + name: benchmark - weaviate-default - random-100 - against weaviate-single-node timeout-minutes: 60 runs-on: ubuntu-latest steps: From a2dfcb2dd175444464c8ed04ac48d190c1b73714 Mon Sep 17 00:00:00 2001 From: tellet-q Date: Thu, 18 Apr 2024 08:54:26 +0200 Subject: [PATCH 07/12] Use 4Gb for OS --- .github/workflows/manual-all-engines-benchmark.yaml | 4 ++-- .../opensearch-single-node-ci/docker-compose.yaml | 4 ++-- ...arch_green_status.sh => wait_for_green_status.sh} | 10 +++++----- tools/wait_for_opensearch.sh | 12 ------------ 4 files changed, 9 insertions(+), 21 deletions(-) rename tools/{wait_for_elasticsearch_green_status.sh => wait_for_green_status.sh} (74%) delete mode 100755 tools/wait_for_opensearch.sh diff --git a/.github/workflows/manual-all-engines-benchmark.yaml b/.github/workflows/manual-all-engines-benchmark.yaml index e064285c..8108e770 100644 --- a/.github/workflows/manual-all-engines-benchmark.yaml +++ b/.github/workflows/manual-all-engines-benchmark.yaml @@ -34,7 +34,7 @@ jobs: compose-file: "engine/servers/elasticsearch-single-node-ci/docker-compose.yaml" - name: Execution run: | - ./tools/wait_for_elasticsearch_green_status.sh + ./tools/wait_for_green_status.sh source $(poetry env info -p)/bin/activate poetry run python3 run.py --engines "elasticsearch-default" --datasets "random-100" @@ -87,7 +87,7 @@ jobs: compose-file: "engine/servers/opensearch-single-node-ci/docker-compose.yaml" - name: Execution run: | - ./tools/wait_for_opensearch.sh + ./tools/wait_for_green_status.sh source $(poetry env info -p)/bin/activate poetry run python3 run.py --engines "opensearch-default" --datasets "glove-25-angular" diff --git a/engine/servers/opensearch-single-node-ci/docker-compose.yaml b/engine/servers/opensearch-single-node-ci/docker-compose.yaml index 1e5205d8..18d97779 100644 --- a/engine/servers/opensearch-single-node-ci/docker-compose.yaml +++ b/engine/servers/opensearch-single-node-ci/docker-compose.yaml @@ -6,7 +6,7 @@ services: environment: discovery.type: "single-node" plugins.security.disabled: true - OPENSEARCH_JAVA_OPTS: "-Xms4g -Xmx4g" + OPENSEARCH_JAVA_OPTS: "-Xms2g -Xmx2g" ports: - "9200:9200" - "9300:9300" @@ -18,4 +18,4 @@ services: deploy: resources: limits: - memory: 8Gb + memory: 4Gb diff --git a/tools/wait_for_elasticsearch_green_status.sh b/tools/wait_for_green_status.sh similarity index 74% rename from tools/wait_for_elasticsearch_green_status.sh rename to tools/wait_for_green_status.sh index c3051921..b10957bf 100755 --- a/tools/wait_for_elasticsearch_green_status.sh +++ b/tools/wait_for_green_status.sh @@ -9,23 +9,23 @@ until $(curl --output /dev/null --silent --head --fail "$ES_HOST"); do sleep 1 done -# Wait for ES to start... +# Wait for ES/OS to start... response=$(curl "$ES_HOST") until [ "$response" = "200" ]; do response=$(curl --write-out %{http_code} --silent --output /dev/null "$ES_HOST") - >&2 echo "Elastic Search is unavailable - sleeping" + >&2 echo "Search cluster is unavailable - sleeping" sleep 1 done -# Wait for ES status to turn to Green +# Wait for ES/OS status to turn Green health="$(curl -fsSL "$ES_HOST/_cat/health?h=status")" health="$(echo "$health" | sed -r 's/^[[:space:]]+|[[:space:]]+$//g')" until [ "$health" = 'green' ]; do health="$(curl -fsSL "$ES_HOST/_cat/health?h=status")" health="$(echo "$health" | sed -r 's/^[[:space:]]+|[[:space:]]+$//g')" - >&2 echo "Elastic Search is yet unavailable, sleep 1" + >&2 echo "Search cluster is yet unavailable, sleep 1" sleep 1 done ->&2 echo "Elastic Search is up" \ No newline at end of file +>&2 echo "Search cluster is up" \ No newline at end of file diff --git a/tools/wait_for_opensearch.sh b/tools/wait_for_opensearch.sh deleted file mode 100755 index 7f725b46..00000000 --- a/tools/wait_for_opensearch.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -set -e - -ES_HOST=${1:-"localhost:9200"} - -until $(curl --output /dev/null --silent --head --fail "$ES_HOST"); do - printf '.' - sleep 1 -done - ->&2 echo "Open Search is up" \ No newline at end of file From 9eb33501c9df915a58ac4692b1584130ba3b962d Mon Sep 17 00:00:00 2001 From: tellet-q Date: Thu, 18 Apr 2024 09:42:55 +0200 Subject: [PATCH 08/12] Avoid curl std output, use glove-25 for all engines --- .../manual-all-engines-benchmark.yaml | 33 +++++++++---------- tools/wait_for_green_status.sh | 23 +++++++------ 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/.github/workflows/manual-all-engines-benchmark.yaml b/.github/workflows/manual-all-engines-benchmark.yaml index 8108e770..be49dbd9 100644 --- a/.github/workflows/manual-all-engines-benchmark.yaml +++ b/.github/workflows/manual-all-engines-benchmark.yaml @@ -5,14 +5,13 @@ on: branches: - "ci/benchmark-all-engines" workflow_dispatch: - inputs: - dataset: - description: "Dataset to benchmark" - default: random-100 + +env: + DATASET: "glove-25-angular" jobs: elasticsearchBenchmark: - name: benchmark - elasticsearch-default - random-100 - against elasticsearch-single-node-ci + name: benchmark - elasticsearch-default - $DATASET - against elasticsearch-single-node-ci timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -36,7 +35,7 @@ jobs: run: | ./tools/wait_for_green_status.sh source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "elasticsearch-default" --datasets "random-100" + poetry run python3 run.py --engines "elasticsearch-default" --datasets "$DATASET" milvusBenchmark: name: benchmark - milvus-default - random-100 - against milvus-single-node @@ -62,10 +61,10 @@ jobs: - name: Execution run: | source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "milvus-default" --datasets "random-100" + poetry run python3 run.py --engines "milvus-default" --datasets "$DATASET" opensearchBenchmark: - name: benchmark - opensearch-default - glove-25-angular - against opensearch-single-node-ci + name: benchmark - opensearch-default - $DATASET - against opensearch-single-node-ci timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -89,10 +88,10 @@ jobs: run: | ./tools/wait_for_green_status.sh source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "opensearch-default" --datasets "glove-25-angular" + poetry run python3 run.py --engines "opensearch-default" --datasets "$DATASET" pgvectorBenchmark: - name: benchmark - pgvector-default - random-100 - against pgvector-single-node + name: benchmark - pgvector-default - $DATASET - against pgvector-single-node timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -115,10 +114,10 @@ jobs: - name: Execution run: | source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "pgvector-default" --datasets "random-100" + poetry run python3 run.py --engines "pgvector-default" --datasets "$DATASET" qdrantBenchmark: - name: benchmark - qdrant-default - random-100 - against qdrant-single-node + name: benchmark - qdrant-default - $DATASET - against qdrant-single-node timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -141,10 +140,10 @@ jobs: - name: Execution run: | source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "qdrant-default" --datasets "random-100" + poetry run python3 run.py --engines "qdrant-default" --datasets "$DATASET" redisBenchmark: - name: benchmark - redis-default - random-100 - against redis-single-node + name: benchmark - redis-default - $DATASET - against redis-single-node timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -167,10 +166,10 @@ jobs: - name: Execution run: | source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "redis-default" --datasets "random-100" + poetry run python3 run.py --engines "redis-default" --datasets "$DATASET" weaviateBenchmark: - name: benchmark - weaviate-default - random-100 - against weaviate-single-node + name: benchmark - weaviate-default - $DATASET - against weaviate-single-node timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -193,4 +192,4 @@ jobs: - name: Execution run: | source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "weaviate-default" --datasets "random-100" \ No newline at end of file + poetry run python3 run.py --engines "weaviate-default" --datasets "$DATASET" \ No newline at end of file diff --git a/tools/wait_for_green_status.sh b/tools/wait_for_green_status.sh index b10957bf..229a9527 100755 --- a/tools/wait_for_green_status.sh +++ b/tools/wait_for_green_status.sh @@ -1,30 +1,33 @@ #!/bin/bash +# This scripts helps to wait for Opensearch|Elasticsearch status to become Green set -e -ES_HOST=${1:-"localhost:9200"} +SEARCH_CLUSTER_HOST=${1:-"localhost:9200"} -until $(curl --output /dev/null --silent --head --fail "$ES_HOST"); do +# Wait until the search cluster host is available +until $(curl --output /dev/null --silent --head --fail "$SEARCH_CLUSTER_HOST"); do printf '.' - sleep 1 + sleep 1 # Wait for 1 second done -# Wait for ES/OS to start... -response=$(curl "$ES_HOST") +# Wait for ES/OS to start +response=$(curl --write-out %{http_code} --silent --output /dev/null "$SEARCH_CLUSTER_HOST") + until [ "$response" = "200" ]; do - response=$(curl --write-out %{http_code} --silent --output /dev/null "$ES_HOST") - >&2 echo "Search cluster is unavailable - sleeping" + response=$(curl --write-out %{http_code} --silent --output /dev/null "$SEARCH_CLUSTER_HOST") + >&2 echo "Search cluster is unavailable - sleep 1s" sleep 1 done # Wait for ES/OS status to turn Green -health="$(curl -fsSL "$ES_HOST/_cat/health?h=status")" +health="$(curl -fsSL "$SEARCH_CLUSTER_HOST/_cat/health?h=status")" health="$(echo "$health" | sed -r 's/^[[:space:]]+|[[:space:]]+$//g')" until [ "$health" = 'green' ]; do - health="$(curl -fsSL "$ES_HOST/_cat/health?h=status")" + health="$(curl -fsSL "$SEARCH_CLUSTER_HOST/_cat/health?h=status")" health="$(echo "$health" | sed -r 's/^[[:space:]]+|[[:space:]]+$//g')" - >&2 echo "Search cluster is yet unavailable, sleep 1" + >&2 echo "Search cluster status is not green yet - sleep 1s" sleep 1 done From 7fca270ff7450fd61f8c41f1e6cd21aceec5ddae Mon Sep 17 00:00:00 2001 From: tellet-q Date: Thu, 18 Apr 2024 09:48:58 +0200 Subject: [PATCH 09/12] Revert glove-25 for all engines --- .../manual-all-engines-benchmark.yaml | 29 +++++++++---------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/.github/workflows/manual-all-engines-benchmark.yaml b/.github/workflows/manual-all-engines-benchmark.yaml index be49dbd9..b3c1ca2d 100644 --- a/.github/workflows/manual-all-engines-benchmark.yaml +++ b/.github/workflows/manual-all-engines-benchmark.yaml @@ -6,12 +6,9 @@ on: - "ci/benchmark-all-engines" workflow_dispatch: -env: - DATASET: "glove-25-angular" - jobs: elasticsearchBenchmark: - name: benchmark - elasticsearch-default - $DATASET - against elasticsearch-single-node-ci + name: benchmark - elasticsearch-default - random-100 - against elasticsearch-single-node-ci timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -35,7 +32,7 @@ jobs: run: | ./tools/wait_for_green_status.sh source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "elasticsearch-default" --datasets "$DATASET" + poetry run python3 run.py --engines "elasticsearch-default" --datasets "random-100" milvusBenchmark: name: benchmark - milvus-default - random-100 - against milvus-single-node @@ -61,10 +58,10 @@ jobs: - name: Execution run: | source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "milvus-default" --datasets "$DATASET" + poetry run python3 run.py --engines "milvus-default" --datasets "random-100" opensearchBenchmark: - name: benchmark - opensearch-default - $DATASET - against opensearch-single-node-ci + name: benchmark - opensearch-default - glove-25-angular - against opensearch-single-node-ci timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -88,10 +85,10 @@ jobs: run: | ./tools/wait_for_green_status.sh source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "opensearch-default" --datasets "$DATASET" + poetry run python3 run.py --engines "opensearch-default" --datasets "glove-25-angular" pgvectorBenchmark: - name: benchmark - pgvector-default - $DATASET - against pgvector-single-node + name: benchmark - pgvector-default - random-100 - against pgvector-single-node timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -114,10 +111,10 @@ jobs: - name: Execution run: | source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "pgvector-default" --datasets "$DATASET" + poetry run python3 run.py --engines "pgvector-default" --datasets "random-100" qdrantBenchmark: - name: benchmark - qdrant-default - $DATASET - against qdrant-single-node + name: benchmark - qdrant-default - random-100 - against qdrant-single-node timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -140,10 +137,10 @@ jobs: - name: Execution run: | source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "qdrant-default" --datasets "$DATASET" + poetry run python3 run.py --engines "qdrant-default" --datasets "random-100" redisBenchmark: - name: benchmark - redis-default - $DATASET - against redis-single-node + name: benchmark - redis-default - random-100 - against redis-single-node timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -166,10 +163,10 @@ jobs: - name: Execution run: | source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "redis-default" --datasets "$DATASET" + poetry run python3 run.py --engines "redis-default" --datasets "random-100" weaviateBenchmark: - name: benchmark - weaviate-default - $DATASET - against weaviate-single-node + name: benchmark - weaviate-default - random-100 - against weaviate-single-node timeout-minutes: 60 runs-on: ubuntu-latest steps: @@ -192,4 +189,4 @@ jobs: - name: Execution run: | source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "weaviate-default" --datasets "$DATASET" \ No newline at end of file + poetry run python3 run.py --engines "weaviate-default" --datasets "random-100" \ No newline at end of file From 139b12bbee66a5d7139f149528ddc4fe2aba2831 Mon Sep 17 00:00:00 2001 From: tellet-q Date: Thu, 18 Apr 2024 10:43:28 +0200 Subject: [PATCH 10/12] Add action.yaml --- .../actions/run-engine-benchmark/action.yaml | 38 ++++ .../manual-all-engines-benchmark.yaml | 170 ++++-------------- 2 files changed, 73 insertions(+), 135 deletions(-) create mode 100644 .github/workflows/actions/run-engine-benchmark/action.yaml diff --git a/.github/workflows/actions/run-engine-benchmark/action.yaml b/.github/workflows/actions/run-engine-benchmark/action.yaml new file mode 100644 index 00000000..2e780b0c --- /dev/null +++ b/.github/workflows/actions/run-engine-benchmark/action.yaml @@ -0,0 +1,38 @@ +name: Run Engine Benchmark +description: "Run benchmark with specified params" +inputs: + engine: + description: "engine (i.e qdrant-default)" + required: true + dataset: + description: "dataset (i.e random-100)" + required: true + compose_file: + description: "path to docker compose" + required: true + +runs: + using: "composite" + steps: + - name: Install poetry + shell: bash + run: pip install poetry + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "poetry" + - name: Install deps + shell: bash + run: poetry install + - uses: hoverkraft-tech/compose-action@v2.0.0 + with: + compose-file: "${{ inputs.compose_file }}" + - name: Execution + shell: bash + run: | + flag="${{ inputs.engine }}" + if [[ "$flag" == *"elasticsearch"* || "$flag" == *"opensearch"* ]]; then + ./tools/wait_for_green_status.sh + fi + source $(poetry env info -p)/bin/activate + poetry run python3 run.py --engines "${{ inputs.engine }}" --datasets "${{ inputs.dataset }}" \ No newline at end of file diff --git a/.github/workflows/manual-all-engines-benchmark.yaml b/.github/workflows/manual-all-engines-benchmark.yaml index b3c1ca2d..4fbf278b 100644 --- a/.github/workflows/manual-all-engines-benchmark.yaml +++ b/.github/workflows/manual-all-engines-benchmark.yaml @@ -9,184 +9,84 @@ on: jobs: elasticsearchBenchmark: name: benchmark - elasticsearch-default - random-100 - against elasticsearch-single-node-ci - timeout-minutes: 60 runs-on: ubuntu-latest + timeout-minutes: 30 steps: - uses: actions/checkout@v3 - - - name: Install poetry - run: pip install poetry - - - uses: actions/setup-python@v5 - with: - python-version: "3.10" - cache: "poetry" - - - name: Install deps - run: poetry install --no-root - - - uses: hoverkraft-tech/compose-action@v2.0.0 + - uses: ./.github/workflows/actions/run-engine-benchmark with: - compose-file: "engine/servers/elasticsearch-single-node-ci/docker-compose.yaml" - - name: Execution - run: | - ./tools/wait_for_green_status.sh - source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "elasticsearch-default" --datasets "random-100" + engine: "elasticsearch-default" + dataset: "random-100" + compose_file: "engine/servers/elasticsearch-single-node-ci/docker-compose.yaml" milvusBenchmark: name: benchmark - milvus-default - random-100 - against milvus-single-node - timeout-minutes: 60 runs-on: ubuntu-latest + timeout-minutes: 30 steps: - uses: actions/checkout@v3 - - - name: Install poetry - run: pip install poetry - - - uses: actions/setup-python@v5 - with: - python-version: "3.10" - cache: "poetry" - - - name: Install deps - run: poetry install - - - uses: hoverkraft-tech/compose-action@v2.0.0 + - uses: ./.github/workflows/actions/run-engine-benchmark with: - compose-file: "engine/servers/milvus-single-node/docker-compose.yaml" - - name: Execution - run: | - source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "milvus-default" --datasets "random-100" + engine: "milvus-default" + dataset: "random-100" + compose_file: "engine/servers/milvus-single-node/docker-compose.yaml" opensearchBenchmark: name: benchmark - opensearch-default - glove-25-angular - against opensearch-single-node-ci - timeout-minutes: 60 runs-on: ubuntu-latest + timeout-minutes: 30 steps: - uses: actions/checkout@v3 - - - name: Install poetry - run: pip install poetry - - - uses: actions/setup-python@v5 + - uses: ./.github/workflows/actions/run-engine-benchmark with: - python-version: "3.10" - cache: "poetry" - - - name: Install deps - run: poetry install - - - uses: hoverkraft-tech/compose-action@v2.0.0 - with: - compose-file: "engine/servers/opensearch-single-node-ci/docker-compose.yaml" - - name: Execution - run: | - ./tools/wait_for_green_status.sh - source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "opensearch-default" --datasets "glove-25-angular" + engine: "opensearch-default" + dataset: "glove-25-angular" + compose_file: "engine/servers/opensearch-single-node-ci/docker-compose.yaml" pgvectorBenchmark: name: benchmark - pgvector-default - random-100 - against pgvector-single-node - timeout-minutes: 60 runs-on: ubuntu-latest + timeout-minutes: 30 steps: - uses: actions/checkout@v3 - - - name: Install poetry - run: pip install poetry - - - uses: actions/setup-python@v5 + - uses: ./.github/workflows/actions/run-engine-benchmark with: - python-version: "3.10" - cache: "poetry" - - - name: Install deps - run: poetry install - - - uses: hoverkraft-tech/compose-action@v2.0.0 - with: - compose-file: "engine/servers/pgvector-single-node/docker-compose.yaml" - - name: Execution - run: | - source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "pgvector-default" --datasets "random-100" + engine: "pgvector-default" + dataset: "random-100" + compose_file: "engine/servers/pgvector-single-node/docker-compose.yaml" qdrantBenchmark: name: benchmark - qdrant-default - random-100 - against qdrant-single-node - timeout-minutes: 60 runs-on: ubuntu-latest + timeout-minutes: 30 steps: - uses: actions/checkout@v3 - - - name: Install poetry - run: pip install poetry - - - uses: actions/setup-python@v5 + - uses: ./.github/workflows/actions/run-engine-benchmark with: - python-version: "3.10" - cache: "poetry" - - - name: Install deps - run: poetry install - - - uses: hoverkraft-tech/compose-action@v2.0.0 - with: - compose-file: "engine/servers/qdrant-single-node/docker-compose.yaml" - - name: Execution - run: | - source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "qdrant-default" --datasets "random-100" + engine: "qdrant-default" + dataset: "random-100" + compose_file: "engine/servers/qdrant-single-node/docker-compose.yaml" redisBenchmark: name: benchmark - redis-default - random-100 - against redis-single-node - timeout-minutes: 60 runs-on: ubuntu-latest + timeout-minutes: 30 steps: - uses: actions/checkout@v3 - - - name: Install poetry - run: pip install poetry - - - uses: actions/setup-python@v5 + - uses: ./.github/workflows/actions/run-engine-benchmark with: - python-version: "3.10" - cache: "poetry" - - - name: Install deps - run: poetry install - - - uses: hoverkraft-tech/compose-action@v2.0.0 - with: - compose-file: "engine/servers/redis-single-node/docker-compose.yaml" - - name: Execution - run: | - source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "redis-default" --datasets "random-100" + engine: "redis-default" + dataset: "random-100" + compose_file: "engine/servers/redis-single-node/docker-compose.yaml" weaviateBenchmark: name: benchmark - weaviate-default - random-100 - against weaviate-single-node - timeout-minutes: 60 runs-on: ubuntu-latest + timeout-minutes: 30 steps: - uses: actions/checkout@v3 - - - name: Install poetry - run: pip install poetry - - - uses: actions/setup-python@v5 - with: - python-version: "3.10" - cache: "poetry" - - - name: Install deps - run: poetry install - - - uses: hoverkraft-tech/compose-action@v2.0.0 + - uses: ./.github/workflows/actions/run-engine-benchmark with: - compose-file: "engine/servers/weaviate-single-node/docker-compose.yaml" - - name: Execution - run: | - source $(poetry env info -p)/bin/activate - poetry run python3 run.py --engines "weaviate-default" --datasets "random-100" \ No newline at end of file + engine: "weaviate-default" + dataset: "random-100" + compose_file: "engine/servers/weaviate-single-node/docker-compose.yaml" From 7a94bb34c17b1388c10ce2028015b3bbf193ad37 Mon Sep 17 00:00:00 2001 From: tellet-q Date: Thu, 18 Apr 2024 12:17:39 +0200 Subject: [PATCH 11/12] Configure triggers --- .github/workflows/manual-all-engines-benchmark.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/manual-all-engines-benchmark.yaml b/.github/workflows/manual-all-engines-benchmark.yaml index 4fbf278b..7a7a031f 100644 --- a/.github/workflows/manual-all-engines-benchmark.yaml +++ b/.github/workflows/manual-all-engines-benchmark.yaml @@ -3,7 +3,11 @@ name: Manual All Engines Default Benchmarks on: push: branches: - - "ci/benchmark-all-engines" + - "master" + pull_request: + types: + - opened + - reopened workflow_dispatch: jobs: From cd4432ac7a412b9b50eaddb6ee9c2b8b37cc27db Mon Sep 17 00:00:00 2001 From: tellet-q Date: Thu, 18 Apr 2024 13:59:30 +0200 Subject: [PATCH 12/12] Address review --- .../actions/run-engine-benchmark/action.yaml | 4 +- .../manual-all-engines-benchmark.yaml | 42 +++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/.github/workflows/actions/run-engine-benchmark/action.yaml b/.github/workflows/actions/run-engine-benchmark/action.yaml index 2e780b0c..c1152814 100644 --- a/.github/workflows/actions/run-engine-benchmark/action.yaml +++ b/.github/workflows/actions/run-engine-benchmark/action.yaml @@ -30,8 +30,8 @@ runs: - name: Execution shell: bash run: | - flag="${{ inputs.engine }}" - if [[ "$flag" == *"elasticsearch"* || "$flag" == *"opensearch"* ]]; then + engine="${{ inputs.engine }}" + if [[ "$engine" == *"elasticsearch"* || "$engine" == *"opensearch"* ]]; then ./tools/wait_for_green_status.sh fi source $(poetry env info -p)/bin/activate diff --git a/.github/workflows/manual-all-engines-benchmark.yaml b/.github/workflows/manual-all-engines-benchmark.yaml index 7a7a031f..38737409 100644 --- a/.github/workflows/manual-all-engines-benchmark.yaml +++ b/.github/workflows/manual-all-engines-benchmark.yaml @@ -12,6 +12,12 @@ on: jobs: elasticsearchBenchmark: + if: > + ( + startsWith(github.event.head_commit.modified, 'engine/clients/elasticsearch') || + startsWith(github.event.head_commit.modified, 'engine/servers/elasticsearch') || + startsWith(github.event.head_commit.modified, 'engine/base_client/') + ) name: benchmark - elasticsearch-default - random-100 - against elasticsearch-single-node-ci runs-on: ubuntu-latest timeout-minutes: 30 @@ -24,6 +30,12 @@ jobs: compose_file: "engine/servers/elasticsearch-single-node-ci/docker-compose.yaml" milvusBenchmark: + if: > + ( + startsWith(github.event.head_commit.modified, 'engine/clients/milvus') || + startsWith(github.event.head_commit.modified, 'engine/servers/milvus') || + startsWith(github.event.head_commit.modified, 'engine/base_client/') + ) name: benchmark - milvus-default - random-100 - against milvus-single-node runs-on: ubuntu-latest timeout-minutes: 30 @@ -36,6 +48,12 @@ jobs: compose_file: "engine/servers/milvus-single-node/docker-compose.yaml" opensearchBenchmark: + if: > + ( + startsWith(github.event.head_commit.modified, 'engine/clients/opensearch') || + startsWith(github.event.head_commit.modified, 'engine/servers/opensearch') || + startsWith(github.event.head_commit.modified, 'engine/base_client/') + ) name: benchmark - opensearch-default - glove-25-angular - against opensearch-single-node-ci runs-on: ubuntu-latest timeout-minutes: 30 @@ -48,6 +66,12 @@ jobs: compose_file: "engine/servers/opensearch-single-node-ci/docker-compose.yaml" pgvectorBenchmark: + if: > + ( + startsWith(github.event.head_commit.modified, 'engine/clients/pgvector') || + startsWith(github.event.head_commit.modified, 'engine/servers/pgvector') || + startsWith(github.event.head_commit.modified, 'engine/base_client/') + ) name: benchmark - pgvector-default - random-100 - against pgvector-single-node runs-on: ubuntu-latest timeout-minutes: 30 @@ -60,6 +84,12 @@ jobs: compose_file: "engine/servers/pgvector-single-node/docker-compose.yaml" qdrantBenchmark: + if: > + ( + startsWith(github.event.head_commit.modified, 'engine/clients/qdrant') || + startsWith(github.event.head_commit.modified, 'engine/servers/qdrant') || + startsWith(github.event.head_commit.modified, 'engine/base_client/') + ) name: benchmark - qdrant-default - random-100 - against qdrant-single-node runs-on: ubuntu-latest timeout-minutes: 30 @@ -72,6 +102,12 @@ jobs: compose_file: "engine/servers/qdrant-single-node/docker-compose.yaml" redisBenchmark: + if: > + ( + startsWith(github.event.head_commit.modified, 'engine/clients/redis') || + startsWith(github.event.head_commit.modified, 'engine/servers/redis') || + startsWith(github.event.head_commit.modified, 'engine/base_client/') + ) name: benchmark - redis-default - random-100 - against redis-single-node runs-on: ubuntu-latest timeout-minutes: 30 @@ -84,6 +120,12 @@ jobs: compose_file: "engine/servers/redis-single-node/docker-compose.yaml" weaviateBenchmark: + if: > + ( + startsWith(github.event.head_commit.modified, 'engine/clients/weaviate') || + startsWith(github.event.head_commit.modified, 'engine/servers/weaviate') || + startsWith(github.event.head_commit.modified, 'engine/base_client/') + ) name: benchmark - weaviate-default - random-100 - against weaviate-single-node runs-on: ubuntu-latest timeout-minutes: 30