Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automate running benchmarks for all engines #134

Merged
merged 12 commits into from
Apr 18, 2024
38 changes: 38 additions & 0 deletions .github/workflows/actions/run-engine-benchmark/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Composite action: installs the project with Poetry, starts the engine's
# docker compose stack, and runs run.py for one engine/dataset pair.
name: Run Engine Benchmark
description: "Run benchmark with specified params"
inputs:
  engine:
    description: "engine (i.e qdrant-default)"
    required: true
  dataset:
    description: "dataset (i.e random-100)"
    required: true
  compose_file:
    description: "path to docker compose"
    required: true

runs:
  using: "composite"
  steps:
    # Poetry is installed before setup-python so that the `cache: "poetry"`
    # option below can find the poetry executable.
    - name: Install poetry
      shell: bash
      run: pip install poetry
    - uses: actions/setup-python@v5
      with:
        # Quoted on purpose: a bare 3.10 would be parsed as the float 3.1.
        python-version: "3.10"
        cache: "poetry"
    - name: Install deps
      shell: bash
      run: poetry install
    # NOTE(review): the action reference was mangled in the extracted source
    # ("hoverkraft-tech/[email protected]"); the version tag below is a best
    # guess and must be confirmed against the repository.
    - uses: hoverkraft-tech/compose-action@v2.0.0
      with:
        compose-file: "${{ inputs.compose_file }}"
    - name: Execution
      shell: bash
      # Inputs are passed through environment variables rather than being
      # interpolated into the script text, so a value containing quotes or
      # shell metacharacters cannot change the commands that are executed.
      env:
        ENGINE: "${{ inputs.engine }}"
        DATASET: "${{ inputs.dataset }}"
      run: |
        # Elasticsearch/OpenSearch engines wait for the helper script to
        # report green cluster health before the benchmark starts.
        if [[ "$ENGINE" == *"elasticsearch"* || "$ENGINE" == *"opensearch"* ]]; then
          ./tools/wait_for_green_status.sh
        fi
        source "$(poetry env info -p)/bin/activate"
        poetry run python3 run.py --engines "$ENGINE" --datasets "$DATASET"
138 changes: 138 additions & 0 deletions .github/workflows/manual-all-engines-benchmark.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
# Runs the default benchmark configuration of every supported engine, one job
# per engine, through the local run-engine-benchmark composite action.
#
# Each job runs when the workflow is started manually (workflow_dispatch).
#
# NOTE(review): the startsWith(github.event.head_commit.modified, ...) checks
# are kept from the original, but they look ineffective: `head_commit` only
# exists on push events and `modified` is a list, not a string, so these
# prefix checks are not expected to match. If per-engine path filtering on
# push / pull_request is wanted, it needs a dedicated changed-files mechanism
# - confirm the intended behavior.
name: Manual All Engines Default Benchmarks

on:
  push:
    branches:
      - "master"
  pull_request:
    types:
      - opened
      - reopened
  workflow_dispatch:

jobs:
  elasticsearchBenchmark:
    if: >
      github.event_name == 'workflow_dispatch' ||
      startsWith(github.event.head_commit.modified, 'engine/clients/elasticsearch') ||
      startsWith(github.event.head_commit.modified, 'engine/servers/elasticsearch') ||
      startsWith(github.event.head_commit.modified, 'engine/base_client/')
    name: benchmark - elasticsearch-default - random-100 - against elasticsearch-single-node-ci
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v3
      - uses: ./.github/workflows/actions/run-engine-benchmark
        with:
          engine: "elasticsearch-default"
          dataset: "random-100"
          compose_file: "engine/servers/elasticsearch-single-node-ci/docker-compose.yaml"

  milvusBenchmark:
    if: >
      github.event_name == 'workflow_dispatch' ||
      startsWith(github.event.head_commit.modified, 'engine/clients/milvus') ||
      startsWith(github.event.head_commit.modified, 'engine/servers/milvus') ||
      startsWith(github.event.head_commit.modified, 'engine/base_client/')
    name: benchmark - milvus-default - random-100 - against milvus-single-node
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v3
      - uses: ./.github/workflows/actions/run-engine-benchmark
        with:
          engine: "milvus-default"
          dataset: "random-100"
          compose_file: "engine/servers/milvus-single-node/docker-compose.yaml"

  opensearchBenchmark:
    if: >
      github.event_name == 'workflow_dispatch' ||
      startsWith(github.event.head_commit.modified, 'engine/clients/opensearch') ||
      startsWith(github.event.head_commit.modified, 'engine/servers/opensearch') ||
      startsWith(github.event.head_commit.modified, 'engine/base_client/')
    name: benchmark - opensearch-default - glove-25-angular - against opensearch-single-node-ci
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v3
      - uses: ./.github/workflows/actions/run-engine-benchmark
        with:
          engine: "opensearch-default"
          dataset: "glove-25-angular"
          compose_file: "engine/servers/opensearch-single-node-ci/docker-compose.yaml"

  pgvectorBenchmark:
    if: >
      github.event_name == 'workflow_dispatch' ||
      startsWith(github.event.head_commit.modified, 'engine/clients/pgvector') ||
      startsWith(github.event.head_commit.modified, 'engine/servers/pgvector') ||
      startsWith(github.event.head_commit.modified, 'engine/base_client/')
    name: benchmark - pgvector-default - random-100 - against pgvector-single-node
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v3
      - uses: ./.github/workflows/actions/run-engine-benchmark
        with:
          engine: "pgvector-default"
          dataset: "random-100"
          compose_file: "engine/servers/pgvector-single-node/docker-compose.yaml"

  qdrantBenchmark:
    if: >
      github.event_name == 'workflow_dispatch' ||
      startsWith(github.event.head_commit.modified, 'engine/clients/qdrant') ||
      startsWith(github.event.head_commit.modified, 'engine/servers/qdrant') ||
      startsWith(github.event.head_commit.modified, 'engine/base_client/')
    name: benchmark - qdrant-default - random-100 - against qdrant-single-node
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v3
      - uses: ./.github/workflows/actions/run-engine-benchmark
        with:
          engine: "qdrant-default"
          dataset: "random-100"
          compose_file: "engine/servers/qdrant-single-node/docker-compose.yaml"

  redisBenchmark:
    if: >
      github.event_name == 'workflow_dispatch' ||
      startsWith(github.event.head_commit.modified, 'engine/clients/redis') ||
      startsWith(github.event.head_commit.modified, 'engine/servers/redis') ||
      startsWith(github.event.head_commit.modified, 'engine/base_client/')
    name: benchmark - redis-default - random-100 - against redis-single-node
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v3
      - uses: ./.github/workflows/actions/run-engine-benchmark
        with:
          engine: "redis-default"
          dataset: "random-100"
          compose_file: "engine/servers/redis-single-node/docker-compose.yaml"

  weaviateBenchmark:
    if: >
      github.event_name == 'workflow_dispatch' ||
      startsWith(github.event.head_commit.modified, 'engine/clients/weaviate') ||
      startsWith(github.event.head_commit.modified, 'engine/servers/weaviate') ||
      startsWith(github.event.head_commit.modified, 'engine/base_client/')
    name: benchmark - weaviate-default - random-100 - against weaviate-single-node
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v3
      - uses: ./.github/workflows/actions/run-engine-benchmark
        with:
          engine: "weaviate-default"
          dataset: "random-100"
          compose_file: "engine/servers/weaviate-single-node/docker-compose.yaml"
23 changes: 23 additions & 0 deletions engine/servers/elasticsearch-single-node-ci/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Single-node Elasticsearch service definition used by the CI benchmark job.
version: "3.5"

services:
  es:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.10.2
    # 9200 and 9300 are published on the host under the same port numbers.
    ports:
      - '9200:9200'
      - '9300:9300'
    environment:
      discovery.type: 'single-node'
      # Kept as a quoted string so YAML does not turn it into a boolean.
      xpack.security.enabled: 'false'
      SERVER_SSL_ENABLED: 'false'
      ELASTIC_PASSWORD: 'passwd'
      KIBANA_PASSWORD: 'passwd'
    # Cap the container at 4Gb of memory.
    deploy:
      resources:
        limits:
          memory: 4Gb
    # Keep container logs small: a single json-file log of at most 10m.
    logging:
      driver: 'json-file'
      options:
        max-size: 10m
        max-file: 1
21 changes: 21 additions & 0 deletions engine/servers/opensearch-single-node-ci/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Single-node OpenSearch service definition used by the CI benchmark job.
version: '3.5'

services:
  opensearch:
    image: opensearchproject/opensearch:2.10.0
    environment:
      discovery.type: "single-node"
      # Quoted: environment values are strings. A bare `true` is parsed as a
      # YAML boolean, which the Compose spec advises against for env values.
      plugins.security.disabled: "true"
      # JVM heap fixed at 2g (min and max), below the 4Gb container limit.
      OPENSEARCH_JAVA_OPTS: "-Xms2g -Xmx2g"
    ports:
      - "9200:9200"
      - "9300:9300"
    # Keep container logs small: a single json-file log of at most 10m.
    logging:
      driver: "json-file"
      options:
        max-file: 1
        max-size: 10m
    deploy:
      resources:
        limits:
          memory: 4Gb
81 changes: 36 additions & 45 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 34 additions & 0 deletions tools/wait_for_green_status.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash
# Waits for an OpenSearch/Elasticsearch cluster to become reachable and for
# its health status to turn green.
#
# Usage: wait_for_green_status.sh [HOST[:PORT]]
#   HOST[:PORT]  address handed to curl (default: localhost:9200)
#
# Progress messages are written to stderr. The script polls once per second
# and has no timeout of its own; the caller is expected to bound it.

set -e

SEARCH_CLUSTER_HOST=${1:-"localhost:9200"}

# Prints the HTTP status code of a GET on the cluster root.
# `|| true` keeps a failed request from aborting the script under `set -e`;
# the caller simply sees a non-200 code and retries.
fetch_http_code() {
  curl --write-out '%{http_code}' --silent --output /dev/null "$SEARCH_CLUSTER_HOST" || true
}

# Prints the cluster health status with all whitespace removed, or nothing
# when the request fails. `|| true` is needed for the same `set -e` reason:
# `curl -f` exits non-zero on an HTTP error while the cluster is starting.
fetch_health() {
  { curl -fsSL "$SEARCH_CLUSTER_HOST/_cat/health?h=status" || true; } | tr -d '[:space:]'
}

# Wait until the search cluster host is available.
# curl is run directly; wrapping it in $(...) would try to execute its output.
until curl --output /dev/null --silent --head --fail "$SEARCH_CLUSTER_HOST"; do
  printf '.'
  sleep 1 # Wait for 1 second
done

# Wait for ES/OS to start (root endpoint answers 200).
until [ "$(fetch_http_code)" = "200" ]; do
  >&2 echo "Search cluster is unavailable - sleep 1s"
  sleep 1
done

# Wait for ES/OS status to turn Green.
# The status is checked before each message, so "not green yet" is only
# printed when the most recent check really was not green.
until [ "$(fetch_health)" = 'green' ]; do
  >&2 echo "Search cluster status is not green yet - sleep 1s"
  sleep 1
done

>&2 echo "Search cluster is up"