From 97db614f9524cbd630f5b825c17ffdd661098ca8 Mon Sep 17 00:00:00 2001 From: KShivendu Date: Tue, 4 Jun 2024 00:52:33 +0530 Subject: [PATCH] feat: Add DBpedia OpenAI embedding dataset with 100k vectors --- .github/workflows/continuous-benchmark.yaml | 2 +- datasets/datasets.json | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/continuous-benchmark.yaml b/.github/workflows/continuous-benchmark.yaml index 6e8b6e61..370b8197 100644 --- a/.github/workflows/continuous-benchmark.yaml +++ b/.github/workflows/continuous-benchmark.yaml @@ -26,7 +26,7 @@ jobs: DATASET_TO_ENGINE["laion-small-clip"]="qdrant-continuous-benchmark" DATASET_TO_ENGINE["msmarco-sparse-100K"]="qdrant-sparse-vector" DATASET_TO_ENGINE["h-and-m-2048-angular-filters"]="qdrant-continuous-benchmark" - DATASET_TO_ENGINE["dbpedia-openai-1M-1536-angular"]="qdrant-bq-continuous-benchmark" + DATASET_TO_ENGINE["dbpedia-openai-100K-1536-angular"]="qdrant-bq-continuous-benchmark" for dataset in "${!DATASET_TO_ENGINE[@]}"; do export ENGINE_NAME=${DATASET_TO_ENGINE[$dataset]} diff --git a/datasets/datasets.json b/datasets/datasets.json index f2e646de..c9111a04 100644 --- a/datasets/datasets.json +++ b/datasets/datasets.json @@ -66,6 +66,14 @@ "path": "dbpedia-openai-1M-1536-angular/dbpedia_openai_1M", "link": "https://storage.googleapis.com/ann-filtered-benchmark/datasets/dbpedia_openai_1M.tgz" }, + { + "name": "dbpedia-openai-100K-1536-angular", + "vector_size": 1536, + "distance": "cosine", + "type": "tar", + "path": "dbpedia-openai-100K-1536-angular/dbpedia_openai_100K", + "link": "https://storage.googleapis.com/ann-filtered-benchmark/datasets/dbpedia_openai_100K.tgz" + }, { "name": "msmarco-sparse-100K", "type": "sparse",