Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: review all extra #6029

Merged
merged 10 commits into from
Oct 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 31 additions & 4 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,10 @@ jobs:
include:
- topic: document_stores
os: ubuntu-latest
dependencies: elasticsearch8,faiss,weaviate,pinecone,opensearch,inference,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx,beir,metrics,aws,dev
dependencies: elasticsearch8,faiss,weaviate,pinecone,opensearch,inference,crawler,preprocessing,file-conversion,pdf,ocr,metrics,dev
- topic: document_stores
os: windows-latest
dependencies: elasticsearch8,faiss,weaviate,pinecone,opensearch,inference,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx,beir,metrics,aws,dev
dependencies: elasticsearch8,faiss,weaviate,pinecone,opensearch,inference,crawler,preprocessing,file-conversion,pdf,ocr,metrics,dev
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
Expand Down Expand Up @@ -329,7 +329,7 @@ jobs:
runs-on: ${{ matrix.os }}
services:
elasticsearch:
image: elasticsearch:8.8.0
image: elasticsearch:8.10.2
env:
discovery.type: "single-node"
xpack.security.enabled: "false"
Expand All @@ -346,9 +346,36 @@ jobs:
- name: Install Haystack
run: pip install .[elasticsearch8,dev,preprocessing,inference]

- name: Make elasticsearch comfortable with a disk almost full
run: |
curl -X PUT "localhost:9200/_cluster/settings?pretty" -H 'Content-Type: application/json' -d'
{
"persistent": {
"cluster.routing.allocation.disk.watermark.low": "90%",
"cluster.routing.allocation.disk.watermark.low.max_headroom": "100GB",
"cluster.routing.allocation.disk.watermark.high": "95%",
"cluster.routing.allocation.disk.watermark.high.max_headroom": "20GB",
"cluster.routing.allocation.disk.watermark.flood_stage": "97%",
"cluster.routing.allocation.disk.watermark.flood_stage.max_headroom": "5GB",
"cluster.routing.allocation.disk.watermark.flood_stage.frozen": "97%",
"cluster.routing.allocation.disk.watermark.flood_stage.frozen.max_headroom": "5GB"
}
}
'
curl -X PUT "localhost:9200/*/_settings?expand_wildcards=all&pretty" -H 'Content-Type: application/json' -d'
{
"index.blocks.read_only_allow_delete": null
}
'

- name: Run tests
run: |
pytest --maxfail=5 -m "document_store and integration" test/document_stores/test_elasticsearch.py
pytest -x -m"document_store and integration" test/document_stores/test_elasticsearch.py

- name: logs
if: failure()
run: |
docker logs "${{ job.services.elasticsearch.id }}"

- name: Calculate alert data
id: calculator
Expand Down
2 changes: 1 addition & 1 deletion haystack/pipelines/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -963,7 +963,7 @@ def eval_beir(
from beir.datasets.data_loader import GenericDataLoader
from beir.retrieval.evaluation import EvaluateRetrieval
except ModuleNotFoundError as e:
raise HaystackError("beir is not installed. Please run `pip install farm-haystack[beir]`...") from e
raise HaystackError("beir is not installed. Please run `pip install beir`") from e

url = f"https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{dataset}.zip"
data_path = util.download_and_unzip(url, dataset_dir)
Expand Down
7 changes: 2 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,6 @@ docstores-gpu = [
audio = [
"openai-whisper"
]
beir = [
"beir; platform_system != 'Windows'",
]
aws = [
"boto3",
# Constrain botocore to avoid taking too much time to resolve the dependency tree.
Expand Down Expand Up @@ -246,11 +243,11 @@ formatting = [
]

all = [
"farm-haystack[inference,docstores,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx,beir,metrics,aws,preview]",
"farm-haystack[inference,docstores,crawler,preprocessing,file-conversion,pdf,ocr,metrics,aws,preview]",
]
all-gpu = [
# beir is incompatible with faiss-gpu: https://github.com/beir-cellar/beir/issues/71
"farm-haystack[inference,docstores-gpu,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx-gpu,metrics,aws,preview]",
"farm-haystack[inference,docstores-gpu,crawler,preprocessing,file-conversion,pdf,ocr,metrics,aws,preview]",
]

[project.scripts]
Expand Down
3 changes: 3 additions & 0 deletions releasenotes/notes/review-all-extras-42d5a3a3d61f5393.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
upgrade:
- |
Removes the `audio`, `ray`, `onnx` and `beir` extras from the extra group `all`.
16 changes: 7 additions & 9 deletions test/pipelines/test_ray.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
from pathlib import Path

import pytest
import ray

from haystack.pipelines import RayPipeline


@pytest.fixture(autouse=True)
def shutdown_ray():
yield
@pytest.fixture()
def ray():
try:
import ray

yield ray

ray.serve.shutdown()
ray.shutdown()
except:
Expand All @@ -21,7 +19,7 @@ def shutdown_ray():
@pytest.mark.integration
@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
@pytest.mark.parametrize("serve_detached", [True, False])
def test_load_pipeline(document_store_with_docs, serve_detached, samples_path):
def test_load_pipeline(ray, document_store_with_docs, serve_detached, samples_path):
pipeline = RayPipeline.load_from_yaml(
samples_path / "pipeline" / "ray.simple.haystack-pipeline.yml",
pipeline_name="ray_query_pipeline",
Expand All @@ -43,7 +41,7 @@ def test_load_pipeline(document_store_with_docs, serve_detached, samples_path):

@pytest.mark.integration
@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
def test_load_advanced_pipeline(document_store_with_docs, samples_path):
def test_load_advanced_pipeline(ray, document_store_with_docs, samples_path):
pipeline = RayPipeline.load_from_yaml(
samples_path / "pipeline" / "ray.advanced.haystack-pipeline.yml",
pipeline_name="ray_query_pipeline",
Expand Down Expand Up @@ -71,7 +69,7 @@ def test_load_advanced_pipeline(document_store_with_docs, samples_path):
@pytest.mark.asyncio
@pytest.mark.integration
@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
async def test_load_advanced_pipeline_async(document_store_with_docs, samples_path):
async def test_load_advanced_pipeline_async(ray, document_store_with_docs, samples_path):
pipeline = RayPipeline.load_from_yaml(
samples_path / "pipeline" / "ray.advanced.haystack-pipeline.yml",
pipeline_name="ray_query_pipeline",
Expand Down