From e05632513f4e3808b838f55ab1ac048d923f10b7 Mon Sep 17 00:00:00 2001 From: Grant Williams Date: Fri, 6 Oct 2023 09:36:30 -0500 Subject: [PATCH 01/18] Upgrade transformers to the latest version 4.34.0 so that Haystack can support the new Mistral, Nougat, and other models. --- pyproject.toml | 4 +- ...transformers-to-4-34-38d045d8e42ea0a2.yaml | 37 +++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 releasenotes/notes/bump-transformers-to-4-34-38d045d8e42ea0a2.yaml diff --git a/pyproject.toml b/pyproject.toml index cc4cef492e..3846ad108c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,7 @@ dependencies = [ "requests", "httpx", "pydantic<2", - "transformers==4.32.1", + "transformers==4.34.0", "pandas", "rank_bm25", "scikit-learn>=1.3.0", # TF-IDF and metrics @@ -99,7 +99,7 @@ preview = [ "openai", ] inference = [ - "transformers[torch,sentencepiece]==4.32.1", + "transformers[torch,sentencepiece]==4.34.0", "sentence-transformers>=2.2.0", # See haystack/nodes/retriever/_embedding_encoder.py, _SentenceTransformersEmbeddingEncoder "huggingface-hub>=0.5.0", ] diff --git a/releasenotes/notes/bump-transformers-to-4-34-38d045d8e42ea0a2.yaml b/releasenotes/notes/bump-transformers-to-4-34-38d045d8e42ea0a2.yaml new file mode 100644 index 0000000000..15e507e92a --- /dev/null +++ b/releasenotes/notes/bump-transformers-to-4-34-38d045d8e42ea0a2.yaml @@ -0,0 +1,37 @@ +--- +prelude: > + Replace this text with content to appear at the top of the section for this + release. This is equivalent to the "Highlights" section we used before. + The prelude might repeat some details that are also present in other notes + from the same release, that's ok. Not every release note requires a prelude, + use it only to describe major features or notable changes. +upgrade: + - | + List upgrade notes here, or remove this section. + Upgrade notes should be rare: only list known/potential breaking changes, + or major changes that require user action before the upgrade. + Notes here must include steps that users can follow to 1. know if they're + affected and 2. handle the change gracefully on their end. +features: + - | + List new features here, or remove this section. +enhancements: + - | + List new behavior that is too small to be + considered a new feature, or remove this section. +issues: + - | + List known issues here, or remove this section. For example, if some change is experimental or known to not work in some cases, it should be mentioned here. +deprecations: + - | + List deprecations notes here, or remove this section. Deprecations should not be used for something that is removed in the release, use upgrade section instead. Deprecation should allow time for users to make necessary changes for the removal to happen in a future release. +security: + - | + Add security notes here, or remove this section. +fixes: + - | + Add normal bug fixes here, or remove this section. +preview: + - | + Add changes to Haystack version 2, or remove this section. + Haystack version 2 can be found under haystack/preview. From 6d98b287a5c02abc2f5d5c5e574d9ea9aea7e33e Mon Sep 17 00:00:00 2001 From: Grant Williams Date: Fri, 6 Oct 2023 09:44:51 -0500 Subject: [PATCH 02/18] update release notes --- ...transformers-to-4-34-38d045d8e42ea0a2.yaml | 36 ++----------------- 1 file changed, 2 insertions(+), 34 deletions(-) diff --git a/releasenotes/notes/bump-transformers-to-4-34-38d045d8e42ea0a2.yaml b/releasenotes/notes/bump-transformers-to-4-34-38d045d8e42ea0a2.yaml index 15e507e92a..06357fc4ac 100644 --- a/releasenotes/notes/bump-transformers-to-4-34-38d045d8e42ea0a2.yaml +++ b/releasenotes/notes/bump-transformers-to-4-34-38d045d8e42ea0a2.yaml @@ -1,37 +1,5 @@ --- -prelude: > - Replace this text with content to appear at the top of the section for this - release. This is equivalent to the "Highlights" section we used before. - The prelude might repeat some details that are also present in other notes - from the same release, that's ok. Not every release note requires a prelude, - use it only to describe major features or notable changes. -upgrade: - - | - List upgrade notes here, or remove this section. - Upgrade notes should be rare: only list known/potential breaking changes, - or major changes that require user action before the upgrade. - Notes here must include steps that users can follow to 1. know if they're - affected and 2. handle the change gracefully on their end. -features: - - | - List new features here, or remove this section. enhancements: - | - List new behavior that is too small to be - considered a new feature, or remove this section. -issues: - - | - List known issues here, or remove this section. For example, if some change is experimental or known to not work in some cases, it should be mentioned here. -deprecations: - - | - List deprecations notes here, or remove this section. Deprecations should not be used for something that is removed in the release, use upgrade section instead. Deprecation should allow time for users to make necessary changes for the removal to happen in a future release. -security: - - | - Add security notes here, or remove this section. -fixes: - - | - Add normal bug fixes here, or remove this section. -preview: - - | - Add changes to Haystack version 2, or remove this section. - Haystack version 2 can be found under haystack/preview. + Upgrade Transformers to the latest version 4.34.0. + This version adds support for the new Mistral, Persimmon, BROS, ViTMatte, and Nougat models. From c3746bae14c4ba0eff4f6c3ae332187b96b2969d Mon Sep 17 00:00:00 2001 From: Grant Williams Date: Fri, 6 Oct 2023 09:56:18 -0500 Subject: [PATCH 03/18] updated missing lazy import --- haystack/preview/components/readers/extractive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/preview/components/readers/extractive.py b/haystack/preview/components/readers/extractive.py index 081646dd26..0be7c832d9 100644 --- a/haystack/preview/components/readers/extractive.py +++ b/haystack/preview/components/readers/extractive.py @@ -7,7 +7,7 @@ from haystack.preview.lazy_imports import LazyImport with LazyImport( - "Run 'pip install transformers[torch,sentencepiece]==4.32.1 sentence-transformers>=2.2.0'" + "Run 'pip install transformers[torch,sentencepiece]==4.34.0 sentence-transformers>=2.2.0'" ) as torch_and_transformers_import: from transformers import AutoModelForQuestionAnswering, AutoTokenizer from tokenizers import Encoding From da4249fa84a06217a8fc8e1c0e7b76acdcd0c242 Mon Sep 17 00:00:00 2001 From: Grant Williams Date: Fri, 6 Oct 2023 10:02:43 -0500 Subject: [PATCH 04/18] Update .github workflows imports --- .github/workflows/tests.yml | 2 +- .github/workflows/tests_preview.yml | 2 +- haystack/preview/components/rankers/similarity.py | 2 +- haystack/preview/components/readers/extractive.py | 10 +++++----- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 4ecdd1fe85..531ba4748d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -201,7 +201,7 @@ jobs: python-version: ${{ env.PYTHON_VERSION }} - name: Install Haystack - run: pip install .[preview,dev] langdetect transformers[torch,sentencepiece]==4.32.1 sentence-transformers>=2.2.0 pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: pip install .[preview,dev] langdetect transformers[torch,sentencepiece]==4.34.0 sentence-transformers>=2.2.0 pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' - name: Run run: pytest --cov-report xml:coverage.xml --cov="haystack" -m "unit" test/preview diff --git a/.github/workflows/tests_preview.yml b/.github/workflows/tests_preview.yml index 9f570491b7..2d8440c0f9 100644 --- a/.github/workflows/tests_preview.yml +++ b/.github/workflows/tests_preview.yml @@ -116,7 +116,7 @@ jobs: python-version: ${{ env.PYTHON_VERSION }} - name: Install Haystack - run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.32.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.0 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' - name: Run run: pytest -m "unit" test/preview diff --git a/haystack/preview/components/rankers/similarity.py b/haystack/preview/components/rankers/similarity.py index 7abb370954..5ccc82cb08 100644 --- a/haystack/preview/components/rankers/similarity.py +++ b/haystack/preview/components/rankers/similarity.py @@ -8,7 +8,7 @@ logger = logging.getLogger(__name__) -with LazyImport(message="Run 'pip install transformers[torch,sentencepiece]==4.32.1'") as torch_and_transformers_import: +with LazyImport(message="Run 'pip install transformers[torch,sentencepiece]==4.34.0'") as torch_and_transformers_import: import torch from transformers import AutoModelForSequenceClassification, AutoTokenizer diff --git a/haystack/preview/components/readers/extractive.py b/haystack/preview/components/readers/extractive.py index 0be7c832d9..9176659487 100644 --- a/haystack/preview/components/readers/extractive.py +++ b/haystack/preview/components/readers/extractive.py @@ -1,17 +1,17 @@ -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union import math import warnings +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, Union -from haystack.preview import component, default_from_dict, default_to_dict, ComponentError, Document, ExtractedAnswer +from haystack.preview import ComponentError, Document, ExtractedAnswer, component, default_from_dict, default_to_dict from haystack.preview.lazy_imports import LazyImport with LazyImport( "Run 'pip install transformers[torch,sentencepiece]==4.34.0 sentence-transformers>=2.2.0'" ) as torch_and_transformers_import: - from transformers import AutoModelForQuestionAnswering, AutoTokenizer - from tokenizers import Encoding import torch + from tokenizers import Encoding + from transformers import AutoModelForQuestionAnswering, AutoTokenizer @component From df2abfcf8a6d9d39d94747aa03ea1ee3a261b0b9 Mon Sep 17 00:00:00 2001 From: Grant Williams Date: Fri, 6 Oct 2023 10:30:04 -0500 Subject: [PATCH 05/18] bump more versions in .github workflows --- .github/workflows/e2e_preview.yml | 2 +- .github/workflows/tests.yml | 2 +- .github/workflows/tests_preview.yml | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/e2e_preview.yml b/.github/workflows/e2e_preview.yml index aa54f8963e..efb2edbce0 100644 --- a/.github/workflows/e2e_preview.yml +++ b/.github/workflows/e2e_preview.yml @@ -36,7 +36,7 @@ jobs: sudo apt install ffmpeg # for local Whisper tests - name: Install Haystack - run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.32.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.0 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' - name: Run tests run: pytest e2e/preview diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 531ba4748d..7133dcdcaa 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -918,7 +918,7 @@ jobs: sudo apt install ffmpeg # for local Whisper tests - name: Install Haystack - run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.32.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.0 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' - name: Run tests run: | diff --git a/.github/workflows/tests_preview.yml b/.github/workflows/tests_preview.yml index 2d8440c0f9..b11ac3a965 100644 --- a/.github/workflows/tests_preview.yml +++ b/.github/workflows/tests_preview.yml @@ -175,7 +175,7 @@ jobs: sudo apt install ffmpeg # for local Whisper tests - name: Install Haystack - run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.32.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.0 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' - name: Run run: pytest --maxfail=5 -m "integration" test/preview @@ -230,7 +230,7 @@ jobs: colima start - name: Install Haystack - run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.32.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.0 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' - name: Run Tika run: docker run -d -p 9998:9998 apache/tika:2.9.0.0 @@ -282,7 +282,7 @@ jobs: python-version: ${{ env.PYTHON_VERSION }} - name: Install Haystack - run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.32.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.0 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' - name: Run run: pytest --maxfail=5 -m "integration" test/preview -k 'not tika' From eb28cc174e8c19bcc696d069c1b4914e9df6edb8 Mon Sep 17 00:00:00 2001 From: Grant Williams Date: Fri, 6 Oct 2023 10:35:25 -0500 Subject: [PATCH 06/18] rever import sorting --- haystack/preview/components/readers/extractive.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/haystack/preview/components/readers/extractive.py b/haystack/preview/components/readers/extractive.py index 9176659487..0be7c832d9 100644 --- a/haystack/preview/components/readers/extractive.py +++ b/haystack/preview/components/readers/extractive.py @@ -1,17 +1,17 @@ -import math -import warnings from pathlib import Path from typing import Any, Dict, List, Optional, Tuple, Union +import math +import warnings -from haystack.preview import ComponentError, Document, ExtractedAnswer, component, default_from_dict, default_to_dict +from haystack.preview import component, default_from_dict, default_to_dict, ComponentError, Document, ExtractedAnswer from haystack.preview.lazy_imports import LazyImport with LazyImport( "Run 'pip install transformers[torch,sentencepiece]==4.34.0 sentence-transformers>=2.2.0'" ) as torch_and_transformers_import: - import torch - from tokenizers import Encoding from transformers import AutoModelForQuestionAnswering, AutoTokenizer + from tokenizers import Encoding + import torch @component From 82f16caec690713899fe550ae6b3e963cb98cc16 Mon Sep 17 00:00:00 2001 From: Grant Williams Date: Fri, 6 Oct 2023 11:07:07 -0500 Subject: [PATCH 07/18] Update to catch runtime errors to match haystack_hub changes --- test/modeling/test_model_loading.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/modeling/test_model_loading.py b/test/modeling/test_model_loading.py index c74072f6cc..859ad23d71 100644 --- a/test/modeling/test_model_loading.py +++ b/test/modeling/test_model_loading.py @@ -27,7 +27,7 @@ def test_basic_loading(pretrained_model_name_or_path, lm_class, monkeypatch): @pytest.mark.unit def test_basic_loading_unknown_model(): - with pytest.raises(OSError): + with pytest.raises(RuntimeError): get_language_model("model_that_doesnt_exist") From e88ccfd353984c3af5a3fb122502a6dcc9123e77 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Sun, 8 Oct 2023 12:18:37 +0200 Subject: [PATCH 08/18] add language parameter value to whisper test --- test/preview/components/audio/test_whisper_local.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/preview/components/audio/test_whisper_local.py b/test/preview/components/audio/test_whisper_local.py index 692d2a3c41..e8e4c64319 100644 --- a/test/preview/components/audio/test_whisper_local.py +++ b/test/preview/components/audio/test_whisper_local.py @@ -160,7 +160,7 @@ def test_transcribe_stream(self): @pytest.mark.integration @pytest.mark.skipif(sys.platform in ["win32", "cygwin"], reason="ffmpeg not installed on Windows CI") def test_whisper_local_transcriber(self, preview_samples_path): - comp = LocalWhisperTranscriber(model_name_or_path="medium") + comp = LocalWhisperTranscriber(model_name_or_path="medium", whisper_params={"language": "english"}) comp.warm_up() output = comp.run( audio_files=[ From e8f32dccf4973cba3720ea6f40bee5e141bac9be Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Wed, 11 Oct 2023 15:08:24 +0200 Subject: [PATCH 09/18] bump transformers version in linting preview workflow --- .github/workflows/linting_preview.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linting_preview.yml b/.github/workflows/linting_preview.yml index 3e4cbfdc89..37c2b9731d 100644 --- a/.github/workflows/linting_preview.yml +++ b/.github/workflows/linting_preview.yml @@ -71,7 +71,7 @@ jobs: - name: Install Haystack run: | - pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.32.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.0 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' pip install ./haystack-linter - name: Pylint From 2f2851641e12c0ff2d18ac5438145f955e1747c3 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Wed, 11 Oct 2023 15:22:32 +0200 Subject: [PATCH 10/18] bump transformers version in linting preview workflow --- .github/workflows/linting_preview.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linting_preview.yml b/.github/workflows/linting_preview.yml index 37c2b9731d..3a75252cc4 100644 --- a/.github/workflows/linting_preview.yml +++ b/.github/workflows/linting_preview.yml @@ -37,7 +37,7 @@ jobs: python-version: ${{ env.PYTHON_VERSION }} - name: Install Haystack - run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.32.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.0 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' - name: Mypy if: steps.files.outputs.any_changed == 'true' From 5229fdd7f3e0758ac161d6dfe7406fc0e9ea0c77 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Thu, 19 Oct 2023 08:35:03 +0200 Subject: [PATCH 11/18] bump version to v4.34.1 --- .github/workflows/e2e_preview.yml | 2 +- .github/workflows/linting_preview.yml | 4 ++-- .github/workflows/tests.yml | 4 ++-- .github/workflows/tests_preview.yml | 8 ++++---- haystack/preview/components/rankers/similarity.py | 2 +- haystack/preview/components/readers/extractive.py | 2 +- pyproject.toml | 4 ++-- .../notes/bump-transformers-to-4-34-38d045d8e42ea0a2.yaml | 2 +- 8 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/e2e_preview.yml b/.github/workflows/e2e_preview.yml index efb2edbce0..bf017a0a60 100644 --- a/.github/workflows/e2e_preview.yml +++ b/.github/workflows/e2e_preview.yml @@ -36,7 +36,7 @@ jobs: sudo apt install ffmpeg # for local Whisper tests - name: Install Haystack - run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.0 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' - name: Run tests run: pytest e2e/preview diff --git a/.github/workflows/linting_preview.yml b/.github/workflows/linting_preview.yml index 3a75252cc4..8cac2b6947 100644 --- a/.github/workflows/linting_preview.yml +++ b/.github/workflows/linting_preview.yml @@ -37,7 +37,7 @@ jobs: python-version: ${{ env.PYTHON_VERSION }} - name: Install Haystack - run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.0 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' - name: Mypy if: steps.files.outputs.any_changed == 'true' @@ -71,7 +71,7 @@ jobs: - name: Install Haystack run: | - pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.0 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' pip install ./haystack-linter - name: Pylint diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2419318360..0960fcdf53 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -201,7 +201,7 @@ jobs: python-version: ${{ env.PYTHON_VERSION }} - name: Install Haystack - run: pip install .[preview,dev] langdetect transformers[torch,sentencepiece]==4.34.0 sentence-transformers>=2.2.0 pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: pip install .[preview,dev] langdetect transformers[torch,sentencepiece]==4.34.1 sentence-transformers>=2.2.0 pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' - name: Run run: pytest --cov-report xml:coverage.xml --cov="haystack" -m "unit" test/preview @@ -945,7 +945,7 @@ jobs: sudo apt install ffmpeg # for local Whisper tests - name: Install Haystack - run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.0 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' - name: Run tests run: | diff --git a/.github/workflows/tests_preview.yml b/.github/workflows/tests_preview.yml index b11ac3a965..eee42ab4f0 100644 --- a/.github/workflows/tests_preview.yml +++ b/.github/workflows/tests_preview.yml @@ -116,7 +116,7 @@ jobs: python-version: ${{ env.PYTHON_VERSION }} - name: Install Haystack - run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.0 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' - name: Run run: pytest -m "unit" test/preview @@ -175,7 +175,7 @@ jobs: sudo apt install ffmpeg # for local Whisper tests - name: Install Haystack - run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.0 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' - name: Run run: pytest --maxfail=5 -m "integration" test/preview @@ -230,7 +230,7 @@ jobs: colima start - name: Install Haystack - run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.0 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' - name: Run Tika run: docker run -d -p 9998:9998 apache/tika:2.9.0.0 @@ -282,7 +282,7 @@ jobs: python-version: ${{ env.PYTHON_VERSION }} - name: Install Haystack - run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.0 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' - name: Run run: pytest --maxfail=5 -m "integration" test/preview -k 'not tika' diff --git a/haystack/preview/components/rankers/similarity.py b/haystack/preview/components/rankers/similarity.py index 483ec19310..1550b5c69e 100644 --- a/haystack/preview/components/rankers/similarity.py +++ b/haystack/preview/components/rankers/similarity.py @@ -8,7 +8,7 @@ logger = logging.getLogger(__name__) -with LazyImport(message="Run 'pip install transformers[torch,sentencepiece]==4.34.0'") as torch_and_transformers_import: +with LazyImport(message="Run 'pip install transformers[torch,sentencepiece]==4.34.1'") as torch_and_transformers_import: import torch from transformers import AutoModelForSequenceClassification, AutoTokenizer diff --git a/haystack/preview/components/readers/extractive.py b/haystack/preview/components/readers/extractive.py index 0be7c832d9..1ccfa0aa9e 100644 --- a/haystack/preview/components/readers/extractive.py +++ b/haystack/preview/components/readers/extractive.py @@ -7,7 +7,7 @@ from haystack.preview.lazy_imports import LazyImport with LazyImport( - "Run 'pip install transformers[torch,sentencepiece]==4.34.0 sentence-transformers>=2.2.0'" + "Run 'pip install transformers[torch,sentencepiece]==4.34.1 sentence-transformers>=2.2.0'" ) as torch_and_transformers_import: from transformers import AutoModelForQuestionAnswering, AutoTokenizer from tokenizers import Encoding diff --git a/pyproject.toml b/pyproject.toml index b034071e75..95753304ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,7 @@ dependencies = [ "requests", "httpx", "pydantic<2", - "transformers==4.34.0", + "transformers==4.34.1", "pandas", "rank_bm25", "scikit-learn>=1.3.0", # TF-IDF and metrics @@ -99,7 +99,7 @@ preview = [ "openai", ] inference = [ - "transformers[torch,sentencepiece]==4.34.0", + "transformers[torch,sentencepiece]==4.34.1", "sentence-transformers>=2.2.0", # See haystack/nodes/retriever/_embedding_encoder.py, _SentenceTransformersEmbeddingEncoder "huggingface-hub>=0.5.0", ] diff --git a/releasenotes/notes/bump-transformers-to-4-34-38d045d8e42ea0a2.yaml b/releasenotes/notes/bump-transformers-to-4-34-38d045d8e42ea0a2.yaml index 06357fc4ac..a147f7ef95 100644 --- a/releasenotes/notes/bump-transformers-to-4-34-38d045d8e42ea0a2.yaml +++ b/releasenotes/notes/bump-transformers-to-4-34-38d045d8e42ea0a2.yaml @@ -1,5 +1,5 @@ --- enhancements: - | - Upgrade Transformers to the latest version 4.34.0. + Upgrade Transformers to the latest version 4.34.1. This version adds support for the new Mistral, Persimmon, BROS, ViTMatte, and Nougat models. From db68156257b5cc333276bece7d5f7f1ec9a64813 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Thu, 19 Oct 2023 09:22:41 +0200 Subject: [PATCH 12/18] resolve mypy issue with reused variables --- haystack/preview/components/readers/extractive.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/haystack/preview/components/readers/extractive.py b/haystack/preview/components/readers/extractive.py index 968bae8c80..d41355e66e 100644 --- a/haystack/preview/components/readers/extractive.py +++ b/haystack/preview/components/readers/extractive.py @@ -199,17 +199,17 @@ def _postprocess( start_candidates = start_candidates.cpu() end_candidates = end_candidates.cpu() - start_candidates = [ + start_candidates_char_indices = [ [encoding.token_to_chars(start)[0] for start in candidates] for candidates, encoding in zip(start_candidates, encodings) ] - end_candidates = [ + end_candidates_char_indices = [ [encoding.token_to_chars(end)[1] for end in candidates] for candidates, encoding in zip(end_candidates, encodings) ] probabilities = candidates.values.cpu() - return start_candidates, end_candidates, probabilities + return start_candidates_char_indices, end_candidates_char_indices, probabilities def _nest_answers( self, From 515a3155bcb01876d6fddf5c15381b84d2c48d4a Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 24 Oct 2023 10:29:28 +0200 Subject: [PATCH 13/18] install openai-whisper without dependencies --- .github/workflows/e2e_preview.yml | 4 +++- .github/workflows/linting_preview.yml | 7 +++++-- .github/workflows/tests.yml | 8 ++++++-- .github/workflows/tests_preview.yml | 16 ++++++++++++---- 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/.github/workflows/e2e_preview.yml b/.github/workflows/e2e_preview.yml index bf017a0a60..905acc7e90 100644 --- a/.github/workflows/e2e_preview.yml +++ b/.github/workflows/e2e_preview.yml @@ -36,7 +36,9 @@ jobs: sudo apt install ffmpeg # for local Whisper tests - name: Install Haystack - run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: | + pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf tika 'azure-ai-formrecognizer>=3.2.0b2' + pip install --no-deps llvmlite numba 'openai-whisper>=20230918' # prevent outdated version of tiktoken pinned by openai-whisper - name: Run tests run: pytest e2e/preview diff --git a/.github/workflows/linting_preview.yml b/.github/workflows/linting_preview.yml index 858b377f8b..c291788f6d 100644 --- a/.github/workflows/linting_preview.yml +++ b/.github/workflows/linting_preview.yml @@ -38,7 +38,9 @@ jobs: python-version: ${{ env.PYTHON_VERSION }} - name: Install Haystack - run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: | + pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf tika 'azure-ai-formrecognizer>=3.2.0b2' + pip install --no-deps llvmlite numba 'openai-whisper>=20230918' # prevent outdated version of tiktoken pinned by openai-whisper - name: Mypy if: steps.files.outputs.any_changed == 'true' @@ -72,7 +74,8 @@ jobs: - name: Install Haystack run: | - pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf tika 'azure-ai-formrecognizer>=3.2.0b2' + pip install --no-deps llvmlite numba 'openai-whisper>=20230918' # prevent outdated version of tiktoken pinned by openai-whisper pip install ./haystack-linter - name: Pylint diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2b003cd2ff..d4e50eed2d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -202,7 +202,9 @@ jobs: python-version: ${{ env.PYTHON_VERSION }} - name: Install Haystack - run: pip install .[preview,dev] langdetect transformers[torch,sentencepiece]==4.34.1 sentence-transformers>=2.2.0 pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: | + pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf tika 'azure-ai-formrecognizer>=3.2.0b2' + pip install --no-deps llvmlite numba 'openai-whisper>=20230918' # prevent outdated version of tiktoken pinned by openai-whisper - name: Run run: pytest --cov-report xml:coverage.xml --cov="haystack" -m "unit" test/preview @@ -946,7 +948,9 @@ jobs: sudo apt install ffmpeg # for local Whisper tests - name: Install Haystack - run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: | + pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf tika 'azure-ai-formrecognizer>=3.2.0b2' + pip install --no-deps llvmlite numba 'openai-whisper>=20230918' # prevent outdated version of tiktoken pinned by openai-whisper - name: Run tests run: | diff --git a/.github/workflows/tests_preview.yml b/.github/workflows/tests_preview.yml index eee42ab4f0..b8feed0b4e 100644 --- a/.github/workflows/tests_preview.yml +++ b/.github/workflows/tests_preview.yml @@ -116,7 +116,9 @@ jobs: python-version: ${{ env.PYTHON_VERSION }} - name: Install Haystack - run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: | + pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf tika 'azure-ai-formrecognizer>=3.2.0b2' + pip install --no-deps llvmlite numba 'openai-whisper>=20230918' # prevent outdated version of tiktoken pinned by openai-whisper - name: Run run: pytest -m "unit" test/preview @@ -175,7 +177,9 @@ jobs: sudo apt install ffmpeg # for local Whisper tests - name: Install Haystack - run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: | + pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf tika 'azure-ai-formrecognizer>=3.2.0b2' + pip install --no-deps llvmlite numba 'openai-whisper>=20230918' # prevent outdated version of tiktoken pinned by openai-whisper - name: Run run: pytest --maxfail=5 -m "integration" test/preview @@ -230,7 +234,9 @@ jobs: colima start - name: Install Haystack - run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: | + pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf tika 'azure-ai-formrecognizer>=3.2.0b2' + pip install --no-deps llvmlite numba 'openai-whisper>=20230918' # prevent outdated version of tiktoken pinned by openai-whisper - name: Run Tika run: docker run -d -p 9998:9998 apache/tika:2.9.0.0 @@ -282,7 +288,9 @@ jobs: python-version: ${{ env.PYTHON_VERSION }} - name: Install Haystack - run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' + run: | + pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf tika 'azure-ai-formrecognizer>=3.2.0b2' + pip install --no-deps llvmlite numba 'openai-whisper>=20230918' # prevent outdated version of tiktoken pinned by openai-whisper - name: Run run: pytest --maxfail=5 -m "integration" test/preview -k 'not tika' From 55619fe51e20a32a4f3cbeaf6751d5b5d859fecf Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 24 Oct 2023 17:47:29 +0200 Subject: [PATCH 14/18] remove audio extra, update whisper install instructions --- haystack/nodes/audio/whisper_transcriber.py | 2 ++ haystack/preview/components/audio/whisper_local.py | 3 ++- pyproject.toml | 4 ---- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/haystack/nodes/audio/whisper_transcriber.py b/haystack/nodes/audio/whisper_transcriber.py index 1e95669cd3..8475d986a1 100644 --- a/haystack/nodes/audio/whisper_transcriber.py +++ b/haystack/nodes/audio/whisper_transcriber.py @@ -28,6 +28,8 @@ class WhisperTranscriber(BaseComponent): To use Whisper locally, install it following the instructions on the Whisper [GitHub repo](https://github.com/openai/whisper) and omit the `api_key` parameter. + You can work around a dependency conflict caused by openai-whisper pinning an older tiktoken version than required + by Haystack if you install via `pip install --no-deps numba llvmlite 'openai-whisper>=20230818'`. To use the API implementation, provide an api_key. You can get one by signing up for an [OpenAI account](https://beta.openai.com/). diff --git a/haystack/preview/components/audio/whisper_local.py b/haystack/preview/components/audio/whisper_local.py index e2f8526277..27a03c9bd0 100644 --- a/haystack/preview/components/audio/whisper_local.py +++ b/haystack/preview/components/audio/whisper_local.py @@ -6,7 +6,8 @@ from haystack.preview import component, Document, default_to_dict, ComponentError from haystack.preview.lazy_imports import LazyImport -with LazyImport("Run 'pip install openai-whisper'") as whisper_import: +with LazyImport("Run 'pip install transformers[torch]==4.34.1' if you don't have torch installed yet and then 'pip " + "install --no-deps numba llvmlite 'openai-whisper>=20230818'' to install whisper") as whisper_import: import torch import whisper diff --git a/pyproject.toml b/pyproject.toml index 7da2ab6436..6ab456c790 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,7 +62,6 @@ dependencies = [ "networkx", # graphs library "quantulum3", # quantities extraction from text "posthog", # telemetry - # audio's espnet-model-zoo requires huggingface-hub version <0.8 while we need >=0.5 to be able to use create_repo in FARMReader "tenacity", # retry decorator "sseclient-py", # server side events for OpenAI streaming "more_itertools", # utilities @@ -152,9 +151,6 @@ docstores = [ docstores-gpu = [ "farm-haystack[elasticsearch,faiss-gpu,weaviate,pinecone,opensearch]", ] -audio = [ - "openai-whisper" -] aws = [ "boto3", # Costraint botocore to avoid taking to much time to resolve the dependency tree. From fb1f66232f08aa1ddb67224c870a2f4ab12d5542 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 24 Oct 2023 17:50:47 +0200 Subject: [PATCH 15/18] remove audio extra, update whisper install instructions --- haystack/preview/components/audio/whisper_local.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/haystack/preview/components/audio/whisper_local.py b/haystack/preview/components/audio/whisper_local.py index 27a03c9bd0..39ca8f7f10 100644 --- a/haystack/preview/components/audio/whisper_local.py +++ b/haystack/preview/components/audio/whisper_local.py @@ -6,8 +6,8 @@ from haystack.preview import component, Document, default_to_dict, ComponentError from haystack.preview.lazy_imports import LazyImport -with LazyImport("Run 'pip install transformers[torch]==4.34.1' if you don't have torch installed yet and then 'pip " - "install --no-deps numba llvmlite 'openai-whisper>=20230818'' to install whisper") as whisper_import: +with LazyImport("Run 'pip install transformers[torch]==4.34.1' to install torch and " + "'pip install --no-deps numba llvmlite 'openai-whisper>=20230818'' to install whisper.") as whisper_import: import torch import whisper From 97f83528f0865caf251a0c7375f0350cfd63b986 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 24 Oct 2023 17:55:02 +0200 Subject: [PATCH 16/18] keep audio extra but add version --- haystack/nodes/audio/whisper_transcriber.py | 2 +- haystack/preview/components/audio/whisper_local.py | 6 ++++-- pyproject.toml | 3 +++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/haystack/nodes/audio/whisper_transcriber.py b/haystack/nodes/audio/whisper_transcriber.py index 8475d986a1..d43d3f3e22 100644 --- a/haystack/nodes/audio/whisper_transcriber.py +++ b/haystack/nodes/audio/whisper_transcriber.py @@ -29,7 +29,7 @@ class WhisperTranscriber(BaseComponent): To use Whisper locally, install it following the instructions on the Whisper [GitHub repo](https://github.com/openai/whisper) and omit the `api_key` parameter. You can work around a dependency conflict caused by openai-whisper pinning an older tiktoken version than required - by Haystack if you install via `pip install --no-deps numba llvmlite 'openai-whisper>=20230818'`. + by Haystack if you install via `pip install --no-deps numba llvmlite 'openai-whisper>=20230918'`. To use the API implementation, provide an api_key. You can get one by signing up for an [OpenAI account](https://beta.openai.com/). diff --git a/haystack/preview/components/audio/whisper_local.py b/haystack/preview/components/audio/whisper_local.py index 39ca8f7f10..7b4106cb08 100644 --- a/haystack/preview/components/audio/whisper_local.py +++ b/haystack/preview/components/audio/whisper_local.py @@ -6,8 +6,10 @@ from haystack.preview import component, Document, default_to_dict, ComponentError from haystack.preview.lazy_imports import LazyImport -with LazyImport("Run 'pip install transformers[torch]==4.34.1' to install torch and " - "'pip install --no-deps numba llvmlite 'openai-whisper>=20230818'' to install whisper.") as whisper_import: +with LazyImport( + "Run 'pip install transformers[torch]==4.34.1' to install torch and " + "'pip install --no-deps numba llvmlite 'openai-whisper>=20230918'' to install whisper." +) as whisper_import: import torch import whisper diff --git a/pyproject.toml b/pyproject.toml index 6ab456c790..d3d5ffea64 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -151,6 +151,9 @@ docstores = [ docstores-gpu = [ "farm-haystack[elasticsearch,faiss-gpu,weaviate,pinecone,opensearch]", ] +audio = [ + "openai-whisper>=20230918" +] aws = [ "boto3", # Costraint botocore to avoid taking to much time to resolve the dependency tree. From 328417898cb6a3904cd127e028c65fe4b93894d8 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 24 Oct 2023 18:07:14 +0200 Subject: [PATCH 17/18] keep audio extra with no constraints --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d3d5ffea64..f4d815cef7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -152,7 +152,7 @@ docstores-gpu = [ "farm-haystack[elasticsearch,faiss-gpu,weaviate,pinecone,opensearch]", ] audio = [ - "openai-whisper>=20230918" + "openai-whisper" ] aws = [ "boto3", From a15137a27dbb7311e76cd74f9562f9ec50c97302 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 24 Oct 2023 18:32:56 +0200 Subject: [PATCH 18/18] remove audio extra --- .github/workflows/linting.yml | 5 ++++- pyproject.toml | 3 --- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 012535bd7c..bd8c782db9 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -39,7 +39,9 @@ jobs: python-version: ${{ env.PYTHON_VERSION }} - name: Install Haystack - run: pip install ".[all,dev]" + run: | + pip install ".[all,dev]" + pip install --no-deps llvmlite numba "openai-whisper>=20230918" - name: Mypy if: steps.files.outputs.any_changed == 'true' @@ -74,6 +76,7 @@ jobs: - name: Install Haystack run: | pip install ".[all,dev]" + pip install --no-deps llvmlite numba "openai-whisper>=20230918" pip install ./haystack-linter - name: Pylint diff --git a/pyproject.toml b/pyproject.toml index f4d815cef7..6ab456c790 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -151,9 +151,6 @@ docstores = [ docstores-gpu = [ "farm-haystack[elasticsearch,faiss-gpu,weaviate,pinecone,opensearch]", ] -audio = [ - "openai-whisper" -] aws = [ "boto3", # Costraint botocore to avoid taking to much time to resolve the dependency tree.