Skip to content

Add truncate and normalize parameters to TEI Embedders (#7460) #12467

Add truncate and normalize parameters to TEI Embedders (#7460)

Add truncate and normalize parameters to TEI Embedders (#7460) #12467

Workflow file for this run

# If you change this name also do it in tests_skipper.yml and ci_metrics.yml
name: Tests
on:
workflow_dispatch: # Activate this workflow manually
push:
branches:
- main
# release branches have the form v1.9.x
- "v[0-9].*[0-9].x"
pull_request:
types:
- opened
- reopened
- synchronize
- ready_for_review
paths:
# Keep the list in sync with the paths defined in the `tests_skipper.yml` workflow
- "haystack/**/*.py"
- "haystack/core/pipeline/predefined/*"
- "test/**/*.py"
- "pyproject.toml"
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
CORE_AZURE_CS_ENDPOINT: ${{ secrets.CORE_AZURE_CS_ENDPOINT }}
CORE_AZURE_CS_API_KEY: ${{ secrets.CORE_AZURE_CS_API_KEY }}
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PYTHON_VERSION: "3.8"
HATCH_VERSION: "1.9.3"
jobs:
black:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "${{ env.PYTHON_VERSION }}"
- name: Install Hatch
run: pip install hatch==${{ env.HATCH_VERSION }}
- name: Check status
run: hatch run default:format-check
- name: Calculate alert data
id: calculator
shell: bash
if: (success() || failure()) && github.ref_name == 'main'
run: |
if [ "${{ job.status }}" = "success" ]; then
echo "alert_type=success" >> "$GITHUB_OUTPUT";
else
echo "alert_type=error" >> "$GITHUB_OUTPUT";
fi
- name: Send event to Datadog
if: (success() || failure()) && github.ref_name == 'main'
uses: masci/datadog@v1
with:
api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
api-url: https://api.datadoghq.eu
events: |
- title: "${{ github.workflow }} workflow"
text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
alert_type: "${{ steps.calculator.outputs.alert_type }}"
source_type_name: "Github"
host: ${{ github.repository_owner }}
tags:
- "project:${{ github.repository }}"
- "job:${{ github.job }}"
- "run_id:${{ github.run_id }}"
- "workflow:${{ github.workflow }}"
- "branch:${{ github.ref_name }}"
- "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
install-dependencies:
name: Install and cache ${{ matrix.os }} dependencies
needs: black
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "${{ env.PYTHON_VERSION }}"
- name: Install Hatch
run: pip install hatch==${{ env.HATCH_VERSION }}
- name: Install dependencies
# To actually install and sync the dependencies
run: hatch run test:pip list
- uses: actions/cache@v4
with:
path: ${{ env.pythonLocation }}
key: pip-${{ runner.os }}-${{ github.run_id }}-${{ hashFiles('pyproject.toml') }}
unit-tests:
name: Unit / ${{ matrix.os }}
needs: install-dependencies
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
- windows-latest
- macos-latest
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "${{ env.PYTHON_VERSION }}"
- name: Restore Python dependencies
uses: actions/cache/restore@v4
with:
path: ${{ env.pythonLocation }}
key: pip-${{ runner.os }}-${{ github.run_id }}-${{ hashFiles('pyproject.toml') }}
- name: Run
run: hatch run test:unit
- name: Coveralls
# We upload only coverage for ubuntu as handling both os
# complicates the workflow too much for little to no gain
if: matrix.os == 'ubuntu-latest'
uses: coverallsapp/github-action@v2
with:
path-to-lcov: coverage.xml
- name: Calculate alert data
id: calculator
shell: bash
if: (success() || failure()) && github.ref_name == 'main'
run: |
if [ "${{ job.status }}" = "success" ]; then
echo "alert_type=success" >> "$GITHUB_OUTPUT";
else
echo "alert_type=error" >> "$GITHUB_OUTPUT";
fi
- name: Send event to Datadog
if: (success() || failure()) && github.ref_name == 'main'
uses: masci/datadog@v1
with:
api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
api-url: https://api.datadoghq.eu
events: |
- title: "${{ github.workflow }} workflow"
text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
alert_type: "${{ steps.calculator.outputs.alert_type }}"
source_type_name: "Github"
host: ${{ github.repository_owner }}
tags:
- "project:${{ github.repository }}"
- "job:${{ github.job }}"
- "run_id:${{ github.run_id }}"
- "workflow:${{ github.workflow }}"
- "branch:${{ github.ref_name }}"
- "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
integration-tests-linux:
name: Integration / ubuntu-latest
needs: unit-tests
runs-on: ubuntu-latest
services:
tika:
image: apache/tika:2.9.0.0
ports:
- 9998:9998
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "${{ env.PYTHON_VERSION }}"
- name: Install dependencies
run: |
sudo apt update
sudo apt install ffmpeg # for local Whisper tests
- name: Restore Python dependencies
uses: actions/cache/restore@v4
with:
path: ${{ env.pythonLocation }}
key: pip-${{ runner.os }}-${{ github.run_id }}-${{ hashFiles('pyproject.toml') }}
- name: Run
run: hatch run test:integration
- name: Calculate alert data
id: calculator
shell: bash
if: (success() || failure()) && github.ref_name == 'main'
run: |
if [ "${{ job.status }}" = "success" ]; then
echo "alert_type=success" >> "$GITHUB_OUTPUT";
else
echo "alert_type=error" >> "$GITHUB_OUTPUT";
fi
- name: Send event to Datadog
if: (success() || failure()) && github.ref_name == 'main'
uses: masci/datadog@v1
with:
api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
api-url: https://api.datadoghq.eu
events: |
- title: "${{ github.workflow }} workflow"
text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
alert_type: "${{ steps.calculator.outputs.alert_type }}"
source_type_name: "Github"
host: ${{ github.repository_owner }}
tags:
- "project:${{ github.repository }}"
- "job:${{ github.job }}"
- "run_id:${{ github.run_id }}"
- "workflow:${{ github.workflow }}"
- "branch:${{ github.ref_name }}"
- "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
integration-tests-macos:
name: Integration / macos-latest
needs: unit-tests
runs-on: macos-latest
env:
HAYSTACK_MPS_ENABLED: false
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "${{ env.PYTHON_VERSION }}"
- name: Install dependencies
run: |
brew install ffmpeg # for local Whisper tests
- name: Restore Python dependencies
uses: actions/cache/restore@v4
with:
path: ${{ env.pythonLocation }}
key: pip-${{ runner.os }}-${{ github.run_id }}-${{ hashFiles('pyproject.toml') }}
- name: Run
run: hatch run test:integration-mac
- name: Calculate alert data
id: calculator
shell: bash
if: (success() || failure()) && github.ref_name == 'main'
run: |
if [ "${{ job.status }}" = "success" ]; then
echo "alert_type=success" >> "$GITHUB_OUTPUT";
else
echo "alert_type=error" >> "$GITHUB_OUTPUT";
fi
- name: Send event to Datadog
if: (success() || failure()) && github.ref_name == 'main'
uses: masci/datadog@v1
with:
api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
api-url: https://api.datadoghq.eu
events: |
- title: "${{ github.workflow }} workflow"
text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
alert_type: "${{ steps.calculator.outputs.alert_type }}"
source_type_name: "Github"
host: ${{ github.repository_owner }}
tags:
- "project:${{ github.repository }}"
- "job:${{ github.job }}"
- "run_id:${{ github.run_id }}"
- "workflow:${{ github.workflow }}"
- "branch:${{ github.ref_name }}"
- "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
integration-tests-windows:
name: Integration / windows-latest
needs: unit-tests
runs-on: windows-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "${{ env.PYTHON_VERSION }}"
- name: Restore Python dependencies
uses: actions/cache/restore@v4
with:
path: ${{ env.pythonLocation }}
key: pip-${{ runner.os }}-${{ github.run_id }}-${{ hashFiles('pyproject.toml') }}
- name: Run
run: hatch run test:integration-windows
- name: Calculate alert data
id: calculator
shell: bash
if: (success() || failure()) && github.ref_name == 'main'
run: |
if [ "${{ job.status }}" = "success" ]; then
echo "alert_type=success" >> "$GITHUB_OUTPUT";
else
echo "alert_type=error" >> "$GITHUB_OUTPUT";
fi
- name: Send event to Datadog
if: (success() || failure()) && github.ref_name == 'main'
uses: masci/datadog@v1
with:
api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
api-url: https://api.datadoghq.eu
events: |
- title: "${{ github.workflow }} workflow"
text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
alert_type: "${{ steps.calculator.outputs.alert_type }}"
source_type_name: "Github"
host: ${{ github.repository_owner }}
tags:
- "project:${{ github.repository }}"
- "job:${{ github.job }}"
- "run_id:${{ github.run_id }}"
- "workflow:${{ github.workflow }}"
- "branch:${{ github.ref_name }}"
- "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
catch-all:
name: Catch-all check
runs-on: ubuntu-latest
# This job will be executed only after all the other tests
# are successful.
# This way we'll be able to mark only this test as required
# and skip it accordingly.
needs:
- integration-tests-linux
- integration-tests-macos
- integration-tests-windows
steps:
- name: Finisher
run: echo "Finish him!"