diff --git a/.github/actions/icefall/Dockerfile b/.github/actions/icefall/Dockerfile
new file mode 100644
index 0000000000..23366163a4
--- /dev/null
+++ b/.github/actions/icefall/Dockerfile
@@ -0,0 +1,48 @@
+# Get base from a pytorch image
+FROM pytorch/pytorch:1.6.0-cuda10.1-cudnn7-runtime
+
+# Set to install things in non-interactive mode
+ENV DEBIAN_FRONTEND noninteractive
+
+# Install system wide softwares
+RUN apt-get update \
+    && apt-get install -y \
+        libgl1-mesa-glx \
+        libx11-xcb1 \
+        git \
+        gcc \
+        mono-mcs \
+        libavcodec-extra \
+        ffmpeg \
+        curl \
+        libsndfile-dev \
+        libsndfile1 \
+    && apt-get clean all \
+    && rm -r /var/lib/apt/lists/*
+
+RUN /opt/conda/bin/conda install --yes \
+    astropy \
+    matplotlib \
+    pandas \
+    scikit-learn \
+    scikit-image
+
+# Install necessary libraries for Icefall
+# Install k2
+RUN pip install torch==2.0.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
+RUN pip install torchaudio
+RUN pip install k2==1.24.3.dev20230726+cpu.torch2.0.1 -f https://k2-fsa.github.io/k2/cpu.html
+
+# Install lhotse
+RUN pip install lhotse
+
+# Install Icefall
+# Every RUN starts a fresh shell, so `cd`/`export` do not persist: use absolute paths and ENV instead
+RUN git clone https://github.com/k2-fsa/icefall /opt/icefall
+RUN pip install -r /opt/icefall/requirements.txt
+ENV PYTHONPATH=/opt/icefall:${PYTHONPATH}
+
+RUN pip install numba==0.50.0
+RUN pip install pytest-cov
+
+RUN pip install kaldiio
\ No newline at end of file
diff --git a/.github/actions/icefall/action.yml b/.github/actions/icefall/action.yml
new file mode 100644
index 0000000000..123e26d828
--- /dev/null
+++ b/.github/actions/icefall/action.yml
@@ -0,0 +1,7 @@
+name: 'Test Icefall'
+description: 'Run tests for Icefall'
+runs:
+  using: 'composite'
+  steps:
+    - run: $GITHUB_ACTION_PATH/run.sh
+      shell: bash
diff --git a/.github/actions/icefall/run.sh b/.github/actions/icefall/run.sh
new file mode 100755
index 0000000000..25c5e66a53
--- /dev/null
+++ b/.github/actions/icefall/run.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+exit_code=0
+
+pytest --cov-report=xml --cov=art --cov-append -q -vv tests/estimators/speech_recognition/test_pytorch_icefall.py --framework=pytorch --durations=0
+if [[ $? -ne 0 ]]; then exit_code=1; echo "Failed estimators/speech_recognition/test_pytorch_icefall tests"; fi
+
+exit ${exit_code}
diff --git a/.github/workflows/ci-icefall.yml b/.github/workflows/ci-icefall.yml
new file mode 100644
index 0000000000..51f7d648de
--- /dev/null
+++ b/.github/workflows/ci-icefall.yml
@@ -0,0 +1,37 @@
+name: CI PyTorchIcefall
+on:
+  # Run on manual trigger
+  workflow_dispatch:
+
+  # Run on pull requests
+  pull_request:
+    paths-ignore:
+      - '*.md'
+
+  # Run on merge queue
+  merge_group:
+
+  # Run when pushing to main or dev branches
+  push:
+    branches:
+      - main
+      - dev*
+
+  # Run scheduled CI flow weekly (Sundays at 08:00 UTC)
+  schedule:
+    - cron: '0 8 * * 0'
+
+jobs:
+  test_icefall:
+    name: PyTorchIcefall
+    runs-on: ubuntu-latest
+    container: adversarialrobustnesstoolbox/art_testing_envs:icefall
+    steps:
+      - name: Checkout Repo
+        uses: actions/checkout@v3
+      - name: Run Test Action
+        uses: ./.github/actions/icefall
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v3
+        with:
+          fail_ci_if_error: true
diff --git a/tests/estimators/speech_recognition/test_pytorch_icefall.py
b/tests/estimators/speech_recognition/test_pytorch_icefall.py new file mode 100644 index 0000000000..e2d08a82bb --- /dev/null +++ b/tests/estimators/speech_recognition/test_pytorch_icefall.py @@ -0,0 +1,124 @@ +# MIT License +# +# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2021 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+import logging
+
+import numpy as np
+import pytest
+
+from art.config import ART_NUMPY_DTYPE
+from tests.utils import ARTTestException
+
+logger = logging.getLogger(__name__)
+
+
+@pytest.mark.skip_module("icefall")
+@pytest.mark.skip_framework("tensorflow", "tensorflow2v1", "keras", "kerastf", "mxnet", "non_dl_frameworks")
+@pytest.mark.parametrize("device_type", ["cpu"])
+def test_pytorch_icefall(art_warning, expected_values, device_type):
+    import torch
+
+    from art.estimators.speech_recognition.pytorch_icefall import PyTorchIcefall
+
+    try:
+        # Initialize a speech recognizer
+        speech_recognizer = PyTorchIcefall()
+
+        # Load data for testing
+        expected_data = expected_values()
+
+        x1 = expected_data["x1"]
+        x2 = expected_data["x2"]
+        x3 = expected_data["x3"]
+        # expected_sizes = expected_data["expected_sizes"]
+        expected_transcriptions1 = expected_data["expected_transcriptions1"]
+        expected_transcriptions2 = expected_data["expected_transcriptions2"]
+        # expected_probs = expected_data["expected_probs"]
+        expected_gradients1 = expected_data["expected_gradients1"]
+        expected_gradients2 = expected_data["expected_gradients2"]
+        expected_gradients3 = expected_data["expected_gradients3"]
+
+        # Create signal data; samples differ in length (see gradient shape checks), so use an object array
+        x = np.array(
+            [
+                np.array(x1 * 100, dtype=ART_NUMPY_DTYPE),
+                np.array(x2 * 100, dtype=ART_NUMPY_DTYPE),
+                np.array(x3 * 100, dtype=ART_NUMPY_DTYPE),
+            ],
+            dtype=object,
+        )
+
+        # Create labels
+        y = np.array(["SIX", "HI", "GOOD"])
+
+        # Test probability outputs
+        # probs, sizes = speech_recognizer.predict(x, batch_size=2,)
+        #
+        # np.testing.assert_array_almost_equal(probs[1][1], expected_probs, decimal=3)
+        # np.testing.assert_array_almost_equal(sizes, expected_sizes)
+
+        # Smoke-test prediction on a single-sample batch (output is discarded)
+        _ = speech_recognizer.predict(x[[0]], batch_size=2)
+
+        # Test transcription outputs
+        transcriptions = speech_recognizer.predict(x, batch_size=2)
+
+        assert (expected_transcriptions1 == transcriptions).all()
+
+        # Test transcription outputs, corner case
+        transcriptions = speech_recognizer.predict(np.array([x[0]]), batch_size=2)
+
+        assert (expected_transcriptions2 == transcriptions).all()
+
+        # Now test loss gradients: compute gradients
+        grads = speech_recognizer.loss_gradient(x, y)
+
+        assert grads[0].shape == (1300,)
+        assert grads[1].shape == (1500,)
+        assert grads[2].shape == (1400,)
+
+        np.testing.assert_array_almost_equal(grads[0][:20], expected_gradients1, decimal=-2)
+        np.testing.assert_array_almost_equal(grads[1][:20], expected_gradients2, decimal=-2)
+        np.testing.assert_array_almost_equal(grads[2][:20], expected_gradients3, decimal=-2)
+
+        # Train the estimator
+        with pytest.raises(NotImplementedError):
+            speech_recognizer.fit(x=x, y=y, batch_size=2, nb_epochs=5)
+
+        # Compute local shape
+        local_batch_size = len(x)
+        real_lengths = np.array([x_.shape[0] for x_ in x])
+        local_max_length = np.max(real_lengths)
+
+        # Reformat input
+        input_mask = np.zeros([local_batch_size, local_max_length], dtype=np.float64)
+        original_input = np.zeros([local_batch_size, local_max_length], dtype=np.float64)
+
+        for local_batch_size_idx, x_ in enumerate(x):
+            input_mask[local_batch_size_idx, : len(x_)] = 1
+            original_input[local_batch_size_idx, : len(x_)] = x_
+
+        # compute_loss_and_decoded_output
+        loss, decoded_output = speech_recognizer.compute_loss_and_decoded_output(
+            masked_adv_input=torch.tensor(original_input), original_output=y
+        )
+
+        assert loss.detach().numpy() == pytest.approx(46.3156, abs=20.0)
+        assert list(decoded_output) == ["EH", "EH", "EH"]
+
+    except ARTTestException as e:
+        art_warning(e)