Implement testing workflow for Icefall ASR

HSTEHSTEHSTE · Dec 12, 2023 · 973c5b0 · 973c5b0
1 parent 919a6df
commit 973c5b0
Show file tree

Hide file tree

Showing 5 changed files with 224 additions and 0 deletions.
diff --git a/.github/actions/icefall/Dockerfile b/.github/actions/icefall/Dockerfile
@@ -0,0 +1,48 @@
+# Base image: PyTorch runtime image (conda is used from /opt/conda below)
+FROM pytorch/pytorch:1.6.0-cuda10.1-cudnn7-runtime
+
+# Keep apt from prompting for input while the image is built
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install system-wide packages
+RUN apt-get update \
+     && apt-get install -y \
+        libgl1-mesa-glx \
+        libx11-xcb1 \
+        git \
+        gcc \
+        mono-mcs \
+        libavcodec-extra \
+        ffmpeg \
+        curl \
+        libsndfile-dev \
+        libsndfile1 \
+     && apt-get clean all \
+     && rm -r /var/lib/apt/lists/*
+
+RUN /opt/conda/bin/conda install --yes \
+    astropy \
+    matplotlib \
+    pandas \
+    scikit-learn \
+    scikit-image
+
+# Install necessary libraries for Icefall
+# Install k2; torchaudio is pinned so pip cannot replace the torch build that the k2 wheel targets
+RUN pip install torch==2.0.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
+RUN pip install torchaudio==2.0.2+cpu -f https://download.pytorch.org/whl/torch_stable.html
+RUN pip install k2==1.24.3.dev20230726+cpu.torch2.0.1 -f https://k2-fsa.github.io/k2/cpu.html
+
+# Install lhotse
+RUN pip install lhotse
+
+# Install Icefall. Every RUN starts a fresh shell, so `cd` and `export` do not carry over to
+# later instructions: clone to an absolute path and set PYTHONPATH with ENV instead.
+RUN git clone https://github.com/k2-fsa/icefall /opt/icefall
+RUN pip install -r /opt/icefall/requirements.txt
+ENV PYTHONPATH="${PYTHONPATH}:/opt/icefall"
+
+RUN pip install numba==0.50.0
+RUN pip install pytest-cov
+
+RUN pip install kaldiio
diff --git a/.github/actions/icefall/action.yml b/.github/actions/icefall/action.yml
@@ -0,0 +1,7 @@
+name: Test Icefall
+description: Run tests for Icefall
+runs:
+  using: composite
+  steps:
+    - shell: bash
+      run: $GITHUB_ACTION_PATH/run.sh  # run.sh is added in the same directory as this action.yml
diff --git a/.github/actions/icefall/run.sh b/.github/actions/icefall/run.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Run the PyTorchIcefall estimator tests with coverage; the exit status is 1 if pytest fails, 0 otherwise.
+status=0
+if ! pytest --cov-report=xml --cov=art --cov-append -q -vv tests/estimators/speech_recognition/test_pytorch_icefall.py --framework=pytorch --durations=0; then
+    status=1
+    echo "Failed estimators/speech_recognition/test_pytorch_icefall tests"
+fi
+exit "${status}"
diff --git a/.github/workflows/ci-icefall.yml b/.github/workflows/ci-icefall.yml
@@ -0,0 +1,37 @@
+name: CI PyTorchIcefall
+on:
+  # Run on manual trigger
+  workflow_dispatch:
+
+  # Run on pull requests
+  pull_request:
+    paths-ignore:
+      - '*.md'  # NOTE(review): `*` does not match `/`, so only root-level Markdown files are ignored - confirm intent
+
+  # Run on merge queue
+  merge_group:
+
+  # Run when pushing to main or dev branches
+  push:
+    branches:
+      - main
+      - dev*
+
+  # Run scheduled CI flow weekly (Sundays at 08:00 UTC)
+  schedule:
+    - cron: '0 8 * * 0'
+
+jobs:
+  test_icefall:
+    name: PyTorchIcefall
+    runs-on: ubuntu-latest
+    container: adversarialrobustnesstoolbox/art_testing_envs:icefall
+    steps:
+      - name: Checkout Repo
+        uses: actions/checkout@v3
+      - name: Run Test Action
+        uses: ./.github/actions/icefall
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v3
+        with:
+          fail_ci_if_error: true
diff --git a/tests/estimators/speech_recognition/test_pytorch_icefall.py b/tests/estimators/speech_recognition/test_pytorch_icefall.py
@@ -0,0 +1,124 @@
+# MIT License
+#
+# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2021
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+import logging
+
+import numpy as np
+import pytest
+
+from art.config import ART_NUMPY_DTYPE
+from tests.utils import ARTTestException
+
+logger = logging.getLogger(__name__)
+
+
+@pytest.mark.skip_module("icefall")
+@pytest.mark.skip_framework("tensorflow", "tensorflow2v1", "keras", "kerastf", "mxnet", "non_dl_frameworks")
+@pytest.mark.parametrize("device_type", ["cpu"])
+def test_pytorch_icefall(art_warning, expected_values, device_type):
+    import torch
+
+    from art.estimators.speech_recognition.pytorch_icefall import PyTorchIcefall
+
+    try:
+        # Initialize a speech recognizer
+        speech_recognizer = PyTorchIcefall()
+
+        # Load data for testing
+        expected_data = expected_values()
+
+        x1 = expected_data["x1"]
+        x2 = expected_data["x2"]
+        x3 = expected_data["x3"]
+        # expected_sizes = expected_data["expected_sizes"]
+        expected_transcriptions1 = expected_data["expected_transcriptions1"]
+        expected_transcriptions2 = expected_data["expected_transcriptions2"]
+        # expected_probs = expected_data["expected_probs"]
+        expected_gradients1 = expected_data["expected_gradients1"]
+        expected_gradients2 = expected_data["expected_gradients2"]
+        expected_gradients3 = expected_data["expected_gradients3"]
+
+        # Create signal data
+        x = np.array(
+            [
+                np.array(x1 * 100, dtype=ART_NUMPY_DTYPE),
+                np.array(x2 * 100, dtype=ART_NUMPY_DTYPE),
+                np.array(x3 * 100, dtype=ART_NUMPY_DTYPE),
+            ]
+        )
+
+        # Create labels
+        y = np.array(["SIX", "HI", "GOOD"])
+
+        # Test probability outputs
+        # probs, sizes = speech_recognizer.predict(x, batch_size=2,)
+        #
+        # np.testing.assert_array_almost_equal(probs[1][1], expected_probs, decimal=3)
+        # np.testing.assert_array_almost_equal(sizes, expected_sizes)
+
+        # Test transcription outputs
+        _ = speech_recognizer.predict(x[[0]], batch_size=2)
+
+        # Test transcription outputs
+        transcriptions = speech_recognizer.predict(x, batch_size=2)
+
+        assert (expected_transcriptions1 == transcriptions).all()
+
+        # Test transcription outputs, corner case
+        transcriptions = speech_recognizer.predict(np.array([x[0]]), batch_size=2)
+
+        assert (expected_transcriptions2 == transcriptions).all()
+
+        # Now test loss gradients
+        # Compute gradients
+        grads = speech_recognizer.loss_gradient(x, y)
+
+        assert grads[0].shape == (1300,)
+        assert grads[1].shape == (1500,)
+        assert grads[2].shape == (1400,)
+
+        np.testing.assert_array_almost_equal(grads[0][:20], expected_gradients1, decimal=-2)
+        np.testing.assert_array_almost_equal(grads[1][:20], expected_gradients2, decimal=-2)
+        np.testing.assert_array_almost_equal(grads[2][:20], expected_gradients3, decimal=-2)
+
+        # Train the estimator
+        with pytest.raises(NotImplementedError):
+            speech_recognizer.fit(x=x, y=y, batch_size=2, nb_epochs=5)
+
+        # Compute local shape
+        local_batch_size = len(x)
+        real_lengths = np.array([x_.shape[0] for x_ in x])
+        local_max_length = np.max(real_lengths)
+
+        # Reformat input
+        input_mask = np.zeros([local_batch_size, local_max_length], dtype=np.float64)
+        original_input = np.zeros([local_batch_size, local_max_length], dtype=np.float64)
+
+        for local_batch_size_idx in range(local_batch_size):
+            input_mask[local_batch_size_idx, : len(x[local_batch_size_idx])] = 1
+            original_input[local_batch_size_idx, : len(x[local_batch_size_idx])] = x[local_batch_size_idx]
+
+        # compute_loss_and_decoded_output
+        loss, decoded_output = speech_recognizer.compute_loss_and_decoded_output(
+            masked_adv_input=torch.tensor(original_input), original_output=y
+        )
+
+        assert loss.detach().numpy() == pytest.approx(46.3156, abs=20.0)
+        assert all(decoded_output == ["EH", "EH", "EH"])
+
+    except ARTTestException as e:
+        art_warning(e)