From 3538c25cc5d660648c6e50d5873289a2e73c450f Mon Sep 17 00:00:00 2001 From: Jordi Mas Date: Tue, 7 May 2024 13:28:43 +0200 Subject: [PATCH] Update to whisper-ctranslate2 0.4.3 and the depending stack. Improvements in WER --- benchmark/results.json | 46 +++++++++++++++--------------- transcribe-batch/docker/Dockerfile | 2 +- transcribe-batch/requirements.txt | 6 ++-- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/benchmark/results.json b/benchmark/results.json index 9b9f547..a0695a5 100644 --- a/benchmark/results.json +++ b/benchmark/results.json @@ -4,30 +4,30 @@ { "audio": "samples/15GdH9-curt.mp3", "wer": "72.57", - "time": 86 + "time": 83 }, { "audio": "samples/EloiBadiaCat.mp3", - "wer": "22.26", - "time": 69 + "wer": "22.57", + "time": 63 }, { "audio": "samples/Son_Goku_catalan_valencian_voice.ogg", - "wer": "51.90", - "time": 32 + "wer": "40.51", + "time": 27 }, { "audio": "samples/Universal_Declaration_of_Human_Rights_-_cat_-_nv.ogg", - "wer": "39.51", - "time": 202 + "wer": "31.38", + "time": 200 }, { "audio": "samples/Ona_catalan-balear.mp3", - "wer": "46.81", - "time": 37 + "wer": "46.01", + "time": 38 }, { - "avg_wer": "46.61" + "avg_wer": "42.61" } ] }, @@ -35,38 +35,38 @@ "medium": [ { "audio": "samples/15GdH9-curt.mp3", - "wer": "35.43", - "time": 174 + "wer": "35.62", + "time": 181 }, { "audio": "samples/EloiBadiaCat.mp3", - "wer": "15.05", - "time": 167 + "wer": "14.73", + "time": 175 }, { "audio": "samples/Son_Goku_catalan_valencian_voice.ogg", - "wer": "25.32", - "time": 50 + "wer": "27.85", + "time": 52 }, { "audio": "samples/Universal_Declaration_of_Human_Rights_-_cat_-_nv.ogg", - "wer": "36.50", - "time": 513 + "wer": "19.12", + "time": 532 }, { "audio": "samples/Ona_catalan-balear.mp3", - "wer": "22.87", - "time": 88 + "wer": "21.54", + "time": 92 }, { - "avg_wer": "27.03" + "avg_wer": "23.77" } ] }, { "totals": { - "wer": "36.82", - "time": "1418" + "wer": "33.19", + "time": "1443" } } ] \ No newline at end of file diff --git a/transcribe-batch/docker/Dockerfile b/transcribe-batch/docker/Dockerfile index e4731e3..64f96df 100644 --- a/transcribe-batch/docker/Dockerfile +++ b/transcribe-batch/docker/Dockerfile @@ -13,7 +13,7 @@ RUN apt-get install python3-pip python3-dev -y --no-install-recommends # Solve errors: # - Error: libcudnn_ops_infer.so.8: cannot open shared object file: No such file or directory # - Error: libcublasLt.so.11: cannot open shared object file: No such file or directory -RUN apt-get install libcudnn8=8.8.1.3-1+cuda11.8 && apt-get install libcublas-11-8 -y +RUN apt-get install libcudnn8=8.8.1.3-1+cuda11.8 && apt-get install libcublas-12-3 && apt-get install libcublas-11-8 -y WORKDIR /srv diff --git a/transcribe-batch/requirements.txt b/transcribe-batch/requirements.txt index f4f96a1..561f3a1 100644 --- a/transcribe-batch/requirements.txt +++ b/transcribe-batch/requirements.txt @@ -1,5 +1,5 @@ psutil -ctranslate2==3.22 -faster-whisper==0.10.1 -whisper-ctranslate2==0.3.7 +ctranslate2==4.2.1 +faster-whisper==1.0.2 +whisper-ctranslate2==0.4.3 langdetect==1.0.9