Merge branch 'main' of github.com:ahmetoner/whisper-asr-webservice

DennisTheD · Oct 8, 2023 · ae8b2f1 · ae8b2f1
2 parents a0aadf4 + 50aefcf
commit ae8b2f1
Show file tree

Hide file tree

Showing 26 changed files with 778 additions and 474 deletions.
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
@@ -0,0 +1,26 @@
+name: CI  # installs mkdocs-material and publishes the docs with `mkdocs gh-deploy`
+on:
+  push:
+    branches:
+      - main
+      - docs
+permissions:
+  contents: write  # presumably needed so `mkdocs gh-deploy` can push the built site - confirm
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    if: github.event.repository.fork == false  # skip this job in forks
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+      - run: echo "cache_id=$(date --utc '+%V')" >> "$GITHUB_ENV"
+      - uses: actions/cache@v3
+        with:
+          key: mkdocs-material-${{ env.cache_id }}  # cache_id is the UTC week number (`date +%V`), so the key changes weekly
+          path: .cache
+          restore-keys: |
+            mkdocs-material-
+      - run: pip install mkdocs-material pymdown-extensions
+      - run: mkdocs gh-deploy --force
diff --git a/.gitignore b/.gitignore
@@ -39,4 +39,6 @@ MANIFEST.in
 pip-wheel-metadata
 /poetry.toml
 
-poetry/core/*
+poetry/core/*
+
+public
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -0,0 +1,138 @@
+Changelog
+=========
+
+Unreleased
+----------
+
+### Changed
+
+- Fixed misspelling in "Word level timestamps"
+
+[1.2.0] (2023-10-01)
+--------------------
+
+### Changed
+
+- Upgraded
+    - [openai/whisper](https://github.com/openai/whisper) to [v20230918](https://github.com/openai/whisper/releases/tag/v20230918)
+    - [guillaumekln/faster-whisper](https://github.com/guillaumekln/faster-whisper) to [v0.9.0](https://github.com/guillaumekln/faster-whisper/releases/tag/v0.9.0)
+
+### Updated
+
+- Updated model conversion method (for Faster Whisper) to use Hugging Face downloader
+- Updated default model paths to `~/.cache/whisper` or `/root/.cache/whisper`.
+    - For customization, modify the `ASR_MODEL_PATH` environment variable.
+    - Ensure Docker volume is set for the corresponding directory to use caching.
+      ```bash
+      docker run -d -p 9000:9000 -e ASR_MODEL_PATH=/data/whisper -v $PWD/yourlocaldir:/data/whisper onerahmet/openai-whisper-asr-webservice:latest
+      ```
+- Removed the `triton` dependency from `poetry.lock` to ensure the stability of the pipeline for `ARM-based` Docker images
+
+[1.1.1] (2023-05-29)
+--------------------
+
+### Changed
+
+- Handle GPUs that don't support float16 (#94) in #103
+- Update compute type in #108
+- Add word level functionality for Faster Whisper in #109
+
+[1.1.0] (2023-04-17)
+--------------------
+
+### Changed
+
+- Docs in #72
+- Fix language code typo in #77
+- Adds support for FasterWhisper in #81
+- Add an optional param to skip the encoding step in #82
+- Faster whisper in #92
+
+[1.0.6] (2023-02-05)
+--------------------
+
+### Changed
+
+- Update README.md in #58
+- Update the versions (#68) in #69
+- Fix gunicorn run command and remove deprecated poetry run script in #70
+- Move torch installation method into the pyproject.toml file in #71
+- Add prompt to ASR in #66
+
+[1.0.5] (2022-12-08)
+--------------------
+
+### Changed
+
+- Make Swagger docs not depend on an internet connection (#43) in #52
+- Add new large model v2 in #53
+
+[1.0.4] (2022-11-28)
+--------------------
+
+### Changed
+
+- Make Swagger docs not depend on an internet connection (#43) in #51
+- Fixed markdown linting warnings in README in #48
+- Explicit macOS readme with explanation for no-GPU [closes #44] in #47
+
+[1.0.3-beta] (2022-11-17)
+-------------------------
+
+### Changed
+
+- Combine transcribe endpoints in #36
+- Add multi worker support with gunicorn in #37
+- Add multi platform (amd & arm) support in #39
+- Upgrade Cuda version to 11.7 in #40
+- Lock to the latest whisper version (eff383) in #41
+
+[1.0.2-beta] (2022-10-04)
+-------------------------
+
+### Changed
+
+- Add mutex lock to the model in #19
+- Subtitles in #21
+- Add gpu support and create Docker image for cuda with GitHub flow in #22
+
+[1.0.1-beta] (2022-09-27)
+-------------------------
+
+### Changed
+
+- Init GitHub runners in #10
+- Lock Whisper dependency with b4308... revision number to prevent build crashes in #15
+
+[1.0.0-beta] (2022-09-25)
+-------------------------
+
+### Changed
+
+- Docker init in #1
+- Create LICENCE in #2
+- Fastapi init in #3
+- Avoid temp file in #4
+- Translate init in #5
+- Add MP3 support by using ffmpeg instead of librosa in #8
+- Add language detection endpoint in #9
+
+[1.2.0]: https://github.com/ahmetoner/whisper-asr-webservice/releases/tag/v1.2.0
+
+[1.1.1]: https://github.com/ahmetoner/whisper-asr-webservice/releases/tag/v1.1.1
+
+[1.1.0]: https://github.com/ahmetoner/whisper-asr-webservice/releases/tag/v1.1.0
+
+[1.0.6]: https://github.com/ahmetoner/whisper-asr-webservice/releases/tag/v1.0.6
+
+[1.0.5]: https://github.com/ahmetoner/whisper-asr-webservice/releases/tag/v1.0.5
+
+[1.0.4]: https://github.com/ahmetoner/whisper-asr-webservice/releases/tag/v1.0.4
+
+[1.0.3-beta]: https://github.com/ahmetoner/whisper-asr-webservice/releases/tag/v1.0.3-beta
+
+[1.0.2-beta]: https://github.com/ahmetoner/whisper-asr-webservice/releases/tag/v1.0.2-beta
+
+[1.0.1-beta]: https://github.com/ahmetoner/whisper-asr-webservice/releases/tag/v1.0.1-beta
+
+[1.0.0-beta]: https://github.com/ahmetoner/whisper-asr-webservice/releases/tag/1.0.0-beta
diff --git a/Dockerfile b/Dockerfile
@@ -12,7 +12,7 @@ RUN export DEBIAN_FRONTEND=noninteractive \
 
 RUN python3 -m venv $POETRY_VENV \
     && $POETRY_VENV/bin/pip install -U pip setuptools \
-    && $POETRY_VENV/bin/pip install poetry==1.4.0
+    && $POETRY_VENV/bin/pip install poetry==1.6.1
 
 ENV PATH="${PATH}:${POETRY_VENV}/bin"
 
@@ -25,7 +25,8 @@ COPY --from=swagger-ui /usr/share/nginx/html/swagger-ui-bundle.js swagger-ui-ass
 RUN poetry config virtualenvs.in-project true
 RUN poetry install
 
-RUN git clone https://github.com/m-bain/whisperX.git \
+RUN $POETRY_VENV/bin/pip install pandas transformers nltk pyannote.audio
+RUN git clone --depth 1 https://github.com/m-bain/whisperX.git \
     && cd whisperX \
     && $POETRY_VENV/bin/pip install -e .
 

diff --git a/Dockerfile.gpu b/Dockerfile.gpu
@@ -1,5 +1,5 @@
 FROM swaggerapi/swagger-ui:v4.18.2 AS swagger-ui
-FROM nvidia/cuda:11.7.1-base-ubuntu22.04
+FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04
 
 ENV PYTHON_VERSION=3.10
 ENV POETRY_VENV=/app/.venv
@@ -20,7 +20,7 @@ RUN ln -s -f /usr/bin/python${PYTHON_VERSION} /usr/bin/python3 && \
 
 RUN python3 -m venv $POETRY_VENV \
     && $POETRY_VENV/bin/pip install -U pip setuptools \
-    && $POETRY_VENV/bin/pip install poetry==1.4.0
+    && $POETRY_VENV/bin/pip install poetry==1.6.1
 
 ENV PATH="${PATH}:${POETRY_VENV}/bin"
 
@@ -36,13 +36,13 @@ COPY --from=swagger-ui /usr/share/nginx/html/swagger-ui.css swagger-ui-assets/sw
 COPY --from=swagger-ui /usr/share/nginx/html/swagger-ui-bundle.js swagger-ui-assets/swagger-ui-bundle.js
 
 RUN poetry install
-RUN $POETRY_VENV/bin/pip install torch torchaudio \
-    --index-url https://download.pytorch.org/whl/cu117 \
+# pip honours only one --index-url (a repeated flag replaces the earlier one), so PyPI
+# stays the primary index and the PyTorch CUDA 11.8 wheel index is added as an extra index.
+RUN $POETRY_VENV/bin/pip install torch torchaudio pandas transformers nltk pyannote.audio \
+    --extra-index-url https://download.pytorch.org/whl/cu118 \
     --index-url https://pypi.org/simple/
 
-RUN git clone https://github.com/m-bain/whisperX.git \
+RUN git clone --depth 1 https://github.com/m-bain/whisperX.git \
     && cd whisperX \
-    && $POETRY_VENV/bin/pip install -e .
+    && $POETRY_VENV/bin/pip install --no-dependencies -e .
 
 EXPOSE 9000
 CMD gunicorn --bind 0.0.0.0:9000 --workers 1 --timeout 0 app.webservice:app -k uvicorn.workers.UvicornWorker