From 70297dd075b9765c4343cd62ea290132592ce1dc Mon Sep 17 00:00:00 2001
From: kadirnar
Date: Thu, 23 Nov 2023 21:51:11 +0300
Subject: [PATCH 1/4] =?UTF-8?q?feat:=20=F0=9F=9A=80=20Add=20GitHub=20workf?=
 =?UTF-8?q?lows=20for=20package=20testing=20and=20CI,=20integrate=20logger?=
 =?UTF-8?q?=20support=20in=20whisper.py=20=F0=9F=A4=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a FUNDING file and three GitHub Actions workflows (CI for the local
checkout, testing of the published package, PyPI publishing), bump the
package version to 0.0.5, and log model loading, device selection and
transcription in SpeechToTextPipeline.

Review notes:
- Python versions in the workflows are quoted so YAML keeps them as
  strings; an unquoted 3.10 is read as the float 3.1.
- checkout, setup-python and cache use their current major versions.
- The PyPI publish action is pinned to release/v1 instead of the master
  branch, which upstream has sunset.

---
 .github/FUNDING.yml                   |  1 +
 .github/workflows/ci.yml              | 85 +++++++++++++++++++++++++++
 .github/workflows/package_testing.yml | 78 ++++++++++++++++++++++++
 .github/workflows/publish_pypi.yml    | 34 +++++++++++
 whisperplus/__init__.py               |  2 +-
 whisperplus/pipelines/whisper.py      | 12 +++-
 6 files changed, 210 insertions(+), 2 deletions(-)
 create mode 100644 .github/FUNDING.yml
 create mode 100644 .github/workflows/ci.yml
 create mode 100644 .github/workflows/package_testing.yml
 create mode 100644 .github/workflows/publish_pypi.yml

diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 0000000..a5822e0
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1 @@
+github: kadirnar
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..337085f
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,85 @@
+name: Python package CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  build:
+    runs-on: ${{ matrix.operating-system }}
+    strategy:
+      matrix:
+        operating-system: [ubuntu-latest, windows-latest, macos-latest]
+        python-version: ["3.8", "3.9", "3.11"]
+        torch-version: [2.0.0, 2.0.1]
+      fail-fast: false
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Restore Ubuntu cache
+        uses: actions/cache@v4
+        if: matrix.operating-system == 'ubuntu-latest'
+        with:
+          path: ~/.cache/pip
+          key: ${{ matrix.operating-system }}-${{ matrix.python-version }}-${{ hashFiles('**/setup.py')}}
+          restore-keys: ${{ matrix.operating-system }}-${{ matrix.python-version }}-
+
+      - name: Restore MacOS cache
+        uses: actions/cache@v4
+        if: matrix.operating-system == 'macos-latest'
+        with:
+          path: ~/Library/Caches/pip
+          key: ${{ matrix.operating-system }}-${{ matrix.python-version }}-${{ hashFiles('**/setup.py')}}
+          restore-keys: ${{ matrix.operating-system }}-${{ matrix.python-version }}-
+
+      - name: Restore Windows cache
+        uses: actions/cache@v4
+        if: matrix.operating-system == 'windows-latest'
+        with:
+          path: ~\AppData\Local\pip\Cache
+          key: ${{ matrix.operating-system }}-${{ matrix.python-version }}-${{ hashFiles('**/setup.py')}}
+          restore-keys: ${{ matrix.operating-system }}-${{ matrix.python-version }}-
+
+      - name: Update pip
+        run: python -m pip install --upgrade pip
+
+      - name: Lint with flake8, black and isort
+        run: |
+          pip install -e .[dev]
+          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+          black . --check --config pyproject.toml
+          isort -c .
+          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=120 --statistics
+
+      - name: Install Numpy
+        run: >
+          pip install numpy
+
+      - name: Install PyTorch on Linux and Windows
+        if: >
+          matrix.operating-system == 'ubuntu-latest' ||
+          matrix.operating-system == 'windows-latest'
+        run: >
+          pip install torch==${{ matrix.torch-version }}+cpu
+          -f https://download.pytorch.org/whl/torch_stable.html
+
+      - name: Install PyTorch on MacOS
+        if: matrix.operating-system == 'macos-latest'
+        run: pip install torch==${{ matrix.torch-version }}
+
+      - name: Install whisperplus package from local setup.py
+        run: >
+          pip install -e .
+
+      - name: Unittest whisperplus
+        run: |
+          python -m unittest
diff --git a/.github/workflows/package_testing.yml b/.github/workflows/package_testing.yml
new file mode 100644
index 0000000..71fd004
--- /dev/null
+++ b/.github/workflows/package_testing.yml
@@ -0,0 +1,78 @@
+name: Package Testing
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+
+jobs:
+  build:
+    runs-on: ${{ matrix.operating-system }}
+
+    strategy:
+      matrix:
+        operating-system: [ubuntu-latest, windows-latest, macos-latest]
+        python-version: ["3.8", "3.9", "3.11"]
+        torch-version: [2.0.0, 2.0.1]
+      fail-fast: false
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Restore Ubuntu cache
+        uses: actions/cache@v4
+        if: matrix.operating-system == 'ubuntu-latest'
+        with:
+          path: ~/.cache/pip
+          key: ${{ matrix.operating-system }}-${{ matrix.python-version }}-${{ hashFiles('**/setup.py')}}
+          restore-keys: ${{ matrix.operating-system }}-${{ matrix.python-version }}-
+
+      - name: Restore MacOS cache
+        uses: actions/cache@v4
+        if: matrix.operating-system == 'macos-latest'
+        with:
+          path: ~/Library/Caches/pip
+          key: ${{ matrix.operating-system }}-${{ matrix.python-version }}-${{ hashFiles('**/setup.py')}}
+          restore-keys: ${{ matrix.operating-system }}-${{ matrix.python-version }}-
+
+      - name: Restore Windows cache
+        uses: actions/cache@v4
+        if: matrix.operating-system == 'windows-latest'
+        with:
+          path: ~\AppData\Local\pip\Cache
+          key: ${{ matrix.operating-system }}-${{ matrix.python-version }}-${{ hashFiles('**/setup.py')}}
+          restore-keys: ${{ matrix.operating-system }}-${{ matrix.python-version }}-
+
+      - name: Update pip
+        run: python -m pip install --upgrade pip
+
+      - name: Install Numpy
+        run: >
+          pip install numpy
+
+      - name: Install PyTorch on Linux and Windows
+        if: >
+          matrix.operating-system == 'ubuntu-latest' ||
+          matrix.operating-system == 'windows-latest'
+        run: >
+          pip install torch==${{ matrix.torch-version }}+cpu
+          -f https://download.pytorch.org/whl/torch_stable.html
+
+      - name: Install PyTorch on MacOS
+        if: matrix.operating-system == 'macos-latest'
+        run: pip install torch==${{ matrix.torch-version }}
+
+      - name: Install latest whisperplus package
+        run: >
+          pip install --upgrade --force-reinstall whisperplus
+
+      - name: Unittest whisperplus
+        run: |
+          python -m unittest
diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml
new file mode 100644
index 0000000..fd5cc4c
--- /dev/null
+++ b/.github/workflows/publish_pypi.yml
@@ -0,0 +1,34 @@
+name: Publish Python distributions to PyPI
+
+on:
+  push:
+    branches: [ master ]
+
+jobs:
+  build-n-publish:
+    name: Build and publish Python distributions to PyPI
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python 3.8
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.8"
+      - name: Install pypa/build
+        run: >-
+          python -m
+          pip install
+          build
+      - name: Build a binary wheel and a source tarball
+        run: >-
+          python -m
+          build
+          --sdist
+          --wheel
+          --outdir dist/
+      - name: Publish distribution 📦 to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1  # upstream sunset the master ref
+        with:
+          user: __token__
+          password: ${{ secrets.PYPI_WHISPERPLUS_API_KEY }}
diff --git a/whisperplus/__init__.py b/whisperplus/__init__.py
index 6aa7ae6..35da54f 100644
--- a/whisperplus/__init__.py
+++ b/whisperplus/__init__.py
@@ -2,7 +2,7 @@ from whisperplus.pipelines.whisper import SpeechToTextPipeline
 from whisperplus.utils.download_utils import download_and_convert_to_mp3
 
 
-__version__ = '0.0.4'
+__version__ = '0.0.5'
 __author__ = 'kadirnar'
 __license__ = 'Apache License 2.0'
 __all__ = ['']
diff --git a/whisperplus/pipelines/whisper.py b/whisperplus/pipelines/whisper.py
index f9e7ce2..820f759 100644
--- a/whisperplus/pipelines/whisper.py
+++ b/whisperplus/pipelines/whisper.py
@@ -1,6 +1,10 @@
+import logging
+
 import torch
 from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
 
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
 
 class SpeechToTextPipeline:
     """Class for converting audio to text using a pre-trained speech recognition model."""
@@ -11,6 +15,8 @@ def __init__(self, model_id: str = "openai/whisper-large-v3"):
 
         if self.model is None:
             self.load_model(model_id)
+        else:
+            logging.info("Model already loaded.")
 
         self.set_device()
 
@@ -21,6 +27,8 @@ def set_device(self):
         else:
             self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+        logging.info(f"Using device: {self.device}")
+
     def load_model(self, model_id: str = "openai/whisper-large-v3"):
         """
         Loads the pre-trained speech recognition model and moves it to the specified device.
@@ -28,9 +36,11 @@ def load_model(self, model_id: str = "openai/whisper-large-v3"):
         Args:
             model_id (str): Identifier of the pre-trained model to be loaded.
         """
+        logging.info("Loading model...")
         model = AutoModelForSpeechSeq2Seq.from_pretrained(
             model_id, torch_dtype=torch.float16, low_cpu_mem_usage=True, use_safetensors=True)
         model.to(self.device)
+        logging.info("Model loaded successfully.")
 
         self.model = model
 
@@ -60,6 +70,6 @@ def __call__(self, audio_path: str, model_id: str = "openai/whisper-large-v3", l
             model_kwargs={"use_flash_attention_2": True},
             generate_kwargs={"language": language},
         )
-
+        logging.info("Transcribing audio...")
         result = pipe(audio_path)["text"]
         return result

From a943c4386be85080e24c95cd83e68fa3c4938eac Mon Sep 17 00:00:00 2001
From: kadirnar
Date: Thu, 23 Nov 2023 21:56:53 +0300
Subject: [PATCH 2/4] =?UTF-8?q?chore:=20=E2=AC=86=EF=B8=8F=20Update=20Pyth?=
 =?UTF-8?q?on=20and=20Torch=20versions?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Test the published package on Python 3.10, 3.11 and 3.12 with torch 2.1.0
and 2.1.1. The Python versions stay quoted so that 3.10 is not parsed as
the float 3.1.

---
 .github/workflows/package_testing.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/package_testing.yml b/.github/workflows/package_testing.yml
index 71fd004..f2bf491 100644
--- a/.github/workflows/package_testing.yml
+++ b/.github/workflows/package_testing.yml
@@ -13,8 +13,8 @@ jobs:
     strategy:
       matrix:
         operating-system: [ubuntu-latest, windows-latest, macos-latest]
-        python-version: ["3.8", "3.9", "3.11"]
-        torch-version: [2.0.0, 2.0.1]
+        python-version: ["3.10", "3.11", "3.12"]
+        torch-version: [2.1.0, 2.1.1]
       fail-fast: false
 
     steps:

From 531eebe01d96334f5ebccb28b81d27f5f7253298 Mon Sep 17 00:00:00 2001
From: kadirnar
Date: Thu, 23 Nov 2023 21:59:58 +0300
Subject: [PATCH 3/4] Update CI workflow to use pre-commit for styling checks

Replace the flake8/black/isort lint step with a pre-commit run and drop the
separate NumPy install step.

Review notes:
- pre-commit is installed with pip inside the step, because this patch
  removes the `pip install -e .[dev]` line and no other step provides it.
  `pre-commit install` only writes git hooks and is not needed in CI.
- No name-only "Install PyTorch on Linux and Windows" step is added after
  the MacOS step: a step needs `run` or `uses` to be valid.

---
 .github/workflows/ci.yml | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 337085f..4128867 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -52,17 +52,10 @@ jobs:
       - name: Update pip
         run: python -m pip install --upgrade pip
 
-      - name: Lint with flake8, black and isort
+      - name: Check styling with pre-commit
         run: |
-          pip install -e .[dev]
-          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-          black . --check --config pyproject.toml
-          isort -c .
-          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=120 --statistics
-
-      - name: Install Numpy
-        run: >
-          pip install numpy
+          pip install pre-commit
+          pre-commit run --all-files
 
       - name: Install PyTorch on Linux and Windows
         if: >

From 898a54638323d5d3e340024465b983f5ecfd958c Mon Sep 17 00:00:00 2001
From: kadirnar
Date: Thu, 23 Nov 2023 22:02:15 +0300
Subject: [PATCH 4/4] Update Python and Torch versions in CI and package
 testing workflows

CI: add Python 3.10 to the matrix and move to torch 2.1.0 and 2.1.1.
Package testing: return to Python 3.8, 3.9, 3.10 and 3.11.

The Python versions are quoted in both workflows so that 3.10 stays the
string "3.10" instead of becoming the float 3.1.

---
 .github/workflows/ci.yml              | 4 ++--
 .github/workflows/package_testing.yml | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4128867..5f1a60b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -12,8 +12,8 @@ jobs:
     strategy:
       matrix:
         operating-system: [ubuntu-latest, windows-latest, macos-latest]
-        python-version: ["3.8", "3.9", "3.11"]
-        torch-version: [2.0.0, 2.0.1]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        torch-version: [2.1.0, 2.1.1]
       fail-fast: false
 
     steps:
diff --git a/.github/workflows/package_testing.yml b/.github/workflows/package_testing.yml
index f2bf491..bf5e799 100644
--- a/.github/workflows/package_testing.yml
+++ b/.github/workflows/package_testing.yml
@@ -13,7 +13,7 @@ jobs:
     strategy:
       matrix:
         operating-system: [ubuntu-latest, windows-latest, macos-latest]
-        python-version: ["3.10", "3.11", "3.12"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
         torch-version: [2.1.0, 2.1.1]
       fail-fast: false
 