Add 4bit and 8bit models for Lightning-Mlx library

kadirnar · May 12, 2024 · 22f1f67
1 parent 29cbf84
commit 22f1f67
Showing 4 changed files with 6 additions and 13 deletions.
diff --git a/whisperplus/pipelines/lightning_whisper_mlx/lightning.py b/whisperplus/pipelines/lightning_whisper_mlx/lightning.py
@@ -49,6 +49,8 @@
     },
     "distil-large-v3": {
         "base": "mustafaaljadery/distil-whisper-mlx",
+        "4bit": "mustafaaljadery/distil-whisper-mlx-4bit",
+        "8bit": "mustafaaljadery/distil-whisper-mlx-8bit",
     },
 }
 
@@ -91,9 +93,10 @@ def __init__(self, model, batch_size=12, quant=None):
         hf_hub_download(repo_id=repo_id, filename=filename2, local_dir=local_dir)
 
     def transcribe(self, audio_path, language=None):
+        breakpoint()
         result = transcribe_audio(
             audio_path,
-            path_or_hf_repo=f'./mlx_models/{self.name}',
+            path_or_hf_repo=f'mlx_models/{self.name}',
             language=language,
             batch_size=self.batch_size)
         return result
diff --git a/whisperplus/pipelines/mlx_whisper/__init__.py b/whisperplus/pipelines/mlx_whisper/__init__.py
@@ -1,5 +1,5 @@
 # Copyright © 2023-2024 Apple Inc.
 
 from . import audio, decoding, load_models
-from .transcribxe import transcribe
+from .transcribe import transcribe
 from .version import __version__
diff --git a/whisperplus/pipelines/mlx_whisper/requirements.txt b/whisperplus/pipelines/mlx_whisper/requirements.txt
diff --git a/whisperplus/test.py b/whisperplus/test.py
@@ -1,5 +1,4 @@
 import torch
-from hqq.utils.patching import prepare_for_inference
 from pipelines.whisper import SpeechToTextPipeline
 from transformers import BitsAndBytesConfig, HqqConfig
 from utils.download_utils import download_youtube_to_mp3
@@ -8,7 +7,7 @@
 audio_path = download_youtube_to_mp3(url)
 
 hqq_config = HqqConfig(
-    nbits=1,
+    nbits=4,
     group_size=64,
     quant_zero=False,
     quant_scale=False,