From 0aab9f8331c26eb10395913e382561029dea4a47 Mon Sep 17 00:00:00 2001
From: kadirnar <kadir.nar@hotmail.com>
Date: Fri, 24 Nov 2023 17:50:36 +0300
Subject: [PATCH] Update ASR model and add batch size and return timestamps
 options

---
 README.md                                | 2 +-
 whisperplus/pipelines/whisper_diarize.py | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index ccdab65..a07369d 100644
--- a/README.md
+++ b/README.md
@@ -79,7 +79,7 @@ pipeline = ASRDiarizationPipeline.from_pretrained(
     device=device,
 )
 
-output_text = pipeline(audio_path)
+output_text = pipeline(audio_path, num_speakers=2, min_speaker=1, max_speaker=2)
 dialogue = format_speech_to_dialogue(output_text)
 print(dialogue)
 ```
diff --git a/whisperplus/pipelines/whisper_diarize.py b/whisperplus/pipelines/whisper_diarize.py
index 4244059..56390d0 100644
--- a/whisperplus/pipelines/whisper_diarize.py
+++ b/whisperplus/pipelines/whisper_diarize.py
@@ -24,7 +24,7 @@ def __init__(
     @classmethod
     def from_pretrained(
         cls,
-        asr_model: Optional[str] = "openai/whisper-medium",
+        asr_model: Optional[str] = "openai/whisper-large-v3",
         *,
         diarizer_model: Optional[str] = "pyannote/speaker-diarization",
         chunk_length_s: Optional[int] = 30,
@@ -35,7 +35,9 @@ def from_pretrained(
             "automatic-speech-recognition",
             model=asr_model,
             chunk_length_s=chunk_length_s,
-            token=use_auth_token,  # 08/25/2023: Changed argument from use_auth_token to token
+            token=use_auth_token,
+            batch_size=24,
+            return_timestamps=True,
             **kwargs,
         )
         diarization_pipeline = Pipeline.from_pretrained(diarizer_model, use_auth_token=use_auth_token)