Merge pull request #92 from kadirnar/delete-pytube-library

Use yt-dlp instead of pytube to download YouTube videos
kadirnar · May 7, 2024 · 67b7fec · 67b7fec
2 parents e66a741 + 47823c2
commit 67b7fec
Show file tree

Hide file tree

Showing 6 changed files with 117 additions and 55 deletions.
diff --git a/README.md b/README.md
@@ -17,7 +17,7 @@
 ## 🛠️ Installation
 
 ```bash
-pip install whisperplus git+https://github.com/huggingface/transformers
+pip install git+https://github.com/huggingface/transformers
 pip install flash-attn --no-build-isolation
 ```
 
@@ -32,15 +32,15 @@ To use the whisperplus library, follow the steps below for different tasks:
 ### 🎵 Youtube URL to Audio
 
 ```python
-from whisperplus import SpeechToTextPipeline, download_and_convert_to_mp3
+from whisperplus import SpeechToTextPipeline, download_youtube_to_mp3
 from transformers import BitsAndBytesConfig, HqqConfig
 import torch
 
 url = "https://www.youtube.com/watch?v=di3rHkEZuUw"
-audio_path = download_and_convert_to_mp3(url)
+audio_path = download_youtube_to_mp3(url, output_dir="downloads", filename="test")
 
 hqq_config = HqqConfig(
-    nbits=1,
+    nbits=4,
     group_size=64,
     quant_zero=False,
     quant_scale=False,
@@ -78,7 +78,7 @@ print(transcript)
 ### 📰 Summarization
 
 ```python
-from whisperplus import TextSummarizationPipeline
+from whisperplus.pipelines.summarization import TextSummarizationPipeline
 
 summarizer = TextSummarizationPipeline(model_id="facebook/bart-large-cnn")
 summary = summarizer.summarize(transcript)
@@ -88,7 +88,7 @@ print(summary[0]["summary_text"])
 ### 📰 Long Text Support Summarization
 
 ```python
-from whisperplus import LongTextSummarizationPipeline
+from whisperplus.pipelines.long_text_summarization import LongTextSummarizationPipeline
 
 summarizer = LongTextSummarizationPipeline(model_id="facebook/bart-large-cnn")
 summary_text = summarizer.summarize(transcript)
@@ -102,13 +102,10 @@ pip install pyannote.audio>=3.1.0 pyannote.core>=5.0.0 pyannote.database>=5.0.1
 ```
 
 ```python
-from whisperplus import (
-    ASRDiarizationPipeline,
-    download_and_convert_to_mp3,
-    format_speech_to_dialogue,
-)
+from whisperplus.pipelines.whisper_diarize import ASRDiarizationPipeline
+from whisperplus import download_youtube_to_mp3, format_speech_to_dialogue
 
-audio_path = download_and_convert_to_mp3("https://www.youtube.com/watch?v=mRB14sFHw2E")
+audio_path = download_youtube_to_mp3("https://www.youtube.com/watch?v=mRB14sFHw2E")
 
 device = "cuda"  # cpu or mps
 pipeline = ASRDiarizationPipeline.from_pretrained(
@@ -153,7 +150,7 @@ pip install autollm>=0.1.9
 ```
 
 ```python
-from whisperplus import AutoLLMChatWithVideo
+from whisperplus.pipelines.autollm_chatbot import AutoLLMChatWithVideo
 
 # service_context_params
 system_prompt = """
@@ -192,19 +189,32 @@ print(response)
 ### 🎙️ Speech to Text
 
 ```python
-from whisperplus import TextToSpeechPipeline
+from whisperplus.pipelines.text2speech import TextToSpeechPipeline
 
 tts = TextToSpeechPipeline(model_id="suno/bark")
 audio = tts(text="Hello World", voice_preset="v2/en_speaker_6")
 ```
 
 ### 🎥 AutoCaption
 
+```bash
+pip install moviepy
+apt install imagemagick libmagick++-dev
+sed -i 's/none/read,write/g' /etc/ImageMagick-6/policy.xml
+```
+
 ```python
-from whisperplus import WhisperAutoCaptionPipeline
+from whisperplus.pipelines.whisper_autocaption import WhisperAutoCaptionPipeline
+from whisperplus import download_youtube_to_mp4
+
+video_path = download_youtube_to_mp4(
+    "https://www.youtube.com/watch?v=di3rHkEZuUw",
+    output_dir="downloads",
+    filename="test",
+)  # Optional
 
 caption = WhisperAutoCaptionPipeline(model_id="openai/whisper-large-v3")
-caption(video_path="test.mp4", output_path="output.mp4", language="turkish")
+caption(video_path=video_path, output_path="output.mp4", language="english")
 ```
 
 ## 😍 Contributing

diff --git a/requirements.txt b/requirements.txt
@@ -1,8 +1,11 @@
 torch>=2.0.0
 torchvision>=0.15.0
 moviepy>=1.0.3
-pytube>=15.0.0
+yt_dlp
 Requests>=2.31.0
 accelerate
 bitsandbytes
 hqq
+ffmpeg
+ffmpeg-python
+pre-commit
diff --git a/whisperplus/__init__.py b/whisperplus/__init__.py
@@ -1,11 +1,11 @@
-from whisperplus.pipelines.autollm_chatbot import AutoLLMChatWithVideo
-from whisperplus.pipelines.long_text_summarization import LongTextSummarizationPipeline
-from whisperplus.pipelines.summarization import TextSummarizationPipeline
-from whisperplus.pipelines.text2speech import TextToSpeechPipeline
+# from whisperplus.pipelines.autollm_chatbot import AutoLLMChatWithVideo
+# from whisperplus.pipelines.long_text_summarization import LongTextSummarizationPipeline
+# from whisperplus.pipelines.summarization import TextSummarizationPipeline
+# from whisperplus.pipelines.text2speech import TextToSpeechPipeline
 from whisperplus.pipelines.whisper import SpeechToTextPipeline
-from whisperplus.pipelines.whisper_autocaption import WhisperAutoCaptionPipeline
-from whisperplus.pipelines.whisper_diarize import ASRDiarizationPipeline
-from whisperplus.utils.download_utils import download_and_convert_to_mp3
+# from whisperplus.pipelines.whisper_autocaption import WhisperAutoCaptionPipeline
+# from whisperplus.pipelines.whisper_diarize import ASRDiarizationPipeline
+from whisperplus.utils.download_utils import download_youtube_to_mp3, download_youtube_to_mp4
 from whisperplus.utils.text_utils import format_speech_to_dialogue
 
 __version__ = '0.3.3'

diff --git a/whisperplus/app.py b/whisperplus/app.py
@@ -2,7 +2,7 @@
 
 from whisperplus.pipelines.whisper import SpeechToTextPipeline
 from whisperplus.pipelines.whisper_diarize import ASRDiarizationPipeline
-from whisperplus.utils.download_utils import download_and_convert_to_mp3
+from whisperplus.utils.download_utils import download_youtube_to_mp3
 from whisperplus.utils.text_utils import format_speech_to_dialogue
 
 
@@ -20,7 +20,7 @@ def youtube_url_to_text(url, model_id, language_choice):
         transcript (str): The transcript of the speech-to-text conversion.
         video_path (str): The path of the downloaded video.
     """
-    video_path = download_and_convert_to_mp3(url)
+    video_path = download_youtube_to_mp3(url)
     pipeline = SpeechToTextPipeline(model_id)
     transcript = pipeline(audio_path=video_path, model_id=model_id, language=language_choice)
 
@@ -50,7 +50,7 @@ def speaker_diarization(url, model_id, device, num_speakers, min_speaker, max_sp
         device=device,
     )
 
-    audio_path = download_and_convert_to_mp3(url)
+    audio_path = download_youtube_to_mp3(url)
     output_text = pipeline(
         audio_path, num_speakers=num_speakers, min_speaker=min_speaker, max_speaker=max_speaker)
     dialogue = format_speech_to_dialogue(output_text)

diff --git a/whisperplus/test.py b/whisperplus/test.py
@@ -2,10 +2,10 @@
 from hqq.utils.patching import prepare_for_inference
 from pipelines.whisper import SpeechToTextPipeline
 from transformers import BitsAndBytesConfig, HqqConfig
-from utils.download_utils import download_and_convert_to_mp3
+from utils.download_utils import download_youtube_to_mp3
 
 url = "https://www.youtube.com/watch?v=BpN4hEAvDBg"
-audio_path = download_and_convert_to_mp3(url)
+audio_path = download_youtube_to_mp3(url)
 
 hqq_config = HqqConfig(
     nbits=1,

diff --git a/whisperplus/utils/download_utils.py b/whisperplus/utils/download_utils.py
@@ -1,42 +1,91 @@
 import logging
-import os
 from pathlib import Path
-from typing import Optional
 
-from moviepy.editor import AudioFileClip
-from pytube import YouTube
+import yt_dlp
 
-logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
 
 
-def download_and_convert_to_mp3(url: str,
-                                output_path: str = "output",
-                                filename: str = "test") -> Optional[str]:
+def download_youtube_to_mp3(url, output_dir='./', filename="test"):
+    """
+    Downloads a YouTube video as an MP3 file.
+
+    Parameters:
+    url (str): The URL of the YouTube video to download.
+    output_dir (str, optional): The directory to save the MP3 file. Defaults to the current working directory.
+    filename (str, optional): The base filename, without extension, for the MP3 file. Defaults to "test"; pass None to use the video title.
+
+    Returns:
+    str: The path to the downloaded MP3 file, or None if the download fails.
+    """
+    # Create the output directory if it doesn't exist
+    output_dir = Path(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Set the output filename
+    if filename is None:
+        filename = "%(title)s.%(ext)s"
+    else:
+        filename = f"{filename}.%(ext)s"
+
+    # Download the video using yt_dlp
+    ydl_opts = {
+        'outtmpl': str(output_dir / filename),
+        'format': 'bestaudio/best',
+        'postprocessors': [{
+            'key': 'FFmpegExtractAudio',
+            'preferredcodec': 'mp3',
+            'preferredquality': '192',
+        }],
+    }
+
     try:
-        yt = YouTube(url)
-        audio_stream = yt.streams.filter(only_audio=True).first()
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            ydl.download([url])
+    except yt_dlp.utils.DownloadError as e:
+        logging.error(f"Error downloading video: {e}")
+        return None
 
-        if audio_stream is None:
-            logging.warning("No audio streams found")
-            return None
+    output_path = output_dir / filename.replace('%(ext)s', 'mp3')
+    logging.info(f"Downloaded {output_path} as MP3")
+    return str(output_path)
 
-        Path(output_path).mkdir(parents=True, exist_ok=True)
 
-        mp3_file_path = os.path.join(output_path, filename + ".mp3")
-        logging.info(f"Downloading started... {mp3_file_path}")
+def download_youtube_to_mp4(url, output_dir='./', filename="test"):
+    """
+    Downloads a YouTube video as an MP4 file.
 
-        downloaded_file_path = audio_stream.download(output_path)
+    Parameters:
+    url (str): The URL of the YouTube video to download.
+    output_dir (str, optional): The directory to save the MP4 file. Defaults to the current working directory.
+    filename (str, optional): The base filename, without extension, for the MP4 file. Defaults to "test"; pass None to use the video title.
 
-        audio_clip = AudioFileClip(downloaded_file_path)
-        audio_clip.write_audiofile(mp3_file_path, codec="libmp3lame", verbose=False, logger=None)
-        audio_clip.close()
+    Returns:
+    str: The path to the downloaded MP4 file, or None if the download fails.
+    """
+    # Create the output directory if it doesn't exist
+    output_dir = Path(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
 
-        if Path(downloaded_file_path).suffix != ".mp3":
-            os.remove(downloaded_file_path)
+    # Set the output filename
+    if filename is None:
+        filename = "%(title)s.%(ext)s"
+    else:
+        filename = f"{filename}.%(ext)s"
 
-        logging.info(f"Download and conversion successful. File saved at: {mp3_file_path}")
-        return str(mp3_file_path)
+    # Download the video using yt_dlp
+    ydl_opts = {
+        'outtmpl': str(output_dir / filename),
+        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
+    }
 
-    except Exception as e:
-        logging.error(f"An error occurred: {e}")
+    try:
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            ydl.download([url])
+    except yt_dlp.utils.DownloadError as e:
+        logging.error(f"Error downloading video: {e}")
         return None
+
+    output_path = output_dir / filename.replace('%(ext)s', 'mp4')
+    logging.info(f"Downloaded {output_path} as MP4")
+    return str(output_path)