Skip to content

Commit

Permalink
Merge pull request #92 from kadirnar/delete-pytube-library
Browse files Browse the repository at this point in the history
Add yt-dlp instead of pytube to download youtube video
  • Loading branch information
kadirnar committed May 7, 2024
2 parents e66a741 + 47823c2 commit 67b7fec
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 55 deletions.
42 changes: 26 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
## 🛠️ Installation

```bash
pip install whisperplus git+https://github.com/huggingface/transformers
pip install git+https://github.com/huggingface/transformers
pip install flash-attn --no-build-isolation
```

Expand All @@ -32,15 +32,15 @@ To use the whisperplus library, follow the steps below for different tasks:
### 🎵 Youtube URL to Audio

```python
from whisperplus import SpeechToTextPipeline, download_and_convert_to_mp3
from whisperplus import SpeechToTextPipeline, download_youtube_to_mp3
from transformers import BitsAndBytesConfig, HqqConfig
import torch

url = "https://www.youtube.com/watch?v=di3rHkEZuUw"
audio_path = download_and_convert_to_mp3(url)
audio_path = download_youtube_to_mp3(url, output_dir="downloads", filename="test")

hqq_config = HqqConfig(
nbits=1,
nbits=4,
group_size=64,
quant_zero=False,
quant_scale=False,
Expand Down Expand Up @@ -78,7 +78,7 @@ print(transcript)
### 📰 Summarization

```python
from whisperplus import TextSummarizationPipeline
from whisperplus.pipelines.summarization import TextSummarizationPipeline

summarizer = TextSummarizationPipeline(model_id="facebook/bart-large-cnn")
summary = summarizer.summarize(transcript)
Expand All @@ -88,7 +88,7 @@ print(summary[0]["summary_text"])
### 📰 Long Text Support Summarization

```python
from whisperplus import LongTextSummarizationPipeline
from whisperplus.pipelines.long_text_summarization import LongTextSummarizationPipeline

summarizer = LongTextSummarizationPipeline(model_id="facebook/bart-large-cnn")
summary_text = summarizer.summarize(transcript)
Expand All @@ -102,13 +102,10 @@ pip install pyannote.audio>=3.1.0 pyannote.core>=5.0.0 pyannote.database>=5.0.1
```

```python
from whisperplus import (
ASRDiarizationPipeline,
download_and_convert_to_mp3,
format_speech_to_dialogue,
)
from whisperplus.pipelines.whisper_diarize import ASRDiarizationPipeline
from whisperplus import download_youtube_to_mp3, format_speech_to_dialogue

audio_path = download_and_convert_to_mp3("https://www.youtube.com/watch?v=mRB14sFHw2E")
audio_path = download_youtube_to_mp3("https://www.youtube.com/watch?v=mRB14sFHw2E")

device = "cuda" # cpu or mps
pipeline = ASRDiarizationPipeline.from_pretrained(
Expand Down Expand Up @@ -153,7 +150,7 @@ pip install autollm>=0.1.9
```

```python
from whisperplus import AutoLLMChatWithVideo
from whisperplus.pipelines.autollm_chatbot import AutoLLMChatWithVideo

# service_context_params
system_prompt = """
Expand Down Expand Up @@ -192,19 +189,32 @@ print(response)
### 🎙️ Text to Speech

```python
from whisperplus import TextToSpeechPipeline
from whisperplus.pipelines.text2speech import TextToSpeechPipeline

tts = TextToSpeechPipeline(model_id="suno/bark")
audio = tts(text="Hello World", voice_preset="v2/en_speaker_6")
```

### 🎥 AutoCaption

```bash
pip install moviepy
apt install imagemagick libmagick++-dev
# Edit the policy in place: redirecting sed's output onto the file it reads from can truncate it before it is read.
sed -i 's/none/read,write/g' /etc/ImageMagick-6/policy.xml
```

```python
from whisperplus import WhisperAutoCaptionPipeline
from whisperplus.pipelines.whisper_autocaption import WhisperAutoCaptionPipeline
from whisperplus import download_youtube_to_mp4

video_path = download_youtube_to_mp4(
"https://www.youtube.com/watch?v=di3rHkEZuUw",
output_dir="downloads",
filename="test",
) # Optional

caption = WhisperAutoCaptionPipeline(model_id="openai/whisper-large-v3")
caption(video_path="test.mp4", output_path="output.mp4", language="turkish")
caption(video_path=video_path, output_path="output.mp4", language="english")
```

## 😍 Contributing
Expand Down
5 changes: 4 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
torch>=2.0.0
torchvision>=0.15.0
moviepy>=1.0.3
pytube>=15.0.0
yt_dlp
Requests>=2.31.0
accelerate
bitsandbytes
hqq
ffmpeg
ffmpeg-python
pre-commit
14 changes: 7 additions & 7 deletions whisperplus/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from whisperplus.pipelines.autollm_chatbot import AutoLLMChatWithVideo
from whisperplus.pipelines.long_text_summarization import LongTextSummarizationPipeline
from whisperplus.pipelines.summarization import TextSummarizationPipeline
from whisperplus.pipelines.text2speech import TextToSpeechPipeline
# from whisperplus.pipelines.autollm_chatbot import AutoLLMChatWithVideo
# from whisperplus.pipelines.long_text_summarization import LongTextSummarizationPipeline
# from whisperplus.pipelines.summarization import TextSummarizationPipeline
# from whisperplus.pipelines.text2speech import TextToSpeechPipeline
from whisperplus.pipelines.whisper import SpeechToTextPipeline
from whisperplus.pipelines.whisper_autocaption import WhisperAutoCaptionPipeline
from whisperplus.pipelines.whisper_diarize import ASRDiarizationPipeline
from whisperplus.utils.download_utils import download_and_convert_to_mp3
# from whisperplus.pipelines.whisper_autocaption import WhisperAutoCaptionPipeline
# from whisperplus.pipelines.whisper_diarize import ASRDiarizationPipeline
from whisperplus.utils.download_utils import download_youtube_to_mp3, download_youtube_to_mp4
from whisperplus.utils.text_utils import format_speech_to_dialogue

__version__ = '0.3.3'
Expand Down
6 changes: 3 additions & 3 deletions whisperplus/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from whisperplus.pipelines.whisper import SpeechToTextPipeline
from whisperplus.pipelines.whisper_diarize import ASRDiarizationPipeline
from whisperplus.utils.download_utils import download_and_convert_to_mp3
from whisperplus.utils.download_utils import download_youtube_to_mp3
from whisperplus.utils.text_utils import format_speech_to_dialogue


Expand All @@ -20,7 +20,7 @@ def youtube_url_to_text(url, model_id, language_choice):
transcript (str): The transcript of the speech-to-text conversion.
video_path (str): The path of the downloaded video.
"""
video_path = download_and_convert_to_mp3(url)
video_path = download_youtube_to_mp3(url)
pipeline = SpeechToTextPipeline(model_id)
transcript = pipeline(audio_path=video_path, model_id=model_id, language=language_choice)

Expand Down Expand Up @@ -50,7 +50,7 @@ def speaker_diarization(url, model_id, device, num_speakers, min_speaker, max_sp
device=device,
)

audio_path = download_and_convert_to_mp3(url)
audio_path = download_youtube_to_mp3(url)
output_text = pipeline(
audio_path, num_speakers=num_speakers, min_speaker=min_speaker, max_speaker=max_speaker)
dialogue = format_speech_to_dialogue(output_text)
Expand Down
4 changes: 2 additions & 2 deletions whisperplus/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
from hqq.utils.patching import prepare_for_inference
from pipelines.whisper import SpeechToTextPipeline
from transformers import BitsAndBytesConfig, HqqConfig
from utils.download_utils import download_and_convert_to_mp3
from utils.download_utils import download_youtube_to_mp3

url = "https://www.youtube.com/watch?v=BpN4hEAvDBg"
audio_path = download_and_convert_to_mp3(url)
audio_path = download_youtube_to_mp3(url)

hqq_config = HqqConfig(
nbits=1,
Expand Down
101 changes: 75 additions & 26 deletions whisperplus/utils/download_utils.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,91 @@
import logging
import os
from pathlib import Path
from typing import Optional

from moviepy.editor import AudioFileClip
from pytube import YouTube
import yt_dlp

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')


def download_and_convert_to_mp3(url: str,
output_path: str = "output",
filename: str = "test") -> Optional[str]:
def download_youtube_to_mp3(url, output_dir='./', filename="test"):
"""
Downloads a YouTube video as an MP3 file.
Parameters:
url (str): The URL of the YouTube video to download.
output_dir (str, optional): The directory to save the MP3 file. Defaults to the current working directory.
filename (str, optional): The base filename (without extension) for the MP3 file. Defaults to "test". If None, the video title is used as the filename.
Returns:
str: The path to the downloaded MP3 file, or None if the download fails.
"""
# Create the output directory if it doesn't exist
output_dir = Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)

# Set the output filename
if filename is None:
filename = "%(title)s.%(ext)s"
else:
filename = f"{filename}.%(ext)s"

# Download the video using yt_dlp
ydl_opts = {
'outtmpl': str(output_dir / filename),
'format': 'bestaudio/best',
'postprocessors': [{
'key': 'FFmpegExtractAudio',
'preferredcodec': 'mp3',
'preferredquality': '192',
}],
}

try:
yt = YouTube(url)
audio_stream = yt.streams.filter(only_audio=True).first()
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
ydl.download([url])
except yt_dlp.utils.DownloadError as e:
logging.error(f"Error downloading video: {e}")
return None

if audio_stream is None:
logging.warning("No audio streams found")
return None
output_path = output_dir / filename.replace('%(ext)s', 'mp3')
logging.info(f"Downloaded {output_path} as MP3")
return str(output_path)

Path(output_path).mkdir(parents=True, exist_ok=True)

mp3_file_path = os.path.join(output_path, filename + ".mp3")
logging.info(f"Downloading started... {mp3_file_path}")
def download_youtube_to_mp4(url, output_dir='./', filename="test"):
"""
Downloads a YouTube video as an MP4 file.
downloaded_file_path = audio_stream.download(output_path)
Parameters:
url (str): The URL of the YouTube video to download.
output_dir (str, optional): The directory to save the MP4 file. Defaults to the current working directory.
filename (str, optional): The base filename (without extension) for the MP4 file. Defaults to "test". If None, the video title is used as the filename.
audio_clip = AudioFileClip(downloaded_file_path)
audio_clip.write_audiofile(mp3_file_path, codec="libmp3lame", verbose=False, logger=None)
audio_clip.close()
Returns:
str: The path to the downloaded MP4 file, or None if the download fails.
"""
# Create the output directory if it doesn't exist
output_dir = Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)

if Path(downloaded_file_path).suffix != ".mp3":
os.remove(downloaded_file_path)
# Set the output filename
if filename is None:
filename = "%(title)s.%(ext)s"
else:
filename = f"{filename}.%(ext)s"

logging.info(f"Download and conversion successful. File saved at: {mp3_file_path}")
return str(mp3_file_path)
# Download the video using yt_dlp
ydl_opts = {
'outtmpl': str(output_dir / filename),
'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
}

except Exception as e:
logging.error(f"An error occurred: {e}")
try:
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
ydl.download([url])
except yt_dlp.utils.DownloadError as e:
logging.error(f"Error downloading video: {e}")
return None

output_path = output_dir / filename.replace('%(ext)s', 'mp4')
logging.info(f"Downloaded {output_path} as MP4")
return str(output_path)

0 comments on commit 67b7fec

Please sign in to comment.