Skip to content

Commit

Permalink
Try Audio to Blog
Browse files Browse the repository at this point in the history
  • Loading branch information
AJaySi committed Apr 8, 2024
1 parent 23b3c7f commit 54c51e5
Show file tree
Hide file tree
Showing 9 changed files with 174 additions and 367 deletions.
34 changes: 30 additions & 4 deletions alwrity.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from lib.ai_web_researcher.gpt_online_researcher import gpt_web_researcher
from lib.ai_web_researcher.metaphor_basic_neural_web_search import metaphor_find_similar
from lib.ai_writers.keywords_to_blog import write_blog_from_keywords
from lib.ai_writers.speech_to_blog.main_audio_to_blog import generate_audio_blog


def prompt_for_time_range():
Expand All @@ -32,7 +33,7 @@ def prompt_for_time_range():
def write_blog_options():
choices = [
("Keywords", "Keywords"),
("Audio YouTube", "Audio YouTube"),
("Audio To Blog", "Audio To Blog"),
("Programming", "Programming"),
("Scholar", "Scholar"),
("News/TBD", "News/TBD"),
Expand Down Expand Up @@ -195,9 +196,10 @@ def write_blog():
if blog_type:
if blog_type == 'Keywords':
blog_from_keyword()
elif blog_type == 'Audio YouTube':
audio_youtube = prompt("Enter YouTube URL for audio blog generation:")
print(f"Write audio blog based on YouTube URL: {audio_youtube}")

elif blog_type == 'Audio To Blog':
blog_from_audio()

elif blog_type == 'GitHub':
github = prompt("Enter GitHub URL, CSV file, or topic:")
print(f"Write blog based on GitHub: {github}")
Expand All @@ -209,6 +211,30 @@ def write_blog():
raise typer.Exit()


def blog_from_audio():
    """
    Prompt the user for either a YouTube URL or a full path to a local
    audio file, validate the input, and generate a blog from it.

    Loops until valid input is received or the user cancels the prompt.
    Valid input is a YouTube URL (http(s)://www.youtube.com/...) or an
    existing local file path; anything else re-prompts with an error.
    """
    while True:
        print("https://github.com/AJaySi/AI-Blog-Writer/wiki/Audio-to-blog-AI-article-writer-%E2%80%90-Alwrity-Speech-To-Text-Feature")
        audio_input = prompt("""Enter Youtube video URL OR provide Full-Path to audio file.\n👋 : """)

        # User cancelled the prompt: leave without generating anything.
        if audio_input is None:
            break

        # Blank input: ask again.
        if not audio_input.strip():
            continue

        # Accept a YouTube URL or a path to an existing local audio file.
        if (audio_input.startswith("https://www.youtube.com/")
                or audio_input.startswith("http://www.youtube.com/")
                or os.path.exists(audio_input)):
            generate_audio_blog(audio_input)
            break

        # BUG FIX: invalid input previously re-looped silently with no
        # feedback, leaving the user stuck with no explanation.
        print(f"Invalid input: '{audio_input}' is neither a YouTube URL nor an existing file path. Please try again.")


def blog_from_keyword():
""" Input blog keywords, research and write a factual blog."""
while True:
Expand Down
70 changes: 70 additions & 0 deletions lib/ai_writers/speech_to_blog/main_audio_to_blog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import os
import datetime #I wish
import sys
from textwrap import dedent
import openai
from tqdm import tqdm, trange
import time

from loguru import logger
logger.remove()
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)

from .write_blogs_from_youtube_videos import youtube_to_blog
from ...ai_web_researcher.gpt_online_researcher import do_google_serp_search
from ...ai_writers.combine_research_and_blog import blog_with_research
from ...blog_metadata.get_blog_metadata import blog_metadata
from ...blog_postprocessing.save_blog_to_file import save_blog_to_file


def generate_audio_blog(audio_input):
    """Generate and save a blog post from an audio source.

    Pipeline: transcribe the audio into a draft blog, run a Google SERP
    search on the derived title, merge the research with the draft,
    generate blog metadata, and save the result to a markdown file.

    Args:
        audio_input: A YouTube video URL or path to a local audio file.

    Exits the process (sys.exit(1)) if transcription, research, or blog
    composition fails; metadata/save failures are logged and tolerated.
    """
    # Accumulates the final blog text, later saved to a *.md file.
    blog_markdown_str = ""
    try:
        logger.info(f"Starting to write blog on URL: {audio_input}")
        yt_blog, yt_title = youtube_to_blog(audio_input)
    except Exception as e:
        logger.error(f"Error in youtube_to_blog: {e}")
        sys.exit(1)

    try:
        logger.info("Starting with online research for URL title.")
        research_report = do_google_serp_search(yt_title)
        print(research_report)
    except Exception as e:
        logger.error(f"Error in do_online_research: {e}")
        sys.exit(1)

    try:
        # Note: Check if the order of input matters for your function
        logger.info("Preparing a blog content from audio script and online research content...")
        blog_markdown_str = blog_with_research(research_report, yt_blog)
    except Exception as e:
        logger.error(f"Error in blog_with_research: {e}")
        sys.exit(1)

    # BUG FIX: defaults so the frontmatter below never raises NameError
    # when blog_metadata fails (its except clause only logs and continues).
    blog_title = blog_meta_desc = blog_tags = blog_categories = ""
    try:
        blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(blog_markdown_str)
    except Exception as err:
        logger.error(f"Failed to generate blog metadata: {err}")

    try:
        # TBD: Save the blog content as a .md file. Markdown or HTML ?
        # BUG FIX: generated_image_filepath was previously undefined, so
        # this call always raised NameError and the blog was never saved.
        # No image is generated in this pipeline, so pass None explicitly.
        generated_image_filepath = None
        save_blog_to_file(blog_markdown_str, blog_title, blog_meta_desc, blog_tags, blog_categories, generated_image_filepath)
    except Exception as err:
        logger.error(f"Failed to save final blog in a file: {err}")

    blog_frontmatter = dedent(f"""\n\n\n\
        ---
        title: {blog_title}
        categories: [{blog_categories}]
        tags: [{blog_tags}]
        Meta description: {blog_meta_desc.replace(":", "-")}
        ---\n\n""")
    logger.info(f"{blog_frontmatter}{blog_markdown_str}")
    logger.info(f"\n\n ################ Finished writing Blog for : {audio_input} #################### \n")
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,15 @@
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)

from .gpt_providers.stt_audio_blog import speech_to_text
from .gpt_providers.openai_chat_completion import openai_chatgpt

from ...gpt_providers.audio_to_text_generation.stt_audio_blog import speech_to_text
from ...gpt_providers.text_generation.main_text_generation import llm_text_gen


def youtube_to_blog(video_url):
"""Function to transcribe a given youtube url """
# fixme: Doesnt work all types of yt urls.
vid_id = video_url.split("=")[1]
#hti = Html2Image(output_path="../blog_images")
#hti.screenshot(url=video_url, save_as=f"yt-img-{vid_id}.png")
#yt_img_path = os.path.join("../blog_images", f"yt-img-{vid_id}.png")

try:
# Starting the speech-to-text process
Expand All @@ -44,7 +42,6 @@ def youtube_to_blog(video_url):
except Exception as e:
logger.error(f"Error in summarize_youtube_video: {e}")
sys.exit(1) # Exit the program due to error in summarize_youtube_video
return audio_blog_content


def summarize_youtube_video(user_content, gpt_providers):
Expand Down Expand Up @@ -77,21 +74,9 @@ def summarize_youtube_video(user_content, gpt_providers):
that will rank well in search engine results and engage readers effectively.
Follow above guidelines to craft a blog content from the following transcript:\n{user_content}
"""
if 'gemini' in gpt_providers:
try:
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
except Exception as err:
logger.error("Failed in getting GEMINI_API_KEY")
# Use gemini-pro model for text and image.
model = genai.GenerativeModel('gemini-pro')
try:
response = model.generate_content(prompt)
return response.text
except Exception as err:
logger.error("Failed to get response from gemini.")
elif 'openai' in gpt_providers:
try:
response = openai_chatgpt(prompt)
return response
except Exception as err:
SystemError(f"Error in generating blog summary: {err}")
try:
response = llm_text_gen(prompt)
return response
except Exception as err:
logger.error(f"Failed to summarize_youtube_video: {err}")
exit(1)
2 changes: 1 addition & 1 deletion lib/blog_metadata/get_blog_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
)


def blog_metadata(blog_content, search_keywords, blog_titles):
def blog_metadata(blog_content, search_keywords=None, blog_titles=None):
""" Common function to get blog metadata """
blog_title = generate_blog_title(blog_content, search_keywords, blog_titles)
blog_meta_desc = generate_blog_description(blog_content)
Expand Down
79 changes: 63 additions & 16 deletions lib/gpt_providers/audio_to_text_generation/stt_audio_blog.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,22 +34,27 @@ def speech_to_text(video_url, output_path='.'):
SystemExit: If a critical error occurs that prevents successful execution.
"""
try:
logger.info(f"Accessing YouTube URL: {video_url}")
yt = YouTube(video_url, on_progress_callback=progress_function)

logger.info("Fetching the highest quality audio stream")
audio_stream = yt.streams.filter(only_audio=True).first()

if audio_stream is None:
logger.warning("No audio stream found for this video.")
return None

#logger.info(f"Downloading audio for: {yt.title}")
global progress_bar
progress_bar = tqdm(total=1.0, unit='iB', unit_scale=True, desc=yt.title)
audio_file = audio_stream.download(output_path)
progress_bar.close()
logger.info(f"Audio downloaded: {yt.title} to {output_path}")
audio_file = None
if video_url.startswith("https://www.youtube.com/") or video_url.startswith("http://www.youtube.com/"):
logger.info(f"Accessing YouTube URL: {video_url}")
yt = YouTube(video_url, on_progress_callback=progress_function)

logger.info("Fetching the highest quality audio stream")
audio_stream = yt.streams.filter(only_audio=True).first()

if audio_stream is None:
logger.warning("No audio stream found for this video.")
return None

logger.info(f"Downloading audio for: {yt.title}")
global progress_bar
progress_bar = tqdm(total=1.0, unit='iB', unit_scale=True, desc=yt.title)
audio_file = audio_stream.download(output_path)
progress_bar.close()
logger.info(f"Audio downloaded: {yt.title} to {output_path}")
# Audio filepath from local directory.
elif os.path.exists(audio_input):
audio_file = video_url

# Checking file size
max_file_size = 24 * 1024 * 1024 # 24MB
Expand All @@ -59,6 +64,8 @@ def speech_to_text(video_url, output_path='.'):
logger.info(f"Downloaded Audio Size is: {file_size_MB:.2f} MB")
if file_size > max_file_size:
logger.error("File size exceeds 24MB limit.")
# FIXME: We can chunk hour long videos, the code is not tested.
#long_video(audio_file)
sys.exit("File size limit exceeded.")

try:
Expand Down Expand Up @@ -86,3 +93,43 @@ def speech_to_text(video_url, output_path='.'):
if os.path.exists(audio_file):
os.remove(audio_file)
logger.info("Temporary audio file removed.")


def long_video(temp_file_name):
    """Transcribe a long audio/video file with OpenAI's Whisper API, in chunks.

    Whisper enforces a request size limit, so the audio is split into
    10-minute segments with moviepy; each segment is written to a temporary
    mp3, transcribed separately, and the texts are concatenated.

    Args:
        temp_file_name: Path to the downloaded audio/video file to transcribe.

    Returns:
        str: The combined transcript of all chunks.
    """
    # BUG FIX: was `app.logger` — `app` is undefined in this module; the
    # module-level loguru `logger` is what the rest of the file uses.
    logger.info(f"Processing the YT video: {temp_file_name}")
    full_audio = mp.AudioFileClip(temp_file_name)
    try:
        duration = full_audio.duration
        chunk_length = 600  # 10 minutes in seconds
        chunks = [full_audio.subclip(start, min(start + chunk_length, duration))
                  for start in range(0, int(duration), chunk_length)]

        combined_transcript = ""
        for i, chunk in enumerate(chunks):
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_chunk_file:
                chunk.write_audiofile(audio_chunk_file.name, codec="mp3")
                with open(audio_chunk_file.name, "rb") as audio_file:
                    # Transcribe each chunk using OpenAI's Whisper API
                    logger.info(f"Transcribing chunk {i+1}/{len(chunks)}")
                    transcript = openai.Audio.transcribe("whisper-1", audio_file)
                    combined_transcript += transcript['text'] + "\n\n"

            # Remove the chunk audio file
            os.remove(audio_chunk_file.name)
    finally:
        # Release the ffmpeg reader held by the clip even on failure.
        full_audio.close()

    # BUG FIX: the transcript was previously built but never returned,
    # making the whole function's work unobservable to callers.
    return combined_transcript

2 changes: 1 addition & 1 deletion lib/gpt_providers/text_generation/gemini_pro_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def gemini_text_response(prompt, temperature, top_p, n, max_tokens):
"top_k": n,
"max_output_tokens": max_tokens
}
model = genai.GenerativeModel(model_name="gemini-pro", generation_config=generation_config)
model = genai.GenerativeModel(model_name="gemini-1.0-pro", generation_config=generation_config)
try:
response = model.generate_content(prompt, stream=True)
for chunk in response:
Expand Down
Loading

0 comments on commit 54c51e5

Please sign in to comment.