Skip to content

Commit

Permalink
Try Audio to Blog
Browse files Browse the repository at this point in the history
  • Loading branch information
AJaySi committed Apr 8, 2024
1 parent 23b3c7f commit 54c51e5
Show file tree
Hide file tree
Showing 9 changed files with 174 additions and 367 deletions.
34 changes: 30 additions & 4 deletions alwrity.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from lib.ai_web_researcher.gpt_online_researcher import gpt_web_researcher
from lib.ai_web_researcher.metaphor_basic_neural_web_search import metaphor_find_similar
from lib.ai_writers.keywords_to_blog import write_blog_from_keywords
from lib.ai_writers.speech_to_blog.main_audio_to_blog import generate_audio_blog


def prompt_for_time_range():
Expand All @@ -32,7 +33,7 @@ def prompt_for_time_range():
def write_blog_options():
choices = [
("Keywords", "Keywords"),
("Audio YouTube", "Audio YouTube"),
("Audio To Blog", "Audio To Blog"),
("Programming", "Programming"),
("Scholar", "Scholar"),
("News/TBD", "News/TBD"),
Expand Down Expand Up @@ -195,9 +196,10 @@ def write_blog():
if blog_type:
if blog_type == 'Keywords':
blog_from_keyword()
elif blog_type == 'Audio YouTube':
audio_youtube = prompt("Enter YouTube URL for audio blog generation:")
print(f"Write audio blog based on YouTube URL: {audio_youtube}")

elif blog_type == 'Audio To Blog':
blog_from_audio()

elif blog_type == 'GitHub':
github = prompt("Enter GitHub URL, CSV file, or topic:")
print(f"Write blog based on GitHub: {github}")
Expand All @@ -209,6 +211,30 @@ def write_blog():
raise typer.Exit()


def blog_from_audio():
    """
    Prompt the user for either a YouTube URL or a full path to a local
    audio file, validate the input, and generate a blog from it.

    Loops until valid input is received or the user cancels the prompt.
    Valid input is a YouTube URL (http(s)://www.youtube.com/...) or an
    existing local file path; anything else re-prompts with an error.
    """
    while True:
        print("https://github.com/AJaySi/AI-Blog-Writer/wiki/Audio-to-blog-AI-article-writer-%E2%80%90-Alwrity-Speech-To-Text-Feature")
        audio_input = prompt("""Enter Youtube video URL OR provide Full-Path to audio file.\n👋 : """)

        # User cancelled the prompt: leave without generating anything.
        if audio_input is None:
            break

        # Blank input: ask again.
        if not audio_input.strip():
            continue

        # Accept a YouTube URL or a path to an existing local audio file.
        if (audio_input.startswith("https://www.youtube.com/")
                or audio_input.startswith("http://www.youtube.com/")
                or os.path.exists(audio_input)):
            generate_audio_blog(audio_input)
            break

        # BUG FIX: invalid input previously re-looped silently with no
        # feedback, leaving the user stuck with no explanation.
        print(f"Invalid input: '{audio_input}' is neither a YouTube URL nor an existing file path. Please try again.")


def blog_from_keyword():
""" Input blog keywords, research and write a factual blog."""
while True:
Expand Down
70 changes: 70 additions & 0 deletions lib/ai_writers/speech_to_blog/main_audio_to_blog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import os
import datetime #I wish
import sys
from textwrap import dedent
import openai
from tqdm import tqdm, trange
import time

from loguru import logger
logger.remove()
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)

from .write_blogs_from_youtube_videos import youtube_to_blog
from ...ai_web_researcher.gpt_online_researcher import do_google_serp_search
from ...ai_writers.combine_research_and_blog import blog_with_research
from ...blog_metadata.get_blog_metadata import blog_metadata
from ...blog_postprocessing.save_blog_to_file import save_blog_to_file


def generate_audio_blog(audio_input):
    """Generate and save a blog post from an audio source.

    Pipeline: transcribe the audio into a draft blog, run a Google SERP
    search on the derived title, merge the research with the draft,
    generate blog metadata, and save the result to a markdown file.

    Args:
        audio_input: A YouTube video URL or path to a local audio file.

    Exits the process (sys.exit(1)) if transcription, research, or blog
    composition fails; metadata/save failures are logged and tolerated.
    """
    # Accumulates the final blog text, later saved to a *.md file.
    blog_markdown_str = ""
    try:
        logger.info(f"Starting to write blog on URL: {audio_input}")
        yt_blog, yt_title = youtube_to_blog(audio_input)
    except Exception as e:
        logger.error(f"Error in youtube_to_blog: {e}")
        sys.exit(1)

    try:
        logger.info("Starting with online research for URL title.")
        research_report = do_google_serp_search(yt_title)
        print(research_report)
    except Exception as e:
        logger.error(f"Error in do_online_research: {e}")
        sys.exit(1)

    try:
        # Note: Check if the order of input matters for your function
        logger.info("Preparing a blog content from audio script and online research content...")
        blog_markdown_str = blog_with_research(research_report, yt_blog)
    except Exception as e:
        logger.error(f"Error in blog_with_research: {e}")
        sys.exit(1)

    # BUG FIX: defaults so the frontmatter below never raises NameError
    # when blog_metadata fails (its except clause only logs and continues).
    blog_title = blog_meta_desc = blog_tags = blog_categories = ""
    try:
        blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(blog_markdown_str)
    except Exception as err:
        logger.error(f"Failed to generate blog metadata: {err}")

    try:
        # TBD: Save the blog content as a .md file. Markdown or HTML ?
        # BUG FIX: generated_image_filepath was previously undefined, so
        # this call always raised NameError and the blog was never saved.
        # No image is generated in this pipeline, so pass None explicitly.
        generated_image_filepath = None
        save_blog_to_file(blog_markdown_str, blog_title, blog_meta_desc, blog_tags, blog_categories, generated_image_filepath)
    except Exception as err:
        logger.error(f"Failed to save final blog in a file: {err}")

    blog_frontmatter = dedent(f"""\n\n\n\
        ---
        title: {blog_title}
        categories: [{blog_categories}]
        tags: [{blog_tags}]
        Meta description: {blog_meta_desc.replace(":", "-")}
        ---\n\n""")
    logger.info(f"{blog_frontmatter}{blog_markdown_str}")
    logger.info(f"\n\n ################ Finished writing Blog for : {audio_input} #################### \n")
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,15 @@
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)

from .gpt_providers.stt_audio_blog import speech_to_text
from .gpt_providers.openai_chat_completion import openai_chatgpt

from ...gpt_providers.audio_to_text_generation.stt_audio_blog import speech_to_text
from ...gpt_providers.text_generation.main_text_generation import llm_text_gen


def youtube_to_blog(video_url):
"""Function to transcribe a given youtube url """
# fixme: Doesnt work all types of yt urls.
vid_id = video_url.split("=")[1]
#hti = Html2Image(output_path="../blog_images")
#hti.screenshot(url=video_url, save_as=f"yt-img-{vid_id}.png")
#yt_img_path = os.path.join("../blog_images", f"yt-img-{vid_id}.png")

try:
# Starting the speech-to-text process
Expand All @@ -44,7 +42,6 @@ def youtube_to_blog(video_url):
except Exception as e:
logger.error(f"Error in summarize_youtube_video: {e}")
sys.exit(1) # Exit the program due to error in summarize_youtube_video
return audio_blog_content


def summarize_youtube_video(user_content, gpt_providers):
Expand Down Expand Up @@ -77,21 +74,9 @@ def summarize_youtube_video(user_content, gpt_providers):
that will rank well in search engine results and engage readers effectively.
Follow above guidelines to craft a blog content from the following transcript:\n{user_content}
"""
if 'gemini' in gpt_providers:
try:
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
except Exception as err:
logger.error("Failed in getting GEMINI_API_KEY")
# Use gemini-pro model for text and image.
model = genai.GenerativeModel('gemini-pro')
try:
response = model.generate_content(prompt)
return response.text
except Exception as err:
logger.error("Failed to get response from gemini.")
elif 'openai' in gpt_providers:
try:
response = openai_chatgpt(prompt)
return response
except Exception as err:
SystemError(f"Error in generating blog summary: {err}")
try:
response = llm_text_gen(prompt)
return response
except Exception as err:
logger.error(f"Failed to summarize_youtube_video: {err}")
exit(1)
2 changes: 1 addition & 1 deletion lib/blog_metadata/get_blog_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
)


def blog_metadata(blog_content, search_keywords, blog_titles):
def blog_metadata(blog_content, search_keywords=None, blog_titles=None):
""" Common function to get blog metadata """
blog_title = generate_blog_title(blog_content, search_keywords, blog_titles)
blog_meta_desc = generate_blog_description(blog_content)
Expand Down
79 changes: 63 additions & 16 deletions lib/gpt_providers/audio_to_text_generation/stt_audio_blog.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,22 +34,27 @@ def speech_to_text(video_url, output_path='.'):
SystemExit: If a critical error occurs that prevents successful execution.
"""
try:
logger.info(f"Accessing YouTube URL: {video_url}")
yt = YouTube(video_url, on_progress_callback=progress_function)

logger.info("Fetching the highest quality audio stream")
audio_stream = yt.streams.filter(only_audio=True).first()

if audio_stream is None:
logger.warning("No audio stream found for this video.")
return None

#logger.info(f"Downloading audio for: {yt.title}")
global progress_bar
progress_bar = tqdm(total=1.0, unit='iB', unit_scale=True, desc=yt.title)
audio_file = audio_stream.download(output_path)
progress_bar.close()
logger.info(f"Audio downloaded: {yt.title} to {output_path}")
audio_file = None
if video_url.startswith("https://www.youtube.com/") or video_url.startswith("http://www.youtube.com/"):
logger.info(f"Accessing YouTube URL: {video_url}")
yt = YouTube(video_url, on_progress_callback=progress_function)

logger.info("Fetching the highest quality audio stream")
audio_stream = yt.streams.filter(only_audio=True).first()

if audio_stream is None:
logger.warning("No audio stream found for this video.")
return None

logger.info(f"Downloading audio for: {yt.title}")
global progress_bar
progress_bar = tqdm(total=1.0, unit='iB', unit_scale=True, desc=yt.title)
audio_file = audio_stream.download(output_path)
progress_bar.close()
logger.info(f"Audio downloaded: {yt.title} to {output_path}")
# Audio filepath from local directory.
elif os.path.exists(audio_input):
audio_file = video_url

# Checking file size
max_file_size = 24 * 1024 * 1024 # 24MB
Expand All @@ -59,6 +64,8 @@ def speech_to_text(video_url, output_path='.'):
logger.info(f"Downloaded Audio Size is: {file_size_MB:.2f} MB")
if file_size > max_file_size:
logger.error("File size exceeds 24MB limit.")
# FIXME: We can chunk hour long videos, the code is not tested.
#long_video(audio_file)
sys.exit("File size limit exceeded.")

try:
Expand Down Expand Up @@ -86,3 +93,43 @@ def speech_to_text(video_url, output_path='.'):
if os.path.exists(audio_file):
os.remove(audio_file)
logger.info("Temporary audio file removed.")


def long_video(temp_file_name):
    """Transcribe a long audio/video file with OpenAI's Whisper API, in chunks.

    Whisper enforces a request size limit, so the audio is split into
    10-minute segments with moviepy; each segment is written to a temporary
    mp3, transcribed separately, and the texts are concatenated.

    Args:
        temp_file_name: Path to the downloaded audio/video file to transcribe.

    Returns:
        str: The combined transcript of all chunks.
    """
    # BUG FIX: was `app.logger` — `app` is undefined in this module; the
    # module-level loguru `logger` is what the rest of the file uses.
    logger.info(f"Processing the YT video: {temp_file_name}")
    full_audio = mp.AudioFileClip(temp_file_name)
    try:
        duration = full_audio.duration
        chunk_length = 600  # 10 minutes in seconds
        chunks = [full_audio.subclip(start, min(start + chunk_length, duration))
                  for start in range(0, int(duration), chunk_length)]

        combined_transcript = ""
        for i, chunk in enumerate(chunks):
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_chunk_file:
                chunk.write_audiofile(audio_chunk_file.name, codec="mp3")
                with open(audio_chunk_file.name, "rb") as audio_file:
                    # Transcribe each chunk using OpenAI's Whisper API
                    logger.info(f"Transcribing chunk {i+1}/{len(chunks)}")
                    transcript = openai.Audio.transcribe("whisper-1", audio_file)
                    combined_transcript += transcript['text'] + "\n\n"

            # Remove the chunk audio file
            os.remove(audio_chunk_file.name)
    finally:
        # Release the ffmpeg reader held by the clip even on failure.
        full_audio.close()

    # BUG FIX: the transcript was previously built but never returned,
    # making the whole function's work unobservable to callers.
    return combined_transcript

2 changes: 1 addition & 1 deletion lib/gpt_providers/text_generation/gemini_pro_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def gemini_text_response(prompt, temperature, top_p, n, max_tokens):
"top_k": n,
"max_output_tokens": max_tokens
}
model = genai.GenerativeModel(model_name="gemini-pro", generation_config=generation_config)
model = genai.GenerativeModel(model_name="gemini-1.0-pro", generation_config=generation_config)
try:
response = model.generate_content(prompt, stream=True)
for chunk in response:
Expand Down
Loading

0 comments on commit 54c51e5

Please sign in to comment.