- PDF support added: the tool can generate a summary automatically, or the user can ask …

…for it
aiyu-ayaan · Jun 12, 2024 · eedab68 · eedab68
1 parent a389152
commit eedab68
Show file tree

Hide file tree

Showing 6 changed files with 108 additions and 8 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -101,3 +101,5 @@ wcwidth==0.2.13
 webencodings==0.5.1
 yarg==0.1.9
 youtube-transcript-api==0.6.2
+
+PyPDF2~=3.0.1
diff --git a/src/gemini-cli/gemini-cli.py b/src/gemini-cli/gemini-cli.py
@@ -1,5 +1,6 @@
 from rich import console
 import gemini as gemini
+import pdf as pdf
 import argparse
 import sys
 from version import InfoAction
@@ -58,19 +59,37 @@ def load_key_from_root() -> str:
     # load the key from the root
     key = load_key_from_root()
     parser = argparse.ArgumentParser(description='Gemini CLI')
-    parser.add_argument('--question', '-q', type=str, help='Question to ask Gemini')
+    parser.add_argument('--question', '-q', type=str, help='Question to ask Gemini', default='')
     parser.add_argument('--word-limit', '-wl', help='Word limit for the response', type=int, default=0)
     parser.add_argument('--info', '-i', action=InfoAction, help='About Gemini CLI')
     parser.add_argument('--youtube', '-yt', type=str, help='YouTube URL to get transcript from')
+    parser.add_argument('--pdf', '-p', type=str, help='PDF file path to summarize')
+    parser.add_argument('--start-page-index', '-spi', type=int, help='Start page index for the PDF file', default=0)
+    parser.add_argument('--end-page-index', '-epi', type=int, help='End page index for the PDF file', default=None)
 
     # parse the arguments
     args = parser.parse_args()
     question = args.question
     max_words = args.word_limit
     youtube_url = args.youtube
+    pdf_file_path = args.pdf
+    start_page_index = args.start_page_index
+    end_page_index = args.end_page_index
 
     g = gemini.Gemini(key=key)
 
+    # check if the pdf file path is not empty
+    if pdf_file_path:
+        p = pdf.PyPdfHelper(pdf_file_path)
+        console.print('Summarizing PDF...\nThis will take a significant time', style='bold blue')
+        text = p.get_text(
+            start=start_page_index,
+            end=end_page_index
+        )
+        # console.print(text, style='bold green')
+        g.generate_response_from_pdf(text, question, max_words)
+        sys.exit(0)
+
     if youtube_url:
         g.summarize_transcript(youtube_url=youtube_url, max_words=max_words, question=question if question else '')
         sys.exit(0)

diff --git a/src/gemini-cli/gemini/gemini.py b/src/gemini-cli/gemini/gemini.py
@@ -2,7 +2,7 @@
 from rich.console import Console
 from rich.markdown import Markdown
 
-from youtube_transcript import YoutubeTranscript
+from gemini.youtube_transcript import YoutubeTranscript
 
 console = Console()
 
@@ -23,7 +23,7 @@ def ask(self, question: str, max_words: int = 0):
 
         Args:
             question (str): Question to ask
-            max_words (int, optional): Word limit. Defaults to 0.
+            max_words (int, optional): Word limit. Default to 0.
         """
         console.print(f'🐼', f'Asking: {question}\n', style='bold blue')
         with console.status('[bold green]Generating response...', spinner='moon'):
@@ -43,12 +43,12 @@ def ask(self, question: str, max_words: int = 0):
             console.print(markdown, style='bold green')
 
     def summarize_transcript(self, youtube_url: str, question: str = '', max_words: int = 0):
-        """Summarize a transcript from a youtube video or can answer a question from the transcript
+        """Summarize a transcript from a YouTube video or can answer a question from the transcript
 
         Args:
-            youtube_url (str): link to the youtube video
-            question (str, optional): Question to wants to ask. Defaults to ''.
-            max_words (int, optional): Word limit. Defaults to 0.
+            youtube_url (str): link to the YouTube video
+            question (str, optional): Question to want to ask. Defaults to ''.
+            max_words (int, optional): Word limit. Default to 0.
         """
         console.print(f'🐼', f'Getting transcript from: {youtube_url}\n', style='bold blue')
         with console.status(f'[bold green]{'Generating answer...' if question else 'Generating summary...'}',
@@ -74,3 +74,30 @@ def summarize_transcript(self, youtube_url: str, question: str = '', max_words:
         else:
             markdown = Markdown(response)
             console.print(markdown, style='bold green')
+
+    def generate_response_from_pdf(self, text: str, question: str, max_words: int = 0):
+        """Summarize PDF text, or answer a question about it, and print the result.
+
+        The prompt is the PDF text followed by either a summary instruction
+        (when ``question`` is empty) or the user's question. A non-zero
+        ``max_words`` appends a word-limit hint to the prompt.
+
+        Args:
+            text (str): Text extracted from the PDF
+            question (str): Question to ask; an empty string requests a summary
+            max_words (int, optional): Word limit. Defaults to 0 (no limit hint).
+        """
+        console.print(f'🐼', f'Generating response from the PDF\n', style='bold blue')
+        failure = None
+        answer = None
+        with console.status('[bold green]Generating response...', spinner='moon'):
+            try:
+                # Prompt building stays inside the try so that any error here
+                # is reported the same way as a model failure.
+                if len(question) == 0:
+                    prompt = text + '\n\nSummaries the context as elaborated possible'
+                else:
+                    prompt = text + f'\n\nQuestion: {question}'
+                if max_words != 0:
+                    prompt = prompt + f' Word limit {max_words}'
+                answer = self.__model.generate_content(prompt).text
+            except Exception as err:
+                failure = err
+
+        # Output happens only after the status spinner has been closed.
+        if failure is not None:
+            console.print('❌', f' Error: {failure}', style='bold red')
+            return
+        console.print(Markdown(answer), style='bold green')
diff --git a/src/gemini-cli/pdf/__init__.py b/src/gemini-cli/pdf/__init__.py
@@ -0,0 +1 @@
+from pdf.pypdf_helper import PyPdfHelper
diff --git a/src/gemini-cli/pdf/pypdf_helper.py b/src/gemini-cli/pdf/pypdf_helper.py
@@ -0,0 +1,51 @@
+from PyPDF2 import PdfReader
+from rich import console
+import logging
+from concurrent.futures import ThreadPoolExecutor
+
+console = console.Console()
+
+# logging.basicConfig(level=logging.INFO)
+# Disable all logging
+logging.disable(logging.CRITICAL)
+
+
+class PyPdfHelper:
+    """Reads a PDF with PyPDF2's ``PdfReader`` and extracts its text page by page."""
+
+    def __init__(self, path: str):
+        """Initializes the PyPdfHelper class to read PDF files using PyPDF2 library
+        Args:
+            path (str): Path to the PDF file
+        Raises:
+            Exception: Any error raised by ``PdfReader`` is logged and re-raised.
+        """
+        try:
+            self.path = path
+            self.reader = PdfReader(path)
+        except Exception as e:
+            logging.error('Error reading PDF file: %s', e)
+            raise
+
+    def _extract_page_text(self, page_num: int) -> str:
+        """Extracts the text of a single page
+        Args:
+            page_num (int): Index of the page in ``self.reader.pages``
+        Returns:
+            str: Text of the page, or an empty string if extraction fails
+        """
+        try:
+            return self.reader.pages[page_num].extract_text()
+        except Exception as e:
+            # Best effort: a single unreadable page must not abort the whole document.
+            logging.error('Error extracting text from page %d: %s', page_num, e)
+            return ""
+
+    def get_text(self, start: int = 0, end: int = None) -> str:
+        """Extracts text from the PDF file from the given start and end page numbers
+        Args:
+            start (int, optional): Start page index (inclusive). Defaults to 0.
+            end (int, optional): End page index (exclusive). Defaults to None,
+                meaning "up to the last page". Values beyond the page count
+                are clamped to the page count.
+        Returns:
+            str: Extracted text of the selected pages, concatenated in page order
+        Raises:
+            Exception: Any unexpected error is logged and re-raised.
+        """
+        try:
+            # PyPDF2 3.x removed PdfReader.getNumPages(); len(reader.pages) is
+            # the supported way to get the page count.
+            total_pages = len(self.reader.pages)
+            # Clamp so no worker is spent on pages that do not exist.
+            end = total_pages if end is None else min(end, total_pages)
+            pages = range(start, end)
+
+            with ThreadPoolExecutor() as executor:
+                # map() yields results in the same order as ``pages``.
+                texts = executor.map(self._extract_page_text, pages)
+
+            return "".join(texts)
+        except Exception as e:
+            logging.error('Error reading PDF file: %s', e)
+            raise
diff --git a/version.py → src/gemini-cli/version.py b/version.py → src/gemini-cli/version.py
@@ -5,7 +5,7 @@
 console = Console()
 
 __name__ = 'gemini-cli'
-__version__ = '0.1.0'
+__version__ = '0.2.0'
 __author__ = 'Ayaan'
 __author_email__ = '[email protected]'
 __description__ = 'A CLI for Gemini'