-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- PDF support added: the tool can generate a summary of the PDF, or the user can ask for it
- Loading branch information
1 parent
a389152
commit eedab68
Showing
6 changed files
with
108 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -101,3 +101,5 @@ wcwidth==0.2.13 | |
webencodings==0.5.1 | ||
yarg==0.1.9 | ||
youtube-transcript-api==0.6.2 | ||
|
||
PyPDF2~=3.0.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from pdf.pypdf_helper import PyPdfHelper |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
from PyPDF2 import PdfReader | ||
from rich import console | ||
import logging | ||
from concurrent.futures import ThreadPoolExecutor | ||
|
||
console = console.Console() | ||
|
||
# logging.basicConfig(level=logging.INFO) | ||
# Disable all logging | ||
logging.disable(logging.CRITICAL) | ||
|
||
|
||
class PyPdfHelper:
    """Thin wrapper around a PyPDF2 ``PdfReader`` for extracting page text."""

    def __init__(self, path: str):
        """Initializes the PyPdfHelper class to read PDF files using PyPDF2 library

        Args:
            path (str): Path to the PDF file

        Raises:
            Exception: Any error raised by ``PdfReader`` while opening the
                file is logged and re-raised unchanged.
        """
        self.path = path
        try:
            self.reader = PdfReader(path)
        except Exception as e:
            logging.error('Error reading PDF file: %s', e)
            raise

    def _extract_page_text(self, page_num: int) -> str:
        """Return the text of a single page, or "" if extraction fails.

        Args:
            page_num (int): Zero-based index into ``self.reader.pages``.

        Returns:
            str: The extracted text. An empty string is returned when the
                page cannot be read or yields no text, so that one bad page
                does not abort extraction of the whole document.
        """
        try:
            # Coerce a missing result to "" so the caller can always join.
            return self.reader.pages[page_num].extract_text() or ""
        except Exception as e:
            logging.error('Error extracting text from page %d: %s', page_num, e)
            return ""

    def get_text(self, start: int = 0, end: int = None) -> str:
        """Extracts text from the PDF file from the given start and end page numbers

        Args:
            start (int, optional): Start page index (inclusive). Negative
                values are treated as 0. Default to 0.
            end (int, optional): End page index (exclusive). Values past the
                last page are clamped to the page count. Defaults to None,
                meaning "through the last page".

        Returns:
            str: Extracted text of the selected pages, concatenated in page
                order. Empty when the selected range contains no pages.

        Raises:
            Exception: Any unexpected error is logged and re-raised unchanged.
        """
        try:
            # PdfReader.getNumPages() was removed in PyPDF2 3.0; the page
            # count is the length of the ``pages`` sequence.
            page_count = len(self.reader.pages)
            first = max(start, 0)
            last = page_count if end is None else min(end, page_count)

            # NOTE(review): all workers share one reader object; confirm
            # that PdfReader tolerates concurrent page access.
            with ThreadPoolExecutor() as executor:
                # map() yields results in submission order, so page order
                # is preserved regardless of which worker finishes first.
                texts = list(
                    executor.map(self._extract_page_text, range(first, last))
                )

            return "".join(texts)
        except Exception as e:
            logging.error('Error reading PDF file: %s', e)
            raise
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,7 @@ | |
console = Console() | ||
|
||
__name__ = 'gemini-cli' | ||
__version__ = '0.1.0' | ||
__version__ = '0.2.0' | ||
__author__ = 'Ayaan' | ||
__author_email__ = '[email protected]' | ||
__description__ = 'A CLI for Gemini' | ||
|