- Export to docx is working !!

aiyu-ayaan · Jun 13, 2024 · 832585f · 832585f
1 parent 993da8d
commit 832585f
Show file tree

Hide file tree

Showing 7 changed files with 120 additions and 15 deletions.
diff --git a/.gitignore b/.gitignore
@@ -3,4 +3,5 @@
 .env
 src/gemini-cli/key.txt
 dist/
-build/
+build/
+*.docx
diff --git a/docs/commands.md b/docs/commands.md
@@ -3,12 +3,17 @@
 This Markdown table provides a comprehensive overview of the command-line arguments available for interacting with
 Gemini:
 
-| Argument     | Shorthand | Type                         | Description                                                                                                                | Default      |
-|--------------|-----------|------------------------------|----------------------------------------------------------------------------------------------------------------------------|--------------|
-| `question`   | `-q`      | `str`                        | The question you want to ask Gemini. This is a required argument.                                                          | **Required** |
-| `word-limit` | `-wl`     | `int`                        | Sets the maximum number of words Gemini should use in its response. A value of 0 indicates no limit.                       | 0            |
-| `info`       | `-i`      | `InfoAction` (custom action) | Displays information about the Gemini CLI.                                                                                 |              |
-| `youtube`    | `-yt`     | `str`                        | Provides a YouTube URL from which Gemini can extract the transcript and potentially answer questions based on the content. |              |
+| Argument           | Shorthand | Type                         | Description                                                                                                                | Default      |
+|--------------------|-----------|------------------------------|----------------------------------------------------------------------------------------------------------------------------|--------------|
+| `question`         | `-q`      | `str`                        | The question you want to ask Gemini. This is a required argument.                                                          | **Required** |
+| `word-limit`       | `-wl`     | `int`                        | Sets the maximum number of words Gemini should use in its response. A value of 0 indicates no limit.                       | 0            |
+| `info`             | `-i`      | `InfoAction` (custom action) | Displays information about the Gemini CLI.                                                                                 |              |
+| `youtube`          | `-yt`     | `str`                        | Provides a YouTube URL from which Gemini can extract the transcript and potentially answer questions based on the content. |              |
+| `pdf`              | `-p`      | `str`                        | PDF file path to summarize                                                                                                 |              |
+| `start-page-index` | `-spi`    | `int`                        | Start page index for the PDF file                                                                                          | 0            |
+| `end-page-index`   | `-epi`    | `int`                        | End page index for the PDF file                                                                                            |              |
+| `export-docx`      | `-ed`     | `str`                        | Export the response to docx file                                                                                           |              |
+| `output-path`      | `-op`     | `str`                        | Output path for the docx file                                                                                              | .            |
 
 **Detailed Explanations:**
 
@@ -23,6 +28,15 @@ Gemini:
 * **`youtube` (`-yt`)**: This optional argument allows you to provide a YouTube URL. Gemini can potentially extract the
   transcript from the video and use it as a source of information to answer your questions. This functionality depends
   on Gemini's capabilities and might not be available in all cases.
+* **`pdf` (`-p`)**: This optional argument allows you to specify the path to a PDF file that you want Gemini to
+  summarize.
+* **`start-page-index` (`-spi`)**: This optional argument allows you to specify the start page index for the PDF file.
+* **`end-page-index` (`-epi`)**: This optional argument allows you to specify the end page index for the PDF file.
+* **`export-docx` (`-ed`)**: This optional argument exports the response to a docx file. Its value is the name of the
+  output file; the `.docx` extension is added automatically if it is missing.
+* **`output-path` (`-op`)**: This optional argument allows you to specify the directory in which the docx file is saved.
+  If not specified, the docx file will be saved in the current directory.
+* **Note**: The availability of certain features like PDF summarization or YouTube transcript processing may depend on
+  the specific implementation of Gemini and the underlying libraries or APIs it uses.
 
 **Example Usage:**
 
@@ -38,4 +52,16 @@ gemini -i
 
 # (if supported) Ask a question based on a YouTube transcript
 gemini -q "Who is the director of this movie?" -yt https://www.youtube.com/watch?v=...
+
+# Summarize a PDF file
+gemini -p "path/to/file.pdf"
+
+# Summarize a specific range of pages from a PDF file
+gemini -p "path/to/file.pdf" -spi 5 -epi 10
+
+# Ask a question from a provided PDF file
+gemini -p "path/to/file.pdf" -q "What is the main idea of this document?"
+
+# Export the response to a docx file
+gemini -q "What is the capital of France?" -ed "output.docx"
 ```
diff --git a/requirements.txt b/requirements.txt
@@ -101,5 +101,5 @@ wcwidth==0.2.13
 webencodings==0.5.1
 yarg==0.1.9
 youtube-transcript-api==0.6.2
-
-PyPDF2~=3.0.1
+PyPDF2~=3.0.1
+pypandoc~=1.13
diff --git a/src/gemini-cli/export/ExportDocx.py b/src/gemini-cli/export/ExportDocx.py
@@ -0,0 +1,53 @@
+import pypandoc as pydoc
+import subprocess
+from rich import console
+
+console = console.Console()
+
+
+def _is_pandoc_installed():
+    """Check if Pandoc is installed
+    Returns:
+        bool: True if Pandoc is installed, False otherwise
+    """
+    try:
+        output = subprocess.check_output(['pandoc', '--version'])
+        return True
+    except FileNotFoundError:
+        return False
+
+
+def export_md_to_docx(input_text: str, file_name='output.docx', output_file_path: str = '.'):
+    """Export Markdown text to docx file
+    Args:
+        input_text (str): Markdown text
+        file_name (str): Name of the output docx file. Default to 'output.docx'.
+        output_file_path (str): Path to the output docx file. Default to current directory.
+    """
+    # Check if Pandoc is installed
+    if not _is_pandoc_installed():
+        console.log("Pandoc is not installed. Installing Pandoc is required to export to docx format.")
+        try:
+            console.log("Installing Pandoc...", style="bold")
+            pydoc.download_pandoc()
+            console.log("Pandoc installation complete.")
+        except Exception as e:
+            console.log(f"Error installing Pandoc: {e}\nPlease install Pandoc manually and try again.",
+                        style="bold red")
+            return None
+
+    # check output file path end with / or not if yes then remove it
+    if output_file_path.endswith('/'):
+        output_file_path = output_file_path[:-1]
+
+    # check filename ends with .docx or not if not then add it
+    if not file_name.endswith('.docx'):
+        file_name = file_name + '.docx'
+    # Convert and save the output to the specified file
+    try:
+        with console.status("Exporting to docx...", spinner="dots"):
+            pydoc.convert_text(input_text, 'docx', format='md', outputfile=f'{output_file_path}/{file_name}')
+        console.log(f"Markdown text exported to docx: {output_file_path}/{file_name}", style="bold green")
+    except Exception as e:
+        console.log(f"Error converting to docx: {e}", style="bold red")
+        return None
diff --git a/src/gemini-cli/export/__init__.py b/src/gemini-cli/export/__init__.py
@@ -0,0 +1 @@
+from export.ExportDocx import export_md_to_docx
diff --git a/src/gemini-cli/gemini-cli.py b/src/gemini-cli/gemini-cli.py
@@ -4,6 +4,7 @@
 import argparse
 import sys
 from version import InfoAction
+from export import export_md_to_docx
 
 console = console.Console()
 
@@ -68,6 +69,8 @@ def load_key_from_root() -> str:
     parser.add_argument('--pdf', '-p', type=str, help='PDF file path to summarize')
     parser.add_argument('--start-page-index', '-spi', type=int, help='Start page index for the PDF file', default=0)
     parser.add_argument('--end-page-index', '-epi', type=int, help='End page index for the PDF file', default=None)
+    parser.add_argument('--export-docx', '-ed', type=str, help='Export the response to docx file')
+    parser.add_argument('--output-path', '-op', type=str, help='Output path for the docx file', default='.')
 
     # parse the arguments
     args = parser.parse_args()
@@ -77,6 +80,8 @@ def load_key_from_root() -> str:
     pdf_file_path = args.pdf
     start_page_index = args.start_page_index
     end_page_index = args.end_page_index
+    export_docx = args.export_docx
+    output_path = args.output_path
 
     g = gemini.Gemini(key=key)
 
@@ -89,15 +94,22 @@ def load_key_from_root() -> str:
             end=end_page_index
         )
         # console.print(text, style='bold green')
-        g.generate_response_from_pdf(text, question, max_words)
+        response = g.generate_response_from_pdf(text, question, max_words)
+        if export_docx and len(response) > 0 and response:
+            export_md_to_docx(response, export_docx, output_path)
         sys.exit(0)
 
     if youtube_url:
-        g.summarize_transcript(youtube_url=youtube_url, max_words=max_words, question=question if question else '')
+        response = g.summarize_transcript(youtube_url=youtube_url, max_words=max_words,
+                                          question=question if question else '')
+        if export_docx and len(response) > 0 and response:
+            export_md_to_docx(response, export_docx, output_path)
         sys.exit(0)
     # check if the question is empty
     if len(question) == 0:
         print('Question cannot be empty')
         sys.exit(1)
     # create a Gemini instance
-    g.ask(question.strip(), max_words)
+    response = g.ask(question.strip(), max_words)
+    if export_docx and len(response) > 0 and response:
+        export_md_to_docx(response, export_docx, output_path)
diff --git a/src/gemini-cli/gemini/gemini.py b/src/gemini-cli/gemini/gemini.py
@@ -18,12 +18,14 @@ def __init__(self, key):
         genai.configure(api_key=key)
         self.__model = genai.GenerativeModel('gemini-1.5-flash')
 
-    def ask(self, question: str, max_words: int = 0):
+    def ask(self, question: str, max_words: int = 0) -> str | None:
         """Ask a question to the model
 
         Args:
             question (str): Question to ask
             max_words (int, optional): Word limit. Default to 0.
+        Returns:
+            str | None: Response from the model
         """
         console.print(f'🐼', f'Asking: {question}\n', style='bold blue')
         with console.status('[bold green]Generating response...', spinner='moon'):
@@ -38,17 +40,21 @@ def ask(self, question: str, max_words: int = 0):
 
         if has_error:
             console.print('❌', f' Error: {response}', style='bold red')
+            return None
         else:
             markdown = Markdown(response)
             console.print(markdown, style='bold green')
+        return response
 
-    def summarize_transcript(self, youtube_url: str, question: str = '', max_words: int = 0):
+    def summarize_transcript(self, youtube_url: str, question: str = '', max_words: int = 0) -> str | None:
         """Summarize a transcript from a YouTube video or can answer a question from the transcript
 
         Args:
             youtube_url (str): link to the YouTube video
             question (str, optional): Question to want to ask. Defaults to ''.
             max_words (int, optional): Word limit. Default to 0.
+        Returns:
+            str | None: Response from the transcript
         """
         console.print(f'🐼', f'Getting transcript from: {youtube_url}\n', style='bold blue')
         with console.status(f'[bold green]{'Generating answer...' if question else 'Generating summary...'}',
@@ -71,17 +77,21 @@ def summarize_transcript(self, youtube_url: str, question: str = '', max_words:
                 response = e
         if has_error:
             console.print('❌', f' Error: {response}', style='bold red')
+            return None
         else:
             markdown = Markdown(response)
             console.print(markdown, style='bold green')
+        return response
 
-    def generate_response_from_pdf(self, text: str, question: str, max_words: int = 0):
+    def generate_response_from_pdf(self, text: str, question: str, max_words: int = 0) -> str | None:
         """Generate a response from the PDF
 
         Args:
             text (str): Text to generate a response from
             question (str): Question to ask
             max_words (int, optional): Word limit. Default to 0.
+        Returns:
+            str | None: Response from the PDF
         """
         console.print(f'🐼', f'Generating response from the PDF\n', style='bold blue')
         with console.status('[bold green]Generating response...', spinner='moon'):
@@ -98,6 +108,8 @@ def generate_response_from_pdf(self, text: str, question: str, max_words: int =
 
         if has_error:
             console.print('❌', f' Error: {response}', style='bold red')
+            return None
         else:
             markdown = Markdown(response)
             console.print(markdown, style='bold green')
+        return response
-Original file line number
+Diff line change
@@ Expand Up / @@ -3,4 +3,5 @@ @@
     .env
     src/gemini-cli/key.txt
     dist/
-    build/
+    build/
+    *.docx
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		from export.ExportDocx import export_md_to_docx