Resolve

h2oai · Oct 27, 2024 · d305e30 · d305e30
2 parents ec53525 + 42e9a05
commit d305e30
Show file tree

Hide file tree

Showing 7 changed files with 36 additions and 26 deletions.
diff --git a/docs/run_patches.sh b/docs/run_patches.sh
@@ -15,12 +15,12 @@ sed -i  's/with HiddenPrints():/if True:/g' $sp/langchain_community/utilities/se
 # find "$sp" -type f -name "*.py" -exec sed -i -E 's/(sys\.stdout\s*=\s*.*)/pass # \1/; s/(sys\.stderr\s*=\s*.*)/pass # \1/' {} +
 
 # use pytubefix instead, pytube too old and various issues
-sed -i 's/Pytube/PytubeFix/g'  $sp/fiftyone/utils/youtube.py
-sed -i 's/pytube>=15/pytube>=6/g' $sp/fiftyone/utils/youtube.py
-sed -i 's/pytube/pytubefix/g' $sp/fiftyone/utils/youtube.py
+#sed -i 's/Pytube/PytubeFix/g'  $sp/fiftyone/utils/youtube.py
+#sed -i 's/pytube>=15/pytube>=6/g' $sp/fiftyone/utils/youtube.py
+#sed -i 's/pytube/pytubefix/g' $sp/fiftyone/utils/youtube.py
 
 # diff -Naru /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/pytubefix/extract.py ~/extract.py > docs/pytubefix.patch
-patch $sp/pytubefix/extract.py docs/pytubefix.patch
+#patch $sp/pytubefix/extract.py docs/pytubefix.patch
 
 # fix asyncio same way websockets was fixed, else keep hitting errors in async calls
 # https://github.com/python-websockets/websockets/commit/f9fd2cebcd42633ed917cd64e805bea17879c2d7

diff --git a/openai_server/agent_prompting.py b/openai_server/agent_prompting.py
@@ -397,7 +397,7 @@ def get_ask_question_about_image_helper(base_url, api_key, model):
 # execution: true
 python {cwd}/openai_server/agent_tools/ask_question_about_image.py --query "QUERY" --file "LOCAL FILE NAME"
 ```
-* usage: {cwd}/openai_server/agent_tools/ask_question_about_image.py [-h] [--timeout TIMEOUT] [--system_prompt SYSTEM_PROMPT] --query "QUERY" [--url URL] [--file FILE]
+* usage: {cwd}/openai_server/agent_tools/ask_question_about_image.py [-h] --query "QUERY" [--url URL] [--file FILE] [--system_prompt SYSTEM_PROMPT]
 * ask_question_about_image gives a text response for either a URL or local file
 * ask_question_about_image can be used to critique any image, e.g. a plot, a photo, a screenshot, etc. either made by code generation or among provided files or among URLs.
 * ask_question_about_image accepts most image files allowed by PIL (Pillow) except svg.
@@ -583,13 +583,14 @@ def get_rag_helper(base_url, api_key, model, autogen_timeout, text_context_list,
 ```sh
 # filename: my_question_about_documents.sh
 # execution: true
-python {cwd}/openai_server/agent_tools/ask_question_about_documents.py --query "QUERY" [--files FILES [FILES ...]] [--urls URLS [URLS ...]]
+python {cwd}/openai_server/agent_tools/ask_question_about_documents.py --query "QUERY" [--files FILES [FILES ...]] [--urls URLS [URLS ...]] [--csv]
 ```
 * usage: {cwd}/openai_server/agent_tools/ask_question_about_documents.py [-h] --query "QUERY" [-b BASELINE] [--system_prompt SYSTEM_PROMPT] [--files FILES [FILES ...]]
 * Do not include any file names in your QUERY, just query the document content.
 * ask_question_about_documents.py --files can be any local image(s) (png, jpg, etc.), local textual file(s) (txt, json, python, xml, md, html, rtf, rst, etc.), or local document(s) (pdf, docx, doc, epub, pptx, ppt, xls, xlsx)
 * ask_question_about_documents.py --urls can be any url(s) (http://www.cnn.com, https://aiindex.stanford.edu/wp-content/uploads/2024/04/HAI_2024_AI-Index-Report.pdf, etc.).
 * Do not use ask_question_about_documents.py just to query individual images, use ask_question_about_image.py for that.
+* If need structured output for data analysis, use --csv
 """
     if text_context_list or image_file:
         rag_helper += "* Absolutely you should always run ask_question_about_documents once with -b to get a baseline answer if the user has provided documents.\n"

diff --git a/openai_server/agent_tools/ask_question_about_documents.py b/openai_server/agent_tools/ask_question_about_documents.py
@@ -130,17 +130,17 @@ def ask_question_about_documents():
         image_files = []
 
     parser = argparse.ArgumentParser(description="RAG Tool")
-    parser.add_argument("-p", "--prompt", "--query", type=str, required=True, help="User prompt or query")
-    parser.add_argument("-j", "--json", action="store_true", default=False, help="Output results as JSON")
-    parser.add_argument("-c", "--csv", action="store_true", default=False, help="Output results as CSV")
-    parser.add_argument("-b", "--baseline", required=False, action='store_true',
+    parser.add_argument("--prompt", "--query", type=str, required=True, help="User prompt or query")
+    parser.add_argument("--json", action="store_true", default=False, help="Output results as JSON")
+    parser.add_argument("--csv", action="store_true", default=False, help="Output results as CSV")
+    parser.add_argument("--baseline", required=False, action='store_true',
                         help="Whether to get baseline from user docs")
     parser.add_argument("--files", nargs="+", required=False,
                         help="Files of documents with optionally additional images to ask question about.")
     parser.add_argument("--urls", nargs="+", required=False,
                         help="URLs to ask question about")
     parser.add_argument("-m", "--model", type=str, required=False, help="OpenAI or Open Source model to use")
-    parser.add_argument("--max_time", type=float, required=False, default=default_max_time,
+    parser.add_argument("--timeout", type=float, required=False, default=default_max_time,
                         help="Maximum time to wait for response")
     parser.add_argument("--system_prompt", type=str, required=False, default=system_prompt, help="System prompt")
     parser.add_argument("--chat_conversation_file", type=str, required=False,
@@ -217,13 +217,16 @@ def ask_question_about_documents():
                       chat_conversation=chat_conversation,
                       model=args.model,
                       system_prompt=args.system_prompt,
-                      max_time=args.max_time,
+                      max_time=args.timeout,
                       )
 
-    is_small = len(text_context_list) < 4 * 8192
+    is_small = len(text_context_list) < 4 * 1024
 
     if args.csv or is_small:
-        prompt_csv = "Extract all information in a well-organized form as a CSV so it can be used for data analysis or plotting.  Try to make a single CSV if possible.  Ensure each CSV block of output is inside a code block with triple backticks with the csv language tag."
+        if not args.prompt:
+            prompt_csv = "Extract all information in a well-organized form as a CSV so it can be used for data analysis or plotting.  Try to make a single CSV if possible.  Ensure each CSV block of output is inside a code block with triple backticks with the csv language tag."
+        else:
+            prompt_csv = "Extract requested information in a well-organized form as a CSV so it can be used for data analysis or plotting.  Try to make a single CSV if possible.  Ensure each CSV block of output is inside a code block with triple backticks with the csv language tag.\n\nRequested information: " + args.prompt
         csv_answer = get_rag_answer(prompt_csv, tag='', simple=True, **rag_kwargs)
         matches = re.findall(r'```(?:[a-zA-Z]*)\n(.*?)```', csv_answer, re.DOTALL)
         for match in matches:
@@ -232,7 +235,7 @@ def ask_question_about_documents():
                 f.write(match)
             print(f"CSV output written to {csv_filename}. You can use this with code generation in order to answer the user's question or obtain some intermediate step using pandas etc.  Remember, you are not good at solving puzzles, math, or doing question-answer on tabular data, so use these results in python code in order to solve such tasks.\n")
 
-    if args.json or is_small:
+    if args.json:
         json_kwargs = rag_kwargs.copy()
         json_kwargs['guided_json'] = None
         json_kwargs['response_format'] = 'json_object'

diff --git a/openai_server/agent_tools/ask_question_about_image.py b/openai_server/agent_tools/ask_question_about_image.py
@@ -49,20 +49,20 @@ def main():
     default_max_time = int(os.getenv('H2OGPT_AGENT_OPENAI_TIMEOUT', "120"))
 
     parser = argparse.ArgumentParser(description="OpenAI Vision API Script")
-    parser.add_argument("-t", "--timeout", type=int, default=60, help="Timeout for API calls")
-    parser.add_argument("-s", "--system_prompt", type=str,
+    parser.add_argument("--timeout", type=int, default=60, help="Timeout for API calls")
+    parser.add_argument("--system_prompt", type=str,
                         default="""You are a highly capable AI assistant with advanced vision capabilities.
 * Analyze the provided image thoroughly and provide detailed, accurate descriptions or answers based on what you see.
 * Consider various aspects such as objects, people, actions, text, colors, composition, and any other relevant details.
 * If asked a specific question about the image, focus your response on addressing that question directly.
 * Ensure you add a critique of the image, if anything seems wrong, or if anything requires improvement.""",
                         help="System prompt")
-    parser.add_argument("-p", "--prompt", type=str, required=True, help="User prompt")
-    parser.add_argument("-u", "--url", type=str, help="URL of the image")
-    parser.add_argument("-f", "--file", type=str,
+    parser.add_argument("--prompt", "--query", type=str, required=True, help="User prompt")
+    parser.add_argument("--url", type=str, help="URL of the image")
+    parser.add_argument("--file", type=str,
                         help="Path to the image file. Accepts standard image formats (e.g., PNG, JPEG, JPG), SVG, and PDF files.")
-    parser.add_argument("-m", "--model", type=str, help="OpenAI or Open Source model to use")
-    parser.add_argument("-T", "--temperature", type=float, default=0.0, help="Temperature for the model")
+    parser.add_argument("--model", type=str, help="OpenAI or Open Source model to use")
+    parser.add_argument("--temperature", type=float, default=0.0, help="Temperature for the model")
     parser.add_argument("--max_tokens", type=int, default=1024, help="Maximum tokens for the model")
     parser.add_argument("--stream_output", help="Whether to stream output", default=True, action='store_true')
     parser.add_argument("--max_time", type=float, default=default_max_time, help="Maximum time to wait for response")

diff --git a/reqs_optional/requirements_optional_image.txt b/reqs_optional/requirements_optional_image.txt
@@ -7,4 +7,4 @@ yt-dlp>=2024.8.6
 # moviepy>=0.5.1
 
 # for fiftyone with patches
-pytubefix==6.15.4
+pytubefix==8.1.1
diff --git a/src/version.py b/src/version.py
@@ -1 +1 @@
-__version__ = "215c19927e72f9a533d39d92a649d91547051913"
+__version__ = "ec5352532e799bc124638a75f4e58b0b93b4a5ee"
diff --git a/src/vision/extract_movie.py b/src/vision/extract_movie.py
@@ -1,4 +1,5 @@
 import os
+import sys
 import uuid
 
 from src.utils import makedirs, sanitize_filename, get_gradio_tmp
@@ -14,8 +15,13 @@ def extract_unique_frames(urls=None, file=None, download_dir=None, export_dir=No
         makedirs(download_dir, exist_ok=True)
     # os.environ['FIFTYONE_DISABLE_SERVICES'] = 'True'
     if urls:
-        import fiftyone.utils.youtube as fouy
-        fouy.download_youtube_videos(urls, download_dir=download_dir)
+        if 'openai_server' not in sys.path:
+            sys.path.append('openai_server')
+        from openai_server.agent_tools.download_web_video import download_web_video
+        for url in urls:
+            download_web_video(video_url=url, base_url="https://www.youtube.com", output_dir=download_dir)
+        #import fiftyone.utils.youtube as fouy
+        #fouy.download_youtube_videos(urls, download_dir=download_dir)
 
     # Create a FiftyOne Dataset
     import fiftyone as fo
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		-__version__ = "215c19927e72f9a533d39d92a649d91547051913"
		+__version__ = "ec5352532e799bc124638a75f4e58b0b93b4a5ee"