diff --git a/docs/run_patches.sh b/docs/run_patches.sh index 5430b9aaf..127327e03 100755 --- a/docs/run_patches.sh +++ b/docs/run_patches.sh @@ -15,12 +15,12 @@ sed -i 's/with HiddenPrints():/if True:/g' $sp/langchain_community/utilities/se # find "$sp" -type f -name "*.py" -exec sed -i -E 's/(sys\.stdout\s*=\s*.*)/pass # \1/; s/(sys\.stderr\s*=\s*.*)/pass # \1/' {} + # use pytubefix instead, pytube too old and various issues -sed -i 's/Pytube/PytubeFix/g' $sp/fiftyone/utils/youtube.py -sed -i 's/pytube>=15/pytube>=6/g' $sp/fiftyone/utils/youtube.py -sed -i 's/pytube/pytubefix/g' $sp/fiftyone/utils/youtube.py +#sed -i 's/Pytube/PytubeFix/g' $sp/fiftyone/utils/youtube.py +#sed -i 's/pytube>=15/pytube>=6/g' $sp/fiftyone/utils/youtube.py +#sed -i 's/pytube/pytubefix/g' $sp/fiftyone/utils/youtube.py # diff -Naru /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/pytubefix/extract.py ~/extract.py > docs/pytubefix.patch -patch $sp/pytubefix/extract.py docs/pytubefix.patch +#patch $sp/pytubefix/extract.py docs/pytubefix.patch # fix asyncio same way websockets was fixed, else keep hitting errors in async calls # https://github.com/python-websockets/websockets/commit/f9fd2cebcd42633ed917cd64e805bea17879c2d7 diff --git a/openai_server/agent_prompting.py b/openai_server/agent_prompting.py index 5895eee62..8249f8067 100644 --- a/openai_server/agent_prompting.py +++ b/openai_server/agent_prompting.py @@ -397,7 +397,7 @@ def get_ask_question_about_image_helper(base_url, api_key, model): # execution: true python {cwd}/openai_server/agent_tools/ask_question_about_image.py --query "QUERY" --file "LOCAL FILE NAME" ``` -* usage: {cwd}/openai_server/agent_tools/ask_question_about_image.py [-h] [--timeout TIMEOUT] [--system_prompt SYSTEM_PROMPT] --query "QUERY" [--url URL] [--file FILE] +* usage: {cwd}/openai_server/agent_tools/ask_question_about_image.py [-h] --query "QUERY" [--url URL] [--file FILE] [--system_prompt SYSTEM_PROMPT] * ask_question_about_image gives a text response for either a URL or local file * ask_question_about_image can be used to critique any image, e.g. a plot, a photo, a screenshot, etc. either made by code generation or among provided files or among URLs. * ask_question_about_image accepts most image files allowed by PIL (Pillow) except svg. @@ -583,13 +583,14 @@ def get_rag_helper(base_url, api_key, model, autogen_timeout, text_context_list, ```sh # filename: my_question_about_documents.sh # execution: true -python {cwd}/openai_server/agent_tools/ask_question_about_documents.py --query "QUERY" [--files FILES [FILES ...]] [--urls URLS [URLS ...]] +python {cwd}/openai_server/agent_tools/ask_question_about_documents.py --query "QUERY" [--files FILES [FILES ...]] [--urls URLS [URLS ...]] [--csv] ``` * usage: {cwd}/openai_server/agent_tools/ask_question_about_documents.py [-h] --query "QUERY" [-b BASELINE] [--system_prompt SYSTEM_PROMPT] [--files FILES [FILES ...]] * Do not include any file names in your QUERY, just query the document content. * ask_question_about_documents.py --files can be any local image(s) (png, jpg, etc.), local textual file(s) (txt, json, python, xml, md, html, rtf, rst, etc.), or local document(s) (pdf, docx, doc, epub, pptx, ppt, xls, xlsx) * ask_question_about_documents.py --urls can be any url(s) (http://www.cnn.com, https://aiindex.stanford.edu/wp-content/uploads/2024/04/HAI_2024_AI-Index-Report.pdf, etc.). * Do not use ask_question_about_documents.py just to query individual images, use ask_question_about_image.py for that. +* If need structured output for data analysis, use --csv """ if text_context_list or image_file: rag_helper += "* Absolutely you should always run ask_question_about_documents once with -b to get a baseline answer if the user has provided documents.\n" diff --git a/openai_server/agent_tools/ask_question_about_documents.py b/openai_server/agent_tools/ask_question_about_documents.py index 78185e27e..3d38eefb8 100644 --- a/openai_server/agent_tools/ask_question_about_documents.py +++ b/openai_server/agent_tools/ask_question_about_documents.py @@ -130,17 +130,17 @@ def ask_question_about_documents(): image_files = [] parser = argparse.ArgumentParser(description="RAG Tool") - parser.add_argument("-p", "--prompt", "--query", type=str, required=True, help="User prompt or query") - parser.add_argument("-j", "--json", action="store_true", default=False, help="Output results as JSON") - parser.add_argument("-c", "--csv", action="store_true", default=False, help="Output results as CSV") - parser.add_argument("-b", "--baseline", required=False, action='store_true', + parser.add_argument("--prompt", "--query", type=str, required=True, help="User prompt or query") + parser.add_argument("--json", action="store_true", default=False, help="Output results as JSON") + parser.add_argument("--csv", action="store_true", default=False, help="Output results as CSV") + parser.add_argument("--baseline", required=False, action='store_true', help="Whether to get baseline from user docs") parser.add_argument("--files", nargs="+", required=False, help="Files of documents with optionally additional images to ask question about.") parser.add_argument("--urls", nargs="+", required=False, help="URLs to ask question about") parser.add_argument("-m", "--model", type=str, required=False, help="OpenAI or Open Source model to use") - parser.add_argument("--max_time", type=float, required=False, default=default_max_time, + parser.add_argument("--timeout", type=float, required=False, default=default_max_time, help="Maximum time to wait for response") parser.add_argument("--system_prompt", type=str, required=False, default=system_prompt, help="System prompt") parser.add_argument("--chat_conversation_file", type=str, required=False, @@ -217,13 +217,16 @@ def ask_question_about_documents(): chat_conversation=chat_conversation, model=args.model, system_prompt=args.system_prompt, - max_time=args.max_time, + max_time=args.timeout, ) - is_small = len(text_context_list) < 4 * 8192 + is_small = len(text_context_list) < 4 * 1024 if args.csv or is_small: - prompt_csv = "Extract all information in a well-organized form as a CSV so it can be used for data analysis or plotting. Try to make a single CSV if possible. Ensure each CSV block of output is inside a code block with triple backticks with the csv language tag." + if not args.prompt: + prompt_csv = "Extract all information in a well-organized form as a CSV so it can be used for data analysis or plotting. Try to make a single CSV if possible. Ensure each CSV block of output is inside a code block with triple backticks with the csv language tag." + else: + prompt_csv = "Extract requested information in a well-organized form as a CSV so it can be used for data analysis or plotting. Try to make a single CSV if possible. Ensure each CSV block of output is inside a code block with triple backticks with the csv language tag.\n\nRequested information: " + args.prompt csv_answer = get_rag_answer(prompt_csv, tag='', simple=True, **rag_kwargs) matches = re.findall(r'```(?:[a-zA-Z]*)\n(.*?)```', csv_answer, re.DOTALL) for match in matches: @@ -232,7 +235,7 @@ def ask_question_about_documents(): f.write(match) print(f"CSV output written to {csv_filename}. You can use this with code generation in order to answer the user's question or obtain some intermediate step using pandas etc. Remember, you are not good at solving puzzles, math, or doing question-answer on tabular data, so use these results in python code in order to solve such tasks.\n") - if args.json or is_small: + if args.json: json_kwargs = rag_kwargs.copy() json_kwargs['guided_json'] = None json_kwargs['response_format'] = 'json_object' diff --git a/openai_server/agent_tools/ask_question_about_image.py b/openai_server/agent_tools/ask_question_about_image.py index 5c8af457c..e1ed25eca 100644 --- a/openai_server/agent_tools/ask_question_about_image.py +++ b/openai_server/agent_tools/ask_question_about_image.py @@ -49,20 +49,20 @@ def main(): default_max_time = int(os.getenv('H2OGPT_AGENT_OPENAI_TIMEOUT', "120")) parser = argparse.ArgumentParser(description="OpenAI Vision API Script") - parser.add_argument("-t", "--timeout", type=int, default=60, help="Timeout for API calls") - parser.add_argument("-s", "--system_prompt", type=str, + parser.add_argument("--timeout", type=int, default=60, help="Timeout for API calls") + parser.add_argument("--system_prompt", type=str, default="""You are a highly capable AI assistant with advanced vision capabilities. * Analyze the provided image thoroughly and provide detailed, accurate descriptions or answers based on what you see. * Consider various aspects such as objects, people, actions, text, colors, composition, and any other relevant details. * If asked a specific question about the image, focus your response on addressing that question directly. * Ensure you add a critique of the image, if anything seems wrong, or if anything requires improvement.""", help="System prompt") - parser.add_argument("-p", "--prompt", type=str, required=True, help="User prompt") - parser.add_argument("-u", "--url", type=str, help="URL of the image") - parser.add_argument("-f", "--file", type=str, + parser.add_argument("--prompt", "--query", type=str, required=True, help="User prompt") + parser.add_argument("--url", type=str, help="URL of the image") + parser.add_argument("--file", type=str, help="Path to the image file. Accepts standard image formats (e.g., PNG, JPEG, JPG), SVG, and PDF files.") - parser.add_argument("-m", "--model", type=str, help="OpenAI or Open Source model to use") - parser.add_argument("-T", "--temperature", type=float, default=0.0, help="Temperature for the model") + parser.add_argument("--model", type=str, help="OpenAI or Open Source model to use") + parser.add_argument("--temperature", type=float, default=0.0, help="Temperature for the model") parser.add_argument("--max_tokens", type=int, default=1024, help="Maximum tokens for the model") parser.add_argument("--stream_output", help="Whether to stream output", default=True, action='store_true') parser.add_argument("--max_time", type=float, default=default_max_time, help="Maximum time to wait for response") diff --git a/reqs_optional/requirements_optional_image.txt b/reqs_optional/requirements_optional_image.txt index 18ab1ae44..d16e0019f 100644 --- a/reqs_optional/requirements_optional_image.txt +++ b/reqs_optional/requirements_optional_image.txt @@ -7,4 +7,4 @@ yt-dlp>=2024.8.6 # moviepy>=0.5.1 # for fiftyone with patches -pytubefix==6.15.4 +pytubefix==8.1.1 diff --git a/src/version.py b/src/version.py index e20899db8..3892ea9bf 100644 --- a/src/version.py +++ b/src/version.py @@ -1 +1 @@ -__version__ = "215c19927e72f9a533d39d92a649d91547051913" +__version__ = "ec5352532e799bc124638a75f4e58b0b93b4a5ee" diff --git a/src/vision/extract_movie.py b/src/vision/extract_movie.py index 1090cce66..22bf94f2f 100644 --- a/src/vision/extract_movie.py +++ b/src/vision/extract_movie.py @@ -1,4 +1,5 @@ import os +import sys import uuid from src.utils import makedirs, sanitize_filename, get_gradio_tmp @@ -14,8 +15,13 @@ def extract_unique_frames(urls=None, file=None, download_dir=None, export_dir=No makedirs(download_dir, exist_ok=True) # os.environ['FIFTYONE_DISABLE_SERVICES'] = 'True' if urls: - import fiftyone.utils.youtube as fouy - fouy.download_youtube_videos(urls, download_dir=download_dir) + if 'openai_server' not in sys.path: + sys.path.append('openai_server') + from openai_server.agent_tools.download_web_video import download_web_video + for url in urls: + download_web_video(video_url=url, base_url="https://www.youtube.com", output_dir=download_dir) + #import fiftyone.utils.youtube as fouy + #fouy.download_youtube_videos(urls, download_dir=download_dir) # Create a FiftyOne Dataset import fiftyone as fo