From dd0f8d03c3fadc49ecee1f2c6b3bb9cf70ebdf5e Mon Sep 17 00:00:00 2001 From: Chandrasekharan M Date: Fri, 26 Jul 2024 16:44:47 +0530 Subject: [PATCH 1/2] File type support for WF and API deployments --- backend/workflow_manager/endpoint/constants.py | 11 ++--------- backend/workflow_manager/endpoint/source.py | 5 ++++- docker/scripts/merge_env.py | 5 +---- prompt-service/src/unstract/prompt_service/main.py | 4 ++-- tools/structure/src/config/properties.json | 5 ++--- 5 files changed, 11 insertions(+), 19 deletions(-) diff --git a/backend/workflow_manager/endpoint/constants.py b/backend/workflow_manager/endpoint/constants.py index 01434b800..428d45ce5 100644 --- a/backend/workflow_manager/endpoint/constants.py +++ b/backend/workflow_manager/endpoint/constants.py @@ -72,8 +72,8 @@ class FileType: class FilePattern: - PDF_DOCUMENTS = ["*.pdf", "*.PDF"] - TEXT_DOCUMENTS = ["*.txt", "*.TXT"] + PDF_DOCUMENTS = ["*.pdf"] + TEXT_DOCUMENTS = ["*.txt", "*.doc", "*.docx"] IMAGES = [ "*.jpg", "*.jpeg", @@ -82,13 +82,6 @@ class FilePattern: "*.bmp", "*.tif", "*.tiff", - "*.JPG", - "*.JPEG", - "*.PNG", - "*.GIF", - "*.BMP", - "*.TIF", - "*.TIFF", ] diff --git a/backend/workflow_manager/endpoint/source.py b/backend/workflow_manager/endpoint/source.py index 33cb97484..d2707de0f 100644 --- a/backend/workflow_manager/endpoint/source.py +++ b/backend/workflow_manager/endpoint/source.py @@ -251,7 +251,10 @@ def _get_matched_files( continue if count >= limit: break - if any(fnmatch.fnmatch(file, pattern) for pattern in patterns): + if any( + fnmatch.fnmatchcase(file.lower(), pattern.lower()) + for pattern in patterns + ): file_path = os.path.join(root, file) file_path = f"{file_path}" matched_files.append(file_path) diff --git a/docker/scripts/merge_env.py b/docker/scripts/merge_env.py index 9a328b00c..73dcc4c6a 100644 --- a/docker/scripts/merge_env.py +++ b/docker/scripts/merge_env.py @@ -10,10 +10,7 @@ # import sys -PREFERRED_BASE_ENV_KEYS = [ - "STRUCTURE_TOOL_IMAGE_URL", - "STRUCTURE_TOOL_IMAGE_TAG", -] +PREFERRED_BASE_ENV_KEYS = [] DEFAULT_AUTH_KEY = "unstract" DEFAULT_ADMIN_KEY = "admin" SET_DEFAULT_KEYS = { diff --git a/prompt-service/src/unstract/prompt_service/main.py b/prompt-service/src/unstract/prompt_service/main.py index aeb9f022c..6a06b90e1 100644 --- a/prompt-service/src/unstract/prompt_service/main.py +++ b/prompt-service/src/unstract/prompt_service/main.py @@ -178,7 +178,7 @@ def prompt_processor() -> Any: if not payload: raise NoPayloadError tool_settings = payload.get(PSKeys.TOOL_SETTINGS, {}) - outputs = payload.get(PSKeys.OUTPUTS) + outputs = payload.get(PSKeys.OUTPUTS, []) tool_id: str = payload.get(PSKeys.TOOL_ID, "") run_id: str = payload.get(PSKeys.RUN_ID, "") file_hash = payload.get(PSKeys.FILE_HASH) @@ -198,7 +198,7 @@ def prompt_processor() -> Any: {"tool_id": tool_id, "run_id": run_id, "doc_name": doc_name}, LogLevel.DEBUG, RunLevel.RUN, - "Preparing to execute all prompts", + f"Preparing to execute {len(outputs)} prompt(s)", ) for output in outputs: # type:ignore diff --git a/tools/structure/src/config/properties.json b/tools/structure/src/config/properties.json index c45f18c62..8e7bb82ca 100644 --- a/tools/structure/src/config/properties.json +++ b/tools/structure/src/config/properties.json @@ -2,7 +2,7 @@ "schemaVersion": "0.0.1", "displayName": "Structure Tool", "functionName": "structure_tool", - "toolVersion": "0.0.33", + "toolVersion": "0.0.34", "description": "This is a template tool which can answer set of input prompts designed in the Prompt Studio", "input": { "description": "File that needs to be indexed and parsed for answers" @@ -36,8 +36,7 @@ "restrictions": { "maxFileSize": "50MB", "allowedFileTypes": [ - "txt", - "pdf" + "*" ] } } From 57b07a778f7954fc43d5e0eb7ed4e73c0a24d1d4 Mon Sep 17 00:00:00 2001 From: Chandrasekharan M Date: Fri, 26 Jul 2024 16:54:55 +0530 Subject: [PATCH 2/2] Structure tool version bumped --- backend/sample.env | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/sample.env b/backend/sample.env index 506f8644b..b7267ad22 100644 --- a/backend/sample.env +++ b/backend/sample.env @@ -90,9 +90,9 @@ PROMPT_STUDIO_FILE_PATH=/app/prompt-studio-data # Structure Tool Image (Runs prompt studio exported tools) # https://hub.docker.com/r/unstract/tool-structure -STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.33" +STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.34" STRUCTURE_TOOL_IMAGE_NAME="unstract/tool-structure" -STRUCTURE_TOOL_IMAGE_TAG="0.0.33" +STRUCTURE_TOOL_IMAGE_TAG="0.0.34" # Feature Flags EVALUATION_SERVER_IP=unstract-flipt