From 40b8af0de51a4bb6809f4aeb23110aed384fc107 Mon Sep 17 00:00:00 2001 From: Chandrasekharan M <117059509+chandrasekharan-zipstack@users.noreply.github.com> Date: Mon, 29 Jul 2024 14:45:10 +0530 Subject: [PATCH] fix: File type support for WF and API deployments (#530) * File type support for WF and API deployments * Structure tool version bumped --------- Co-authored-by: Hari John Kuriakose Co-authored-by: Gayathri <142381512+gaya3-zipstack@users.noreply.github.com> --- backend/sample.env | 4 ++-- backend/workflow_manager/endpoint/constants.py | 11 ++--------- backend/workflow_manager/endpoint/source.py | 5 ++++- docker/scripts/merge_env.py | 5 +---- prompt-service/src/unstract/prompt_service/main.py | 4 ++-- tools/structure/src/config/properties.json | 5 ++--- 6 files changed, 13 insertions(+), 21 deletions(-) diff --git a/backend/sample.env b/backend/sample.env index 506f8644b..b7267ad22 100644 --- a/backend/sample.env +++ b/backend/sample.env @@ -90,9 +90,9 @@ PROMPT_STUDIO_FILE_PATH=/app/prompt-studio-data # Structure Tool Image (Runs prompt studio exported tools) # https://hub.docker.com/r/unstract/tool-structure -STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.33" +STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.34" STRUCTURE_TOOL_IMAGE_NAME="unstract/tool-structure" -STRUCTURE_TOOL_IMAGE_TAG="0.0.33" +STRUCTURE_TOOL_IMAGE_TAG="0.0.34" # Feature Flags EVALUATION_SERVER_IP=unstract-flipt diff --git a/backend/workflow_manager/endpoint/constants.py b/backend/workflow_manager/endpoint/constants.py index 01434b800..428d45ce5 100644 --- a/backend/workflow_manager/endpoint/constants.py +++ b/backend/workflow_manager/endpoint/constants.py @@ -72,8 +72,8 @@ class FileType: class FilePattern: - PDF_DOCUMENTS = ["*.pdf", "*.PDF"] - TEXT_DOCUMENTS = ["*.txt", "*.TXT"] + PDF_DOCUMENTS = ["*.pdf"] + TEXT_DOCUMENTS = ["*.txt", "*.doc", "*.docx"] IMAGES = [ "*.jpg", "*.jpeg", @@ -82,13 +82,6 @@ class FilePattern: "*.bmp", "*.tif", "*.tiff", - "*.JPG", - "*.JPEG", - "*.PNG", - "*.GIF", - "*.BMP", - "*.TIF", - "*.TIFF", ] diff --git a/backend/workflow_manager/endpoint/source.py b/backend/workflow_manager/endpoint/source.py index 33cb97484..d2707de0f 100644 --- a/backend/workflow_manager/endpoint/source.py +++ b/backend/workflow_manager/endpoint/source.py @@ -251,7 +251,10 @@ def _get_matched_files( continue if count >= limit: break - if any(fnmatch.fnmatch(file, pattern) for pattern in patterns): + if any( + fnmatch.fnmatchcase(file.lower(), pattern.lower()) + for pattern in patterns + ): file_path = os.path.join(root, file) file_path = f"{file_path}" matched_files.append(file_path) diff --git a/docker/scripts/merge_env.py b/docker/scripts/merge_env.py index 9a328b00c..73dcc4c6a 100644 --- a/docker/scripts/merge_env.py +++ b/docker/scripts/merge_env.py @@ -10,10 +10,7 @@ # import sys -PREFERRED_BASE_ENV_KEYS = [ - "STRUCTURE_TOOL_IMAGE_URL", - "STRUCTURE_TOOL_IMAGE_TAG", -] +PREFERRED_BASE_ENV_KEYS = [] DEFAULT_AUTH_KEY = "unstract" DEFAULT_ADMIN_KEY = "admin" SET_DEFAULT_KEYS = { diff --git a/prompt-service/src/unstract/prompt_service/main.py b/prompt-service/src/unstract/prompt_service/main.py index aeb9f022c..6a06b90e1 100644 --- a/prompt-service/src/unstract/prompt_service/main.py +++ b/prompt-service/src/unstract/prompt_service/main.py @@ -178,7 +178,7 @@ def prompt_processor() -> Any: if not payload: raise NoPayloadError tool_settings = payload.get(PSKeys.TOOL_SETTINGS, {}) - outputs = payload.get(PSKeys.OUTPUTS) + outputs = payload.get(PSKeys.OUTPUTS, []) tool_id: str = payload.get(PSKeys.TOOL_ID, "") run_id: str = payload.get(PSKeys.RUN_ID, "") file_hash = payload.get(PSKeys.FILE_HASH) @@ -198,7 +198,7 @@ def prompt_processor() -> Any: {"tool_id": tool_id, "run_id": run_id, "doc_name": doc_name}, LogLevel.DEBUG, RunLevel.RUN, - "Preparing to execute all prompts", + f"Preparing to execute {len(outputs)} prompt(s)", ) for output in outputs: # type:ignore diff --git a/tools/structure/src/config/properties.json b/tools/structure/src/config/properties.json index c45f18c62..8e7bb82ca 100644 --- a/tools/structure/src/config/properties.json +++ b/tools/structure/src/config/properties.json @@ -2,7 +2,7 @@ "schemaVersion": "0.0.1", "displayName": "Structure Tool", "functionName": "structure_tool", - "toolVersion": "0.0.33", + "toolVersion": "0.0.34", "description": "This is a template tool which can answer set of input prompts designed in the Prompt Studio", "input": { "description": "File that needs to be indexed and parsed for answers" @@ -36,8 +36,7 @@ "restrictions": { "maxFileSize": "50MB", "allowedFileTypes": [ - "txt", - "pdf" + "*" ] } }