Skip to content

Commit

Permalink
fix: File type support for WF and API deployments (#530)
Browse files: browse the repository at this point in the history
* File type support for WF and API deployments

* Structure tool version bumped

---------

Co-authored-by: Hari John Kuriakose <[email protected]>
Co-authored-by: Gayathri <[email protected]>
  • Loading branch information
3 people authored Jul 29, 2024
1 parent 56d5c73 commit 40b8af0
Show file tree
Hide file tree
Showing 6 changed files with 13 additions and 21 deletions.
4 changes: 2 additions & 2 deletions backend/sample.env
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,9 @@ PROMPT_STUDIO_FILE_PATH=/app/prompt-studio-data

# Structure Tool Image (Runs prompt studio exported tools)
# https://hub.docker.com/r/unstract/tool-structure
STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.33"
STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.34"
STRUCTURE_TOOL_IMAGE_NAME="unstract/tool-structure"
STRUCTURE_TOOL_IMAGE_TAG="0.0.33"
STRUCTURE_TOOL_IMAGE_TAG="0.0.34"

# Feature Flags
EVALUATION_SERVER_IP=unstract-flipt
Expand Down
11 changes: 2 additions & 9 deletions backend/workflow_manager/endpoint/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ class FileType:


class FilePattern:
PDF_DOCUMENTS = ["*.pdf", "*.PDF"]
TEXT_DOCUMENTS = ["*.txt", "*.TXT"]
PDF_DOCUMENTS = ["*.pdf"]
TEXT_DOCUMENTS = ["*.txt", "*.doc", "*.docx"]
IMAGES = [
"*.jpg",
"*.jpeg",
Expand All @@ -82,13 +82,6 @@ class FilePattern:
"*.bmp",
"*.tif",
"*.tiff",
"*.JPG",
"*.JPEG",
"*.PNG",
"*.GIF",
"*.BMP",
"*.TIF",
"*.TIFF",
]


Expand Down
5 changes: 4 additions & 1 deletion backend/workflow_manager/endpoint/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,10 @@ def _get_matched_files(
continue
if count >= limit:
break
if any(fnmatch.fnmatch(file, pattern) for pattern in patterns):
if any(
fnmatch.fnmatchcase(file.lower(), pattern.lower())
for pattern in patterns
):
file_path = os.path.join(root, file)
file_path = f"{file_path}"
matched_files.append(file_path)
Expand Down
5 changes: 1 addition & 4 deletions docker/scripts/merge_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@
#
import sys

PREFERRED_BASE_ENV_KEYS = [
"STRUCTURE_TOOL_IMAGE_URL",
"STRUCTURE_TOOL_IMAGE_TAG",
]
PREFERRED_BASE_ENV_KEYS = []
DEFAULT_AUTH_KEY = "unstract"
DEFAULT_ADMIN_KEY = "admin"
SET_DEFAULT_KEYS = {
Expand Down
4 changes: 2 additions & 2 deletions prompt-service/src/unstract/prompt_service/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def prompt_processor() -> Any:
if not payload:
raise NoPayloadError
tool_settings = payload.get(PSKeys.TOOL_SETTINGS, {})
outputs = payload.get(PSKeys.OUTPUTS)
outputs = payload.get(PSKeys.OUTPUTS, [])
tool_id: str = payload.get(PSKeys.TOOL_ID, "")
run_id: str = payload.get(PSKeys.RUN_ID, "")
file_hash = payload.get(PSKeys.FILE_HASH)
Expand All @@ -198,7 +198,7 @@ def prompt_processor() -> Any:
{"tool_id": tool_id, "run_id": run_id, "doc_name": doc_name},
LogLevel.DEBUG,
RunLevel.RUN,
"Preparing to execute all prompts",
f"Preparing to execute {len(outputs)} prompt(s)",
)

for output in outputs: # type:ignore
Expand Down
5 changes: 2 additions & 3 deletions tools/structure/src/config/properties.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"schemaVersion": "0.0.1",
"displayName": "Structure Tool",
"functionName": "structure_tool",
"toolVersion": "0.0.33",
"toolVersion": "0.0.34",
"description": "This is a template tool which can answer set of input prompts designed in the Prompt Studio",
"input": {
"description": "File that needs to be indexed and parsed for answers"
Expand Down Expand Up @@ -36,8 +36,7 @@
"restrictions": {
"maxFileSize": "50MB",
"allowedFileTypes": [
"txt",
"pdf"
"*"
]
}
}

0 comments on commit 40b8af0

Please sign in to comment.