Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: File type support for WF and API deployments #530

Merged
merged 5 commits into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 2 additions & 9 deletions backend/workflow_manager/endpoint/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ class FileType:


class FilePattern:
PDF_DOCUMENTS = ["*.pdf", "*.PDF"]
TEXT_DOCUMENTS = ["*.txt", "*.TXT"]
PDF_DOCUMENTS = ["*.pdf"]
TEXT_DOCUMENTS = ["*.txt", "*.doc", "*.docx"]
IMAGES = [
"*.jpg",
"*.jpeg",
Expand All @@ -82,13 +82,6 @@ class FilePattern:
"*.bmp",
"*.tif",
"*.tiff",
"*.JPG",
"*.JPEG",
"*.PNG",
"*.GIF",
"*.BMP",
"*.TIF",
"*.TIFF",
]


Expand Down
5 changes: 4 additions & 1 deletion backend/workflow_manager/endpoint/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,10 @@ def _get_matched_files(
continue
if count >= limit:
break
if any(fnmatch.fnmatch(file, pattern) for pattern in patterns):
if any(
fnmatch.fnmatchcase(file.lower(), pattern.lower())
for pattern in patterns
):
file_path = os.path.join(root, file)
file_path = f"{file_path}"
matched_files.append(file_path)
Expand Down
5 changes: 1 addition & 4 deletions docker/scripts/merge_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@
#
import sys

PREFERRED_BASE_ENV_KEYS = [
"STRUCTURE_TOOL_IMAGE_URL",
"STRUCTURE_TOOL_IMAGE_TAG",
]
PREFERRED_BASE_ENV_KEYS = []
Comment on lines -16 to +13
Copy link
Contributor

@hari-kuriakose hari-kuriakose Jul 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@chandrasekharan-zipstack Better to revert this change so that the run platform script always honors sample.env and uses its contents to override .env.

This means we just need to ensure sample.env is updated with new structure tool version each time, that's all.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hari-kuriakose We always update sample.env with the latest structure tool version. However, since we added these fields under PREFERRED_BASE_ENV_KEYS, the merge of envs does not override the value already present in the user's .env. As a result, the user would always have an older version of the structure tool in their setup unless they manually check and update the value.

DEFAULT_AUTH_KEY = "unstract"
DEFAULT_ADMIN_KEY = "admin"
SET_DEFAULT_KEYS = {
Expand Down
4 changes: 2 additions & 2 deletions prompt-service/src/unstract/prompt_service/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def prompt_processor() -> Any:
if not payload:
raise NoPayloadError
tool_settings = payload.get(PSKeys.TOOL_SETTINGS, {})
outputs = payload.get(PSKeys.OUTPUTS)
outputs = payload.get(PSKeys.OUTPUTS, [])
tool_id: str = payload.get(PSKeys.TOOL_ID, "")
run_id: str = payload.get(PSKeys.RUN_ID, "")
file_hash = payload.get(PSKeys.FILE_HASH)
Expand All @@ -198,7 +198,7 @@ def prompt_processor() -> Any:
{"tool_id": tool_id, "run_id": run_id, "doc_name": doc_name},
LogLevel.DEBUG,
RunLevel.RUN,
"Preparing to execute all prompts",
f"Preparing to execute {len(outputs)} prompt(s)",
)

for output in outputs: # type:ignore
Expand Down
5 changes: 2 additions & 3 deletions tools/structure/src/config/properties.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"schemaVersion": "0.0.1",
"displayName": "Structure Tool",
"functionName": "structure_tool",
"toolVersion": "0.0.33",
"toolVersion": "0.0.34",
"description": "This is a template tool which can answer set of input prompts designed in the Prompt Studio",
"input": {
"description": "File that needs to be indexed and parsed for answers"
Expand Down Expand Up @@ -36,8 +36,7 @@
"restrictions": {
"maxFileSize": "50MB",
"allowedFileTypes": [
"txt",
"pdf"
"*"
]
}
}
Loading