Skip to content

Commit

Permalink
FIX: Updated changes for highlight (#848)
Browse files (browse the repository at this point in the history)
* Updated changes for highlight

Signed-off-by: Deepak <[email protected]>

* Fixes in structure tool

Signed-off-by: Deepak <[email protected]>

* Version bump

Signed-off-by: Deepak <[email protected]>

* Fixed log message

Signed-off-by: Deepak <[email protected]>

* Commit pdm.lock changes

* pdm.lock fix

Signed-off-by: Deepak <[email protected]>

* Commit pdm.lock changes

* Minor fix

Signed-off-by: Deepak <[email protected]>

* Commit pdm.lock changes

---------

Signed-off-by: Deepak <[email protected]>
Signed-off-by: Deepak K <[email protected]>
Co-authored-by: Deepak-Kesavan <[email protected]>
  • Loading branch information
Deepak-Kesavan authored Nov 20, 2024
1 parent e6f0af4 commit 4a53912
Show file tree
Hide file tree
Showing 17 changed files with 328 additions and 330 deletions.
176 changes: 87 additions & 89 deletions backend/pdm.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions backend/prompt_studio/prompt_studio_core_v2/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ class ToolStudioPromptKeys:
SUMMARIZE_AS_SOURCE = "summarize_as_source"
VARIABLE_MAP = "variable_map"
RECORD = "record"
FILE_PATH = "file_path"
ENABLE_HIGHLIGHT = "enable_highlight"


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -824,6 +824,7 @@ def _fetch_response(
TSPKeys.RUN_ID: run_id,
TSPKeys.FILE_NAME: doc_name,
TSPKeys.FILE_HASH: file_hash,
TSPKeys.FILE_PATH: doc_path,
Common.LOG_EVENTS_ID: StateStore.get(Common.LOG_EVENTS_ID),
}

Expand Down
2 changes: 1 addition & 1 deletion backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ dependencies = [
"python-socketio==5.9.0", # For log_events
"social-auth-app-django==5.3.0", # For OAuth
"social-auth-core==4.4.2", # For OAuth
"unstract-sdk~=0.53.1",
"unstract-sdk~=0.53.2",
# ! IMPORTANT!
# Indirect local dependencies usually need to be added in their own projects
# as: https://pdm-project.org/latest/usage/dependency/#local-dependencies.
Expand Down
4 changes: 2 additions & 2 deletions backend/sample.env
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,9 @@ PROMPT_STUDIO_FILE_PATH=/app/prompt-studio-data

# Structure Tool Image (Runs prompt studio exported tools)
# https://hub.docker.com/r/unstract/tool-structure
STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.48"
STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.49"
STRUCTURE_TOOL_IMAGE_NAME="unstract/tool-structure"
STRUCTURE_TOOL_IMAGE_TAG="0.0.48"
STRUCTURE_TOOL_IMAGE_TAG="0.0.49"

# Feature Flags
EVALUATION_SERVER_IP=unstract-flipt
Expand Down
182 changes: 90 additions & 92 deletions pdm.lock

Large diffs are not rendered by default.

114 changes: 57 additions & 57 deletions platform-service/pdm.lock

Large diffs are not rendered by default.

120 changes: 60 additions & 60 deletions prompt-service/pdm.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion prompt-service/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ dependencies = [
"flask~=3.0",
"llama-index==0.10.58",
"python-dotenv==1.0.0",
"unstract-sdk~=0.53.1",
"unstract-sdk~=0.53.2",
"redis>=5.0.3",
"unstract-core @ file:///${PROJECT_ROOT}/../unstract/core",
"unstract-flags @ file:///${PROJECT_ROOT}/../unstract/flags",
Expand Down
4 changes: 3 additions & 1 deletion prompt-service/src/unstract/prompt_service/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,15 @@ class PromptServiceContants:
TABLE_SETTINGS = "table_settings"
EPILOGUE = "epilogue"
PLATFORM_POSTAMBLE = "platform_postamble"
EXTRACT_EPILOGUE = "extract-epilogue"
HIGHLIGHT_DATA_PLUGIN = "highlight-data"
CLEAN_CONTEXT = "clean-context"
SUMMARIZE_AS_SOURCE = "summarize_as_source"
VARIABLE_MAP = "variable_map"
RECORD = "record"
TEXT = "text"
ENABLE_HIGHLIGHT = "enable_highlight"
FILE_PATH = "file_path"
HIGHLIGHT_DATA = "highlight_data"


class RunLevel(Enum):
Expand Down
31 changes: 15 additions & 16 deletions prompt-service/src/unstract/prompt_service/helper.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import importlib
import os
from json import JSONDecodeError
from logging import Logger
from pathlib import Path
from typing import Any, Optional

from dotenv import load_dotenv
from flask import Flask, current_app, json
from flask import Flask, current_app
from unstract.prompt_service.config import db
from unstract.prompt_service.constants import DBTableV2
from unstract.prompt_service.constants import PromptServiceContants as PSKeys
Expand Down Expand Up @@ -215,6 +214,7 @@ def construct_and_run_prompt(
context: str,
prompt: str,
metadata: dict[str, Any],
file_path: str = "",
) -> str:
platform_postamble = tool_settings.get(PSKeys.PLATFORM_POSTAMBLE, "")
summarize_as_source = tool_settings.get(PSKeys.SUMMARIZE_AS_SOURCE)
Expand All @@ -236,6 +236,7 @@ def construct_and_run_prompt(
prompt_key=output[PSKeys.NAME],
prompt_type=output.get(PSKeys.TYPE, PSKeys.TEXT),
enable_highlight=enable_highlight,
file_path=file_path,
)


Expand Down Expand Up @@ -276,29 +277,27 @@ def run_completion(
prompt_key: Optional[str] = None,
prompt_type: Optional[str] = PSKeys.TEXT,
enable_highlight: bool = False,
file_path: str = "",
) -> str:
logger: Logger = current_app.logger
try:
extract_epilogue_plugin: dict[str, Any] = plugins.get(
PSKeys.EXTRACT_EPILOGUE, {}
highlight_data_plugin: dict[str, Any] = plugins.get(
PSKeys.HIGHLIGHT_DATA_PLUGIN, {}
)
extract_epilogue = None
if extract_epilogue_plugin and enable_highlight:
extract_epilogue = extract_epilogue_plugin["entrypoint_cls"].run
highlight_data = None
if highlight_data_plugin and enable_highlight:
highlight_data = highlight_data_plugin["entrypoint_cls"](
logger=current_app.logger, file_path=file_path
).run
completion = llm.complete(
prompt=prompt,
process_text=extract_epilogue,
process_text=highlight_data,
extract_json=prompt_type.lower() != PSKeys.TEXT,
)
answer: str = completion[PSKeys.RESPONSE].text
epilogue = completion.get(PSKeys.EPILOGUE)
if all([metadata, epilogue, prompt_key]):
try:
logger.info(f"Epilogue extracted from LLM: {epilogue}")
epilogue = json.loads(epilogue)
except JSONDecodeError:
logger.error(f"Failed to convert epilogue to JSON: {epilogue}")
metadata.setdefault(PSKeys.EPILOGUE, {})[prompt_key] = epilogue
highlight_data = completion.get(PSKeys.HIGHLIGHT_DATA)
if all([metadata, highlight_data, prompt_key]):
metadata.setdefault(PSKeys.HIGHLIGHT_DATA, {})[prompt_key] = highlight_data
return answer
# TODO: Catch and handle specific exception here
except SdkRateLimitError as e:
Expand Down
2 changes: 2 additions & 0 deletions prompt-service/src/unstract/prompt_service/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ def prompt_processor() -> Any:
tool_id: str = payload.get(PSKeys.TOOL_ID, "")
run_id: str = payload.get(PSKeys.RUN_ID, "")
file_hash = payload.get(PSKeys.FILE_HASH)
file_path = payload.get(PSKeys.FILE_PATH)
doc_name = str(payload.get(PSKeys.FILE_NAME, ""))
log_events_id: str = payload.get(PSKeys.LOG_EVENTS_ID, "")
structured_output: dict[str, Any] = {}
Expand Down Expand Up @@ -327,6 +328,7 @@ def prompt_processor() -> Any:
context="\n".join(context),
prompt="promptx",
metadata=metadata,
file_path=file_path,
)
metadata[PSKeys.CONTEXT][output[PSKeys.NAME]] = get_cleaned_context(
context
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ hook-check-django-migrations = [
"psycopg2-binary==2.9.9",
"python-dotenv==1.0.0",
"python-magic==0.4.27",
"unstract-sdk~=0.53.1",
"unstract-sdk~=0.53.2",
"-e unstract-connectors @ file:///${PROJECT_ROOT}/unstract/connectors",
"-e unstract-core @ file:///${PROJECT_ROOT}/unstract/core",
"-e unstract-flags @ file:///${PROJECT_ROOT}/unstract/flags",
Expand Down
2 changes: 1 addition & 1 deletion tools/structure/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Add your dependencies here

# Required for all unstract tools
unstract-sdk~=0.53.1
unstract-sdk~=0.53.2
1 change: 1 addition & 0 deletions tools/structure/src/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,4 @@ class SettingsKeys:
HIGHLIGHT_DATA = "highlight_data"
CONFIDENCE_DATA = "confidence_data"
EXECUTION_RUN_DATA_FOLDER = "EXECUTION_RUN_DATA_FOLDER"
FILE_PATH = "file_path"
12 changes: 4 additions & 8 deletions tools/structure/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ def run(
self.get_env_or_die(SettingsKeys.EXECUTION_RUN_DATA_FOLDER)
)
run_id = CommonUtils.generate_uuid()
extracted_input_file = str(execution_run_data_folder / SettingsKeys.EXTRACT)
# TODO : Resolve and pass log events ID
payload = {
SettingsKeys.RUN_ID: run_id,
Expand All @@ -110,6 +111,7 @@ def run(
SettingsKeys.TOOL_ID: tool_id,
SettingsKeys.FILE_HASH: file_hash,
SettingsKeys.FILE_NAME: file_name,
SettingsKeys.FILE_PATH: extracted_input_file,
}
# TODO: Need to split extraction and indexing
# to avoid unwanted indexing
Expand Down Expand Up @@ -203,10 +205,7 @@ def run(
for output in outputs:
if SettingsKeys.TABLE_SETTINGS in output:
table_settings = output[SettingsKeys.TABLE_SETTINGS]
extracted_input_file = (
execution_run_data_folder / SettingsKeys.EXTRACT
)
table_settings[SettingsKeys.INPUT_FILE] = str(extracted_input_file)
table_settings[SettingsKeys.INPUT_FILE] = extracted_input_file
output.update({SettingsKeys.TABLE_SETTINGS: table_settings})

self.stream_log(f"Fetching responses for {len(outputs)} prompt(s)...")
Expand Down Expand Up @@ -237,17 +236,14 @@ def run(
try:
from helper import ( # type: ignore [attr-defined]
get_confidence_data,
transform_dict,
)

highlight_data = transform_dict(epilogue, tool_data_dir)
metadata[SettingsKeys.HIGHLIGHT_DATA] = highlight_data
metadata[SettingsKeys.CONFIDENCE_DATA] = get_confidence_data(
epilogue, tool_data_dir
)
except ImportError:
self.stream_log(
f"Highlight metadata is not added. {PAID_FEATURE_MSG}",
f"Confidence data is not added. {PAID_FEATURE_MSG}",
level=LogLevel.WARN,
)
# Update the dictionary with modified metadata
Expand Down
2 changes: 1 addition & 1 deletion unstract/tool-registry/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ dependencies = [
"docker~=6.1.3",
"jsonschema~=4.18.2",
"PyYAML~=6.0.1",
"unstract-sdk~=0.53.1",
"unstract-sdk~=0.53.2",
# ! IMPORTANT!
# Local dependencies usually need to be added as:
# https://pdm-project.org/latest/usage/dependency/#local-dependencies
Expand Down

0 comments on commit 4a53912

Please sign in to comment.