diff --git a/backend/migrating/v2/query.py b/backend/migrating/v2/query.py index 813f0a5ae..c8aa0087c 100644 --- a/backend/migrating/v2/query.py +++ b/backend/migrating/v2/query.py @@ -317,16 +317,16 @@ def get_organization_migrations( { "name": f"migration_{schema}_003_workflow", "src_query": f""" - SELECT id, prompt_name, description, workflow_name, prompt_text, - is_active, status, llm_response, workflow_owner_id, + SELECT id, description, workflow_name, + is_active, status, workflow_owner_id, deployment_type, source_settings, destination_settings, created_by_id, modified_by_id, modified_at, created_at FROM "{schema}".workflow_workflow; """, "dest_query": f""" INSERT INTO "{self.v2_schema}".workflow ( - id, prompt_name, description, workflow_name, prompt_text, - is_active, status, llm_response, workflow_owner_id, + id, description, workflow_name, + is_active, status, workflow_owner_id, deployment_type, source_settings, destination_settings, created_by_id, modified_by_id, modified_at, created_at, organization_id diff --git a/backend/sample.env b/backend/sample.env index 887ae5d6b..94b151db9 100644 --- a/backend/sample.env +++ b/backend/sample.env @@ -42,16 +42,6 @@ LANDING_URL="http://frontend.unstract.localhost/landing" ERROR_URL="http://frontend.unstract.localhost/error" WEB_APP_ORIGIN_URL="http://frontend.unstract.localhost" -# Azure OpenAI -OPENAI_API_KEY= -OPENAI_API_BASE= -OPENAI_API_VERSION= -OPENAI_API_ENGINE= -OPENAI_API_MODEL= -OPENAI_API_MODEL_EMBEDDING= -OPENAI_API_DEPLOYMENT_EMBEDDING= -OPENAI_API_TYPE= - # API keys for trusted services INTERNAL_SERVICE_API_KEY= diff --git a/backend/tool_instance/tool_processor.py b/backend/tool_instance/tool_processor.py index 792b4a0b0..b7584b419 100644 --- a/backend/tool_instance/tool_processor.py +++ b/backend/tool_instance/tool_processor.py @@ -128,10 +128,3 @@ def get_tool_list(user: User) -> list[dict[str, Any]]: tool_list: list[dict[str, Any]] = tool_registry.fetch_tools_descriptions() tool_list = tool_list + prompt_studio_tools return tool_list - - @staticmethod - def get_registry_tools() -> list[Tool]: - """Function to get a list of tools.""" - tool_registry = ToolRegistry() - tool_list: list[Tool] = tool_registry.fetch_all_tools() - return tool_list diff --git a/backend/tool_instance_v2/tool_processor.py b/backend/tool_instance_v2/tool_processor.py index 44f8cca30..e5f9796df 100644 --- a/backend/tool_instance_v2/tool_processor.py +++ b/backend/tool_instance_v2/tool_processor.py @@ -128,10 +128,3 @@ def get_tool_list(user: User) -> list[dict[str, Any]]: tool_list: list[dict[str, Any]] = tool_registry.fetch_tools_descriptions() tool_list = tool_list + prompt_studio_tools return tool_list - - @staticmethod - def get_registry_tools() -> list[Tool]: - """Function to get a list of tools.""" - tool_registry = ToolRegistry() - tool_list: list[Tool] = tool_registry.fetch_all_tools() - return tool_list diff --git a/backend/workflow_manager/workflow/constants.py b/backend/workflow_manager/workflow/constants.py index 2b9d3cf8e..a1cf08b43 100644 --- a/backend/workflow_manager/workflow/constants.py +++ b/backend/workflow_manager/workflow/constants.py @@ -1,7 +1,6 @@ class WorkflowKey: """Dict keys related to workflows.""" - PROMPT_TEXT = "prompt_text" LLM_RESPONSE = "llm_response" WF_STEPS = "steps" WF_TOOL = "tool" diff --git a/backend/workflow_manager/workflow/generator.py b/backend/workflow_manager/workflow/generator.py deleted file mode 100644 index 31aabafb6..000000000 --- a/backend/workflow_manager/workflow/generator.py +++ /dev/null @@ -1,114 +0,0 @@ -import logging -import uuid -from typing import Any - -from rest_framework.request import Request -from tool_instance.constants import ToolInstanceKey as TIKey -from tool_instance.exceptions import ToolInstantiationError -from tool_instance.tool_processor import ToolProcessor -from unstract.tool_registry.dto import Tool -from workflow_manager.workflow.constants import WorkflowKey -from workflow_manager.workflow.dto import ProvisionalWorkflow -from workflow_manager.workflow.exceptions import WorkflowGenerationError -from workflow_manager.workflow.models.workflow import Workflow as WorkflowModel - -from unstract.core.llm_workflow_generator.llm_interface import LLMInterface - -logger = logging.getLogger(__name__) - - -# TODO: Can be removed as not getting used with UX chnages. -class WorkflowGenerator: - """Helps with generating a workflow using the LLM.""" - - def __init__(self, workflow_id: str = str(uuid.uuid4())) -> None: - self._request: Request = {} - self._llm_response = "" - self._workflow_id = workflow_id - self._provisional_wf: ProvisionalWorkflow - - @property - def llm_response(self) -> dict[str, Any]: - output: dict[str, str] = self._provisional_wf.output - return output - - @property - def provisional_wf(self) -> ProvisionalWorkflow: - return self._provisional_wf - - def _get_provisional_workflow(self, tools: list[Tool]) -> ProvisionalWorkflow: - """Helper to generate the provisional workflow Gets stored as - `workflow.Workflow.llm_response` eventually.""" - provisional_wf: ProvisionalWorkflow - try: - if not self._request: - raise WorkflowGenerationError( - "Unable to generate a workflow: missing request" - ) - llm_interface = LLMInterface() - - provisional_wf_dict = llm_interface.get_provisional_workflow_from_llm( - workflow_id=self._workflow_id, - tools=tools, - user_prompt=self._request.data.get(WorkflowKey.PROMPT_TEXT), - use_cache=True, - ) - provisional_wf = ProvisionalWorkflow(provisional_wf_dict) - if provisional_wf.result != "OK": - raise WorkflowGenerationError( - f"Unable to generate a workflow: {provisional_wf.output}" - ) - except Exception as e: - logger.error(f"{e}") - raise WorkflowGenerationError - return provisional_wf - - def set_request(self, request: Request) -> None: - self._request = request - - def generate_workflow(self, tools: list[Tool]) -> None: - """Used to talk to the GPT model through core and obtain a provisional - workflow for the user to work with.""" - self._provisional_wf = self._get_provisional_workflow(tools) - - @staticmethod - def get_tool_instance_data_from_llm( - workflow: WorkflowModel, - ) -> list[dict[str, str]]: - """Used to generate the dict of tool instances for a given workflow. - - Call with ToolInstanceSerializer(data=tool_instance_data_list,many=True) - """ - tool_instance_data_list = [] - for step, tool_step in enumerate( - workflow.llm_response.get(WorkflowKey.WF_STEPS, []) - ): - step = step + 1 - logger.info(f"Building tool instance data for step: {step}") - tool_function: str = tool_step[WorkflowKey.WF_TOOL] - wf_input: str = tool_step[WorkflowKey.WF_INPUT] - wf_output: str = tool_step[WorkflowKey.WF_OUTPUT] - try: - tool: Tool = ToolProcessor.get_tool_by_uid(tool_function) - # TODO: Mark optional fields in model and handle in ToolInstance serializer # noqa - tool_instance_data = { - TIKey.PK: tool_step[WorkflowKey.WF_TOOL_UUID], - TIKey.WORKFLOW: workflow.id, - # Added to support changes for UN-154 - WorkflowKey.WF_ID: workflow.id, - TIKey.TOOL_ID: tool_function, - TIKey.METADATA: { - WorkflowKey.WF_TOOL_INSTANCE_ID: tool_step[ - WorkflowKey.WF_TOOL_UUID - ], - **ToolProcessor.get_default_settings(tool), - }, - TIKey.STEP: str(step), - TIKey.INPUT: wf_input, - TIKey.OUTPUT: wf_output, - } - tool_instance_data_list.append(tool_instance_data) - except Exception as e: - logger.error(f"Error while getting data for {tool_function}: {e}") - raise ToolInstantiationError(tool_name=tool_function) - return tool_instance_data_list diff --git a/backend/workflow_manager/workflow/serializers.py b/backend/workflow_manager/workflow/serializers.py index 04dfcdac4..51629c193 100644 --- a/backend/workflow_manager/workflow/serializers.py +++ b/backend/workflow_manager/workflow/serializers.py @@ -15,8 +15,6 @@ from tool_instance.tool_instance_helper import ToolInstanceHelper from workflow_manager.endpoint.models import WorkflowEndpoint from workflow_manager.workflow.constants import WorkflowExecutionKey, WorkflowKey -from workflow_manager.workflow.exceptions import WorkflowGenerationError -from workflow_manager.workflow.generator import WorkflowGenerator from workflow_manager.workflow.models.execution import WorkflowExecution from workflow_manager.workflow.models.execution_log import ExecutionLog from workflow_manager.workflow.models.workflow import Workflow @@ -59,33 +57,6 @@ def create(self, validated_data: dict[str, Any]) -> Any: ).user return super().create(validated_data) - def update(self, instance: Any, validated_data: dict[str, Any]) -> Any: - if validated_data.get(WorkflowKey.PROMPT_TEXT): - instance.workflow_tool.all().delete() - return super().update(instance, validated_data) - - def save(self, **kwargs: Any) -> Workflow: - workflow: Workflow = super().save(**kwargs) - if self.validated_data.get(WorkflowKey.PROMPT_TEXT): - try: - tool_serializer = ToolInstanceSerializer( - data=WorkflowGenerator.get_tool_instance_data_from_llm( - workflow=workflow - ), - many=True, - context=self.context, - ) - tool_serializer.is_valid(raise_exception=True) - tool_serializer.save() - except Exception as exc: - logger.error(f"Error while generating tool instances: {exc}") - raise WorkflowGenerationError - - request = self.context.get("request") - if not request: - return workflow - return workflow - class ExecuteWorkflowSerializer(Serializer): workflow_id = UUIDField(required=False) diff --git a/backend/workflow_manager/workflow/views.py b/backend/workflow_manager/workflow/views.py index 740f5d46d..f8f2d4cd8 100644 --- a/backend/workflow_manager/workflow/views.py +++ b/backend/workflow_manager/workflow/views.py @@ -4,7 +4,6 @@ from connector.connector_instance_helper import ConnectorInstanceHelper from django.conf import settings from django.db.models.query import QuerySet -from numpy import deprecate_with_doc from permissions.permission import IsOwner from pipeline.models import Pipeline from pipeline.pipeline_processor import PipelineProcessor @@ -13,8 +12,6 @@ from rest_framework.request import Request from rest_framework.response import Response from rest_framework.versioning import URLPathVersioning -from tool_instance.tool_processor import ToolProcessor -from unstract.tool_registry.dto import Tool from utils.filtering import FilterHelper from workflow_manager.endpoint.destination import DestinationConnector from workflow_manager.endpoint.dto import FileHash @@ -29,7 +26,6 @@ WorkflowGenerationError, WorkflowRegenerationError, ) -from workflow_manager.workflow.generator import WorkflowGenerator from workflow_manager.workflow.models.execution import WorkflowExecution from workflow_manager.workflow.models.workflow import Workflow from workflow_manager.workflow.serializers import ( @@ -82,14 +78,6 @@ def get_serializer_class(self) -> serializers.Serializer: else: return WorkflowSerializer - @deprecate_with_doc("Not using with the latest UX chnages") - def _generate_workflow(self, workflow_id: str) -> WorkflowGenerator: - registry_tools: list[Tool] = ToolProcessor.get_registry_tools() - generator = WorkflowGenerator(workflow_id=workflow_id) - generator.set_request(self.request) - generator.generate_workflow(registry_tools) - return generator - def perform_update(self, serializer: WorkflowSerializer) -> Workflow: """To edit a workflow. diff --git a/backend/workflow_manager/workflow_v2/constants.py b/backend/workflow_manager/workflow_v2/constants.py index 2b9d3cf8e..a1cf08b43 100644 --- a/backend/workflow_manager/workflow_v2/constants.py +++ b/backend/workflow_manager/workflow_v2/constants.py @@ -1,7 +1,6 @@ class WorkflowKey: """Dict keys related to workflows.""" - PROMPT_TEXT = "prompt_text" LLM_RESPONSE = "llm_response" WF_STEPS = "steps" WF_TOOL = "tool" diff --git a/backend/workflow_manager/workflow_v2/generator.py b/backend/workflow_manager/workflow_v2/generator.py deleted file mode 100644 index ace9f48b0..000000000 --- a/backend/workflow_manager/workflow_v2/generator.py +++ /dev/null @@ -1,114 +0,0 @@ -import logging -import uuid -from typing import Any - -from rest_framework.request import Request -from tool_instance_v2.constants import ToolInstanceKey as TIKey -from tool_instance_v2.exceptions import ToolInstantiationError -from tool_instance_v2.tool_processor import ToolProcessor -from unstract.tool_registry.dto import Tool -from workflow_manager.workflow_v2.constants import WorkflowKey -from workflow_manager.workflow_v2.dto import ProvisionalWorkflow -from workflow_manager.workflow_v2.exceptions import WorkflowGenerationError -from workflow_manager.workflow_v2.models.workflow import Workflow as WorkflowModel - -from unstract.core.llm_workflow_generator.llm_interface import LLMInterface - -logger = logging.getLogger(__name__) - - -# TODO: Can be removed as not getting used with UX chnages. -class WorkflowGenerator: - """Helps with generating a workflow using the LLM.""" - - def __init__(self, workflow_id: str = str(uuid.uuid4())) -> None: - self._request: Request = {} - self._llm_response = "" - self._workflow_id = workflow_id - self._provisional_wf: ProvisionalWorkflow - - @property - def llm_response(self) -> dict[str, Any]: - output: dict[str, str] = self._provisional_wf.output - return output - - @property - def provisional_wf(self) -> ProvisionalWorkflow: - return self._provisional_wf - - def _get_provisional_workflow(self, tools: list[Tool]) -> ProvisionalWorkflow: - """Helper to generate the provisional workflow Gets stored as - `workflow.Workflow.llm_response` eventually.""" - provisional_wf: ProvisionalWorkflow - try: - if not self._request: - raise WorkflowGenerationError( - "Unable to generate a workflow: missing request" - ) - llm_interface = LLMInterface() - - provisional_wf_dict = llm_interface.get_provisional_workflow_from_llm( - workflow_id=self._workflow_id, - tools=tools, - user_prompt=self._request.data.get(WorkflowKey.PROMPT_TEXT), - use_cache=True, - ) - provisional_wf = ProvisionalWorkflow(provisional_wf_dict) - if provisional_wf.result != "OK": - raise WorkflowGenerationError( - f"Unable to generate a workflow: {provisional_wf.output}" - ) - except Exception as e: - logger.error(f"{e}") - raise WorkflowGenerationError - return provisional_wf - - def set_request(self, request: Request) -> None: - self._request = request - - def generate_workflow(self, tools: list[Tool]) -> None: - """Used to talk to the GPT model through core and obtain a provisional - workflow for the user to work with.""" - self._provisional_wf = self._get_provisional_workflow(tools) - - @staticmethod - def get_tool_instance_data_from_llm( - workflow: WorkflowModel, - ) -> list[dict[str, str]]: - """Used to generate the dict of tool instances for a given workflow. - - Call with ToolInstanceSerializer(data=tool_instance_data_list,many=True) - """ - tool_instance_data_list = [] - for step, tool_step in enumerate( - workflow.llm_response.get(WorkflowKey.WF_STEPS, []) - ): - step = step + 1 - logger.info(f"Building tool instance data for step: {step}") - tool_function: str = tool_step[WorkflowKey.WF_TOOL] - wf_input: str = tool_step[WorkflowKey.WF_INPUT] - wf_output: str = tool_step[WorkflowKey.WF_OUTPUT] - try: - tool: Tool = ToolProcessor.get_tool_by_uid(tool_function) - # TODO: Mark optional fields in model and handle in ToolInstance serializer # noqa - tool_instance_data = { - TIKey.PK: tool_step[WorkflowKey.WF_TOOL_UUID], - TIKey.WORKFLOW: workflow.id, - # Added to support changes for UN-154 - WorkflowKey.WF_ID: workflow.id, - TIKey.TOOL_ID: tool_function, - TIKey.METADATA: { - WorkflowKey.WF_TOOL_INSTANCE_ID: tool_step[ - WorkflowKey.WF_TOOL_UUID - ], - **ToolProcessor.get_default_settings(tool), - }, - TIKey.STEP: str(step), - TIKey.INPUT: wf_input, - TIKey.OUTPUT: wf_output, - } - tool_instance_data_list.append(tool_instance_data) - except Exception as e: - logger.error(f"Error while getting data for {tool_function}: {e}") - raise ToolInstantiationError(tool_name=tool_function) - return tool_instance_data_list diff --git a/backend/workflow_manager/workflow_v2/migrations/0002_remove_workflow_llm_response_and_more.py b/backend/workflow_manager/workflow_v2/migrations/0002_remove_workflow_llm_response_and_more.py new file mode 100644 index 000000000..d669ff198 --- /dev/null +++ b/backend/workflow_manager/workflow_v2/migrations/0002_remove_workflow_llm_response_and_more.py @@ -0,0 +1,25 @@ +# Generated by Django 4.2.1 on 2024-11-05 08:43 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("workflow_v2", "0001_initial"), + ] + + operations = [ + migrations.RemoveField( + model_name="workflow", + name="llm_response", + ), + migrations.RemoveField( + model_name="workflow", + name="prompt_name", + ), + migrations.RemoveField( + model_name="workflow", + name="prompt_text", + ), + ] diff --git a/backend/workflow_manager/workflow_v2/models/workflow.py b/backend/workflow_manager/workflow_v2/models/workflow.py index 18f3015b3..1dc32c394 100644 --- a/backend/workflow_manager/workflow_v2/models/workflow.py +++ b/backend/workflow_manager/workflow_v2/models/workflow.py @@ -8,7 +8,6 @@ DefaultOrganizationMixin, ) -PROMPT_NAME_LENGTH = 32 WORKFLOW_STATUS_LENGTH = 16 DESCRIPTION_FIELD_LENGTH = 490 WORKFLOW_NAME_SIZE = 128 @@ -34,14 +33,10 @@ class ExecutionAction(models.TextChoices): # TODO Make this guid as primaryId instaed of current id bigint id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) - # TODO: Move prompt fields as a One-One relationship/into Prompt instead - prompt_name = models.CharField(max_length=PROMPT_NAME_LENGTH, default="") description = models.TextField(max_length=DESCRIPTION_FIELD_LENGTH, default="") workflow_name = models.CharField(max_length=WORKFLOW_NAME_SIZE) - prompt_text = models.TextField(default="") is_active = models.BooleanField(default=False) status = models.CharField(max_length=WORKFLOW_STATUS_LENGTH, default="") - llm_response = models.TextField() workflow_owner = models.ForeignKey( User, on_delete=models.SET_NULL, diff --git a/backend/workflow_manager/workflow_v2/serializers.py b/backend/workflow_manager/workflow_v2/serializers.py index 436b88530..17f81a4b0 100644 --- a/backend/workflow_manager/workflow_v2/serializers.py +++ b/backend/workflow_manager/workflow_v2/serializers.py @@ -16,8 +16,6 @@ from utils.serializer.integrity_error_mixin import IntegrityErrorMixin from workflow_manager.endpoint_v2.models import WorkflowEndpoint from workflow_manager.workflow_v2.constants import WorkflowExecutionKey, WorkflowKey -from workflow_manager.workflow_v2.exceptions import WorkflowGenerationError -from workflow_manager.workflow_v2.generator import WorkflowGenerator from workflow_manager.workflow_v2.models.execution import WorkflowExecution from workflow_manager.workflow_v2.models.execution_log import ExecutionLog from workflow_manager.workflow_v2.models.workflow import Workflow @@ -67,33 +65,6 @@ def create(self, validated_data: dict[str, Any]) -> Any: ).user return super().create(validated_data) - def update(self, instance: Any, validated_data: dict[str, Any]) -> Any: - if validated_data.get(WorkflowKey.PROMPT_TEXT): - instance.workflow_tool.all().delete() - return super().update(instance, validated_data) - - def save(self, **kwargs: Any) -> Workflow: - workflow: Workflow = super().save(**kwargs) - if self.validated_data.get(WorkflowKey.PROMPT_TEXT): - try: - tool_serializer = ToolInstanceSerializer( - data=WorkflowGenerator.get_tool_instance_data_from_llm( - workflow=workflow - ), - many=True, - context=self.context, - ) - tool_serializer.is_valid(raise_exception=True) - tool_serializer.save() - except Exception as exc: - logger.error(f"Error while generating tool instances: {exc}") - raise WorkflowGenerationError - - request = self.context.get("request") - if not request: - return workflow - return workflow - class ExecuteWorkflowSerializer(Serializer): workflow_id = UUIDField(required=False) diff --git a/backend/workflow_manager/workflow_v2/views.py b/backend/workflow_manager/workflow_v2/views.py index 5a875aacb..ea005da5f 100644 --- a/backend/workflow_manager/workflow_v2/views.py +++ b/backend/workflow_manager/workflow_v2/views.py @@ -2,7 +2,6 @@ from typing import Any, Optional from django.db.models.query import QuerySet -from numpy import deprecate_with_doc from permissions.permission import IsOwner from pipeline_v2.models import Pipeline from pipeline_v2.pipeline_processor import PipelineProcessor @@ -11,8 +10,6 @@ from rest_framework.request import Request from rest_framework.response import Response from rest_framework.versioning import URLPathVersioning -from tool_instance_v2.tool_processor import ToolProcessor -from unstract.tool_registry.dto import Tool from utils.filtering import FilterHelper from workflow_manager.endpoint_v2.destination import DestinationConnector from workflow_manager.endpoint_v2.dto import FileHash @@ -27,7 +24,6 @@ WorkflowGenerationError, WorkflowRegenerationError, ) -from workflow_manager.workflow_v2.generator import WorkflowGenerator from workflow_manager.workflow_v2.models.execution import WorkflowExecution from workflow_manager.workflow_v2.models.workflow import Workflow from workflow_manager.workflow_v2.serializers import ( @@ -79,14 +75,6 @@ def get_serializer_class(self) -> serializers.Serializer: else: return WorkflowSerializer - @deprecate_with_doc("Not using with the latest UX chnages") - def _generate_workflow(self, workflow_id: str) -> WorkflowGenerator: - registry_tools: list[Tool] = ToolProcessor.get_registry_tools() - generator = WorkflowGenerator(workflow_id=workflow_id) - generator.set_request(self.request) - generator.generate_workflow(registry_tools) - return generator - def perform_update(self, serializer: WorkflowSerializer) -> Workflow: """To edit a workflow. diff --git a/tools/structure/sample.env b/tools/structure/sample.env index d2d2ec900..eac50572c 100644 --- a/tools/structure/sample.env +++ b/tools/structure/sample.env @@ -9,6 +9,3 @@ PROMPT_PORT=3003 X2TEXT_HOST=http://unstract-x2text-service X2TEXT_PORT=3004 - -# Required by Llama Index -OPENAI_API_KEY= diff --git a/unstract/core/src/unstract/core/llm_helper/__init__.py b/unstract/core/src/unstract/core/llm_helper/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstract/core/src/unstract/core/llm_helper/config.py b/unstract/core/src/unstract/core/llm_helper/config.py deleted file mode 100644 index 1a09cabcd..000000000 --- a/unstract/core/src/unstract/core/llm_helper/config.py +++ /dev/null @@ -1,57 +0,0 @@ -from dataclasses import dataclass - -from unstract.core.utilities import UnstractUtils - - -class OpenAIKeys: - OPENAI_API_KEY = "OPENAI_API_KEY" - OPENAI_API_BASE = "OPENAI_API_BASE" - OPENAI_API_VERSION = "OPENAI_API_VERSION" - OPENAI_API_ENGINE = "OPENAI_API_ENGINE" - OPENAI_API_MODEL = "OPENAI_API_MODEL" - OPENAI_API_MODEL_EMBEDDING = "OPENAI_API_MODEL_EMBEDDING" - OPENAI_API_DEPLOYMENT_EMBEDDING = "OPENAI_API_DEPLOYMENT_EMBEDDING" - OPENAI_API_TYPE = "OPENAI_API_TYPE" - - -class OpenAIDefaults: - OPENAI_API_TYPE = "azure" - OPENAI_API_BASE = "https://pandora-one.openai.azure.com/" - OPENAI_API_VERSION = "2023-05-15" - - -@dataclass -class AzureOpenAIConfig: - model: str - deployment_name: str - engine: str - api_key: str - api_version: str - azure_endpoint: str - api_type: str - - @classmethod - def from_env(cls) -> "AzureOpenAIConfig": - kwargs = { - "model": UnstractUtils.get_env(OpenAIKeys.OPENAI_API_MODEL, raise_err=True), - "deployment_name": UnstractUtils.get_env( - OpenAIKeys.OPENAI_API_ENGINE, raise_err=True - ), - "engine": UnstractUtils.get_env( - OpenAIKeys.OPENAI_API_ENGINE, raise_err=True - ), - "api_key": UnstractUtils.get_env(OpenAIKeys.OPENAI_API_KEY, raise_err=True), - "api_version": UnstractUtils.get_env( - OpenAIKeys.OPENAI_API_VERSION, - default=OpenAIDefaults.OPENAI_API_VERSION, - ), - "azure_endpoint": UnstractUtils.get_env( - OpenAIKeys.OPENAI_API_BASE, - default=OpenAIDefaults.OPENAI_API_BASE, - ), - "api_type": UnstractUtils.get_env( - OpenAIKeys.OPENAI_API_TYPE, - default=OpenAIDefaults.OPENAI_API_TYPE, - ), - } - return cls(**kwargs) diff --git a/unstract/core/src/unstract/core/llm_helper/enums.py b/unstract/core/src/unstract/core/llm_helper/enums.py deleted file mode 100644 index ca6631489..000000000 --- a/unstract/core/src/unstract/core/llm_helper/enums.py +++ /dev/null @@ -1,10 +0,0 @@ -from enum import Enum - - -class PromptContext: - GENERATE_CRON_STRING = "GENERATE_CRON_STRING" - - -class LLMResult(Enum): - OK = "OK" - NOK = "NOK" diff --git a/unstract/core/src/unstract/core/llm_helper/llm_cache.py b/unstract/core/src/unstract/core/llm_helper/llm_cache.py deleted file mode 100644 index 5af5b67d1..000000000 --- a/unstract/core/src/unstract/core/llm_helper/llm_cache.py +++ /dev/null @@ -1,113 +0,0 @@ -import hashlib -import logging -import os - -import redis - -logger = logging.getLogger(__name__) - - -class LLMCache: - def __init__(self, cache_key_prefix: str) -> None: - redis_host = os.environ.get("REDIS_HOST") - redis_port = os.environ.get("REDIS_PORT") - if redis_host is None or redis_port is None: - raise RuntimeError("REDIS_HOST or REDIS_PORT environment variable not set") - redis_password = os.environ.get("REDIS_PASSWORD", None) - if redis_password and ( - redis_password == "" or redis_password.lower() == "none" - ): - redis_password = None - self.llm_cache = redis.Redis( - host=redis_host, port=int(redis_port), password=redis_password - ) - self.cache_key_prefix = cache_key_prefix - - def __del__(self): - if self.llm_cache: - self.llm_cache.close() - - def _get_cache_key(self, seed: str) -> str: - _hash = hashlib.sha1() - _hash.update(seed.encode("utf-8")) - hash_hex = _hash.hexdigest() - return self.cache_key_prefix + str(hash_hex) - - def get(self, key: str) -> str: - response = "" - try: - response_bin = self.llm_cache.get(key) - if response_bin is not None: - logger.info("Cache hit") - response = response_bin.decode("utf-8") - else: - logger.info("Cache miss") - except Exception as e: - logger.warning(f"Error loading {key} from cache: {e}") - return response - - def set(self, key: str, value: str) -> None: - try: - self.llm_cache.set(key, value) - except Exception as e: - logger.warning(f"Error saving {key} to cache: {e}") - - def delete(self, *keys: str) -> int: - """Deletes keys from the cache. - - Args: - keys (str): Variable number of keys to delete - - Returns: - int: Number of keys deleted, -1 if it fails - """ - deleted_count = 0 - try: - deleted_count = self.llm_cache.delete(*keys) - logger.info(f"Deleted {deleted_count} keys from the cache") - except Exception: - logger.warning(f"Error deleting {keys} from cache") - return -1 - return deleted_count - - def get_for_prompt(self, prompt: str) -> str: - """Gets the cached value for a prompt It hashes the prompt and prefixes - the key with `cache_key_prefix`. - - Args: - prompt (str): Prompt to retrieve value for - - Returns: - str: Cached response - """ - key = self._get_cache_key(seed=prompt) - return self.get(key=key) - - def set_for_prompt(self, prompt: str, response: str) -> None: - """Sets response from LLM in cache. - - Args: - prompt (str): Used to determine cache key - response (str): Response to be cached - - Returns: - None - """ - key = self._get_cache_key(seed=prompt) - return self.set(key=key, value=response) - - def clear_by_prefix(self) -> int: - """Used to clear the cache by prefix. Prefix is set when instance is - created. Iterates and deletes each key matching prefix. - - Returns: - int: Number of keys deleted, -1 if it fails - """ - logger.info(f"Clearing cache with prefix: {self.cache_key_prefix}") - keys_to_delete = [ - key for key in self.llm_cache.scan_iter(match=self.cache_key_prefix + "*") - ] - if keys_to_delete: - return self.delete(*keys_to_delete) - else: - return 0 diff --git a/unstract/core/src/unstract/core/llm_helper/llm_helper.py b/unstract/core/src/unstract/core/llm_helper/llm_helper.py deleted file mode 100644 index 12d947cc6..000000000 --- a/unstract/core/src/unstract/core/llm_helper/llm_helper.py +++ /dev/null @@ -1,123 +0,0 @@ -import logging -import os -import time -from typing import Optional - -from llama_index.llms import AzureOpenAI - -from unstract.core.llm_helper.config import AzureOpenAIConfig -from unstract.core.llm_helper.enums import LLMResult, PromptContext -from unstract.core.llm_helper.llm_cache import LLMCache -from unstract.core.llm_helper.models import LLMResponse - -logger = logging.getLogger(__name__) - - -class LLMHelper: - """Helps generate response from an LLM for a given prompt. - - It can leverage a prompt context if necessary. - """ - - def __init__( - self, - prompt_template: str = "azure-open-ai/version-0.1", - cache_key_prefix: Optional[str] = None, - prompt_context: Optional[PromptContext] = None, - ) -> None: - self.prompt_template = prompt_template - self.prompt_context = prompt_context - self.prompt = "" - self.cache_key_prefix = "cache:" - if cache_key_prefix: - self.cache_key_prefix += cache_key_prefix + ":" - if prompt_context: - with open( - f"{os.path.dirname(__file__)}/static/prompts/{prompt_template}/{prompt_context}", # noqa - ) as file: - self.prompt = file.read() - self.cache_key_prefix += prompt_context + ":" - - self.llm_cache = LLMCache(cache_key_prefix=self.cache_key_prefix) - - def _prepare_prompt(self, user_prompt: str) -> str: - """Used to add context to the user entered prompt.""" - if not self.prompt: - return user_prompt - prompt_for_model = self.prompt - - if self.prompt_context == PromptContext.GENERATE_CRON_STRING: - prompt_for_model = prompt_for_model.replace("{$user_prompt}", user_prompt) - - return prompt_for_model - - def get_response_from_llm( - self, prompt: str, use_cache: bool = False - ) -> LLMResponse: - """Responds with the LLM output for a given prompt. - - Args: - prompt (str): Prompt to generate response for - use_cache (bool, optional): Flag to retrieve from cache. - Defaults to False. - - Returns: - LLMResponse: LLM output - """ - prompt_for_model = self._prepare_prompt(user_prompt=prompt) - ai_service = self.prompt_template.split("/")[0] - if ai_service == "azure-open-ai": - logger.info("Using Azure OpenAI") - if use_cache: - response = self.llm_cache.get_for_prompt(prompt=prompt_for_model) - if response: - return LLMResponse( - result=LLMResult.OK, output=response, cost_type="cache" - ) - else: - logger.warning("Will call OpenAI API") - start_time = time.time() - - try: - azure_openai_config = AzureOpenAIConfig.from_env() - llm = AzureOpenAI( - model=azure_openai_config.model, - deployment_name=azure_openai_config.deployment_name, - engine=azure_openai_config.engine, - api_key=azure_openai_config.api_key, - api_version=azure_openai_config.api_version, - azure_endpoint=azure_openai_config.azure_endpoint, - api_type=azure_openai_config.api_type, - temperature=0, - max_retries=10, - ) - resp = llm.complete(prompt_for_model) - except Exception as e: - logger.error(f"OpenAI error: {e}") - return LLMResponse( - result=LLMResult.NOK, - output=f"OpenAI error: {e}", - cost_type=ai_service, - ) - end_time = time.time() - resp = resp.text - logger.info(f"OpenAI Response: {resp}") - time_taken = end_time - start_time - - self.llm_cache.set_for_prompt(prompt=prompt_for_model, response=resp) - return LLMResponse(output=resp, cost_type=ai_service, time_taken=time_taken) - else: - logger.error(f"AI service '{ai_service}' not found") - return LLMResponse( - result=LLMResult.NOK, - output=f"AI service '{ai_service}' not found", - cost_type=ai_service, - ) - - def clear_cache(self) -> int: - """Clears the cached responses by using the prefix. - - Returns: - int: Number of cache entries deleted, -1 if it failed - """ - return self.llm_cache.clear_by_prefix() diff --git a/unstract/core/src/unstract/core/llm_helper/models.py b/unstract/core/src/unstract/core/llm_helper/models.py deleted file mode 100644 index a9473947c..000000000 --- a/unstract/core/src/unstract/core/llm_helper/models.py +++ /dev/null @@ -1,12 +0,0 @@ -from dataclasses import dataclass - -from unstract.core.llm_helper.enums import LLMResult - - -@dataclass -class LLMResponse: - output: str - cost_type: str - result: LLMResult = LLMResult.OK - cost: float = 0 - time_taken: float = 0 diff --git a/unstract/core/src/unstract/core/llm_helper/static/prompts/azure-open-ai/version-0.1/GENERATE_CRON_STRING b/unstract/core/src/unstract/core/llm_helper/static/prompts/azure-open-ai/version-0.1/GENERATE_CRON_STRING deleted file mode 100644 index 4c5f3254c..000000000 --- a/unstract/core/src/unstract/core/llm_helper/static/prompts/azure-open-ai/version-0.1/GENERATE_CRON_STRING +++ /dev/null @@ -1,6 +0,0 @@ -$name: ChatGPT 3.5 -$desc: Chat with GPT 3.5 - -You are a cron string generator, given the following generate a valid cron string. The is a human readable text describing the frequency at which a cron job needs to run. No explanation is required, only output the generated cron string. Ensure that the generated cron string exactly represents the user input and pay attention to convert the minutes as well, for example 6:30 PM is "30 18 * * *". Only output the generated cron string. -### - : {$user_prompt} diff --git a/unstract/core/src/unstract/core/llm_workflow_generator/__init__.py b/unstract/core/src/unstract/core/llm_workflow_generator/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstract/core/src/unstract/core/llm_workflow_generator/dto.py b/unstract/core/src/unstract/core/llm_workflow_generator/dto.py deleted file mode 100644 index a4381a1e6..000000000 --- a/unstract/core/src/unstract/core/llm_workflow_generator/dto.py +++ /dev/null @@ -1,11 +0,0 @@ -from dataclasses import dataclass -from typing import Any - - -@dataclass -class ToolSettings: - id: str - tool_uid: str - spec: dict[str, Any] - properties: dict[str, Any] - is_active: bool diff --git a/unstract/core/src/unstract/core/llm_workflow_generator/llm_interface.py b/unstract/core/src/unstract/core/llm_workflow_generator/llm_interface.py deleted file mode 100644 index fb66a8780..000000000 --- a/unstract/core/src/unstract/core/llm_workflow_generator/llm_interface.py +++ /dev/null @@ -1,171 +0,0 @@ -import hashlib -import json -import logging -import os -import time -import uuid - -import redis -from llama_index.llms.azure_openai import AzureOpenAI -from unstract.tool_registry.dto import Properties, Tool - -from unstract.core.llm_helper.config import AzureOpenAIConfig - -# Refactor dated: 19/12/2023 ( Removal of Appkit removal) - - -class LLMInterface: - logging.basicConfig( - level=logging.INFO, - format="%(asctime)s - %(levelname)s - %(filename)s - %(message)s", - ) - - def __init__( - self, - prompt_template: str = "azure-open-ai/version-0.1", - ): - self.prompt_template = prompt_template - prompt_file_path = os.path.join( - os.path.dirname(__file__), - "static", - "prompts", - prompt_template, - "prompt", - ) - with open(prompt_file_path) as file: - self.prompt = file.read() - - def get_provisional_workflow_from_llm( - self, - workflow_id: str, - tools: list[Tool], - user_prompt: str, - use_cache: bool = False, - ): - redis_host = os.environ.get("REDIS_HOST") - redis_port = os.environ.get("REDIS_PORT") - - if redis_host is None: - raise RuntimeError("REDIS_HOST environment variable not set") - redis_password = os.environ.get("REDIS_PASSWORD") - if redis_password and ( - redis_password == "" or redis_password.lower() == "none" - ): - redis_password = None - ai_service = self.prompt_template.split("/")[0] - prompt_for_model = self.prompt - tools_prompt = self.get_tools_description_for_llm(tools) - prompt_for_model = prompt_for_model.replace("{$tools}", tools_prompt) - prompt_for_model = prompt_for_model.replace("{$task}", user_prompt) - logging.debug(prompt_for_model) - _hash = hashlib.sha1() - _hash.update(prompt_for_model.encode("utf-8")) - hash_hex = _hash.hexdigest() - if ai_service == "azure-open-ai": - logging.info("Using Azure OpenAI") - if use_cache: - try: - r = redis.Redis( - host=redis_host, - port=int(redis_port), - password=redis_password, - ) - redis_key = f"cache:{workflow_id}:workflow_prompt:{hash_hex}" - workflow_bin = r.get(redis_key) - if workflow_bin is not None: - logging.info("Cache hit") - workflow = json.loads(workflow_bin.decode("utf-8")) - return { - "result": "OK", - "output": workflow, - "cost_type": "cache", - "cost": 0, - "time_taken": 0, - } - else: - logging.warning("Cache miss. Will call OpenAI API") - except Exception as e: - logging.warning(f"Error loading from cache: {e}") - logging.warning("Will call OpenAI API") - - start_time = time.time() - try: - azure_openai_config = AzureOpenAIConfig.from_env() - llm = AzureOpenAI( - model=azure_openai_config.model, - deployment_name=azure_openai_config.deployment_name, - engine=azure_openai_config.engine, - api_key=azure_openai_config.api_key, - api_version=azure_openai_config.api_version, - azure_endpoint=azure_openai_config.azure_endpoint, - api_type=azure_openai_config.api_type, - temperature=0, - max_retries=10, - ) - resp = llm.complete(prompt_for_model, stop=["//End of JSON"]) - except Exception as e: - logging.error(f"OpenAI error: {e}") - return { - "result": "NOK", - "output": f"Error from OpenAI: {e}", - "cost_type": ai_service, - "cost": 0, - "time_taken": 0, - } - end_time = time.time() - resp = resp.text - logging.info(f"OpenAI Response: {resp}") - time_taken = end_time - start_time - - try: - workflow = json.loads(resp) - logging.info("Workflow parsed successfully") - logging.info(workflow) - except Exception as e: - logging.error("Error parsing workflow") - logging.error(e) - return { - "result": "NOK", - "output": "Error from OpenAI", - "cost_type": ai_service, - "cost": 0, - "time_taken": 0, - } - - for step in workflow["steps"]: - step["id"] = uuid.uuid4().hex - # Let's add it to the cache - try: - r = redis.Redis( - host=redis_host, - port=int(redis_port), - password=redis_password, - ) - redis_key = f"cache:{workflow_id}:workflow_prompt:{hash_hex}" - r.set(redis_key, json.dumps(workflow)) - r.close() - except Exception as e: - logging.warning(f"Error saving workflow to cache: {e}") - return { - "result": "OK", - "output": workflow, - "cost_type": ai_service, - "cost": 0, - "time_taken": time_taken, - } - else: - logging.error(f"AI service '{ai_service}' not found") - return None - - def get_tools_description_for_llm(self, tools: list[Tool]): - desc = "" - for tool in tools: - desc += self.tool_description_for_llm(tool.properties) + "\n" - return desc - - def tool_description_for_llm(self, properties: Properties) -> str: - if not properties: - return "" - desc = f"- {properties.function_name}(" - desc += f") : {properties.description}" - return desc diff --git a/unstract/core/src/unstract/core/llm_workflow_generator/static/prompts/azure-open-ai/version-0.1/prompt b/unstract/core/src/unstract/core/llm_workflow_generator/static/prompts/azure-open-ai/version-0.1/prompt deleted file mode 100644 index 4e5bd0ebe..000000000 --- a/unstract/core/src/unstract/core/llm_workflow_generator/static/prompts/azure-open-ai/version-0.1/prompt +++ /dev/null @@ -1,33 +0,0 @@ -$name: ChatGPT 3.5 -$desc: Chat with GPT 3.5 - -I will ask you to perform a task, your job is to come up with a series of simple steps that will perform the task. Use only the tools listed below as "tools". If a task cannot be performed with the tool list, use the tool called "catch_all". List the tools along with input parameters in an array. If the steps have to be repeated, repeat it in the output and do not explain. No explanation is required. Format the output in a formatted JSON. Do not perform any of the tasks yourself. If the output of one stage is required in the next as input, assign a variable name which replaces - -The JSON format is as follows: -{ - "steps": [ - { - "tool": "tool_name", - "input": { - "input1": "value1" - ... - }, - "output": { - "output": "" - } - }, - { - ... - } - ] -} //End of JSON - -It is very important that you end the JSON with //End of JSON - -Tools: -{$tools} - -Task: -{$task} - -Your JSON output: diff --git a/unstract/core/tests/llm_helper/test_cron_string_gen.py b/unstract/core/tests/llm_helper/test_cron_string_gen.py deleted file mode 100644 index 7decfaa6d..000000000 --- a/unstract/core/tests/llm_helper/test_cron_string_gen.py +++ /dev/null @@ -1,41 +0,0 @@ -import logging -import unittest - -from unstract.core.llm_helper.enums import LLMResult, PromptContext -from unstract.core.llm_helper.llm_helper import LLMHelper - -CRON_GEN_ERROR = "Cron string could not be generated" - - -class LLMHelperTests(unittest.TestCase): - logging.basicConfig( - level=logging.INFO, - format="%(asctime)s - %(levelname)s - %(filename)s - %(message)s", - ) - - # @unittest.skip("Skip") - def test_cron_string_generation(self): - prompt = "Run at 6:00 PM every single day" - # prompt = "Run every alternate day in 4 hour intervals" - # TODO: Below prompt fails, check on this - # prompt = "Run every alternate day in 4 hour intervals, starting from 4:00PM" - logging.info(f"Generating for input: {prompt}") - project_settings = { - "guid": "test", - } - llm_helper = LLMHelper( - cache_key=project_settings["guid"], - prompt_context=PromptContext.GENERATE_CRON_STRING, - ) - llm_response = llm_helper.get_response_from_llm(prompt, use_cache=True) - if llm_response.result != LLMResult.OK: - logging.error(f"{CRON_GEN_ERROR}: {llm_response.output}") - self.fail(f"{CRON_GEN_ERROR}: {llm_response.output}") - logging.info( - f"Generated cron: {llm_response.output} in {llm_response.time_taken:.3f}s" - ) - self.assertEqual(llm_response.output, "0 18 * * *") - - -if __name__ == "__main__": - unittest.main() diff --git a/unstract/core/tests/llm_helper/test_llm_cache.py b/unstract/core/tests/llm_helper/test_llm_cache.py deleted file mode 100644 index 3f6389242..000000000 --- a/unstract/core/tests/llm_helper/test_llm_cache.py +++ /dev/null @@ -1,25 +0,0 @@ -import logging -import unittest - -from unstract.core.llm_helper.llm_cache import LLMCache - -CRON_GEN_ERROR = "Cron string could not be generated" - - -class LLMCacheTests(unittest.TestCase): - logging.basicConfig( - level=logging.INFO, - format="%(asctime)s - %(levelname)s - %(filename)s - %(message)s", - ) - - # @unittest.skip("Skip") - def test_cache_clear(self): - cache = LLMCache(cache_key_prefix="cache:test:") - cache.set_for_prompt("prompt1", "response1") - cache.set_for_prompt("prompt2", "response2") - cache.clear_by_prefix() - self.assertEqual(cache.get_for_prompt("prompt1"), "", "Cache is not cleared") - - -if __name__ == "__main__": - unittest.main()