Skyvern-AI · wintonzheng · Oct 15, 2024 · Oct 15, 2024
diff --git a/skyvern-frontend/src/routes/workflows/editor/nodes/TaskNode/TaskNode.tsx b/skyvern-frontend/src/routes/workflows/editor/nodes/TaskNode/TaskNode.tsx
@@ -73,6 +73,7 @@ function TaskNode({ id, data }: NodeProps<TaskNode>) {
     maxRetries: data.maxRetries,
     maxStepsOverride: data.maxStepsOverride,
     allowDownloads: data.allowDownloads,
+    continueOnFailure: data.continueOnFailure,
     downloadSuffix: data.downloadSuffix,
     errorCodeMapping: data.errorCodeMapping,
     totpVerificationUrl: data.totpVerificationUrl,
@@ -333,6 +334,24 @@ function TaskNode({ id, data }: NodeProps<TaskNode>) {
                   />
                 </div>
               </div>
+              <div className="flex items-center justify-between">
+                <div className="flex gap-2">
+                  <Label className="text-xs font-normal text-slate-300">
+                    Continue on Failure
+                  </Label>
+                </div>
+                <div className="w-52">
+                  <Switch
+                    checked={inputs.continueOnFailure}
+                    onCheckedChange={(checked) => {
+                      if (!editable) {
+                        return;
+                      }
+                      handleChange("continueOnFailure", checked);
+                    }}
+                  />
+                </div>
+              </div>
               <div className="flex items-center justify-between">
                 <div className="flex gap-2">
                   <Label className="text-xs font-normal text-slate-300">

diff --git a/skyvern-frontend/src/routes/workflows/editor/nodes/TaskNode/types.ts b/skyvern-frontend/src/routes/workflows/editor/nodes/TaskNode/types.ts
@@ -15,6 +15,7 @@ export type TaskNodeData = {
   parameterKeys: Array<string>;
   totpVerificationUrl: string | null;
   totpIdentifier: string | null;
+  continueOnFailure: boolean;
 };
 
 export type TaskNode = Node<TaskNodeData, "task">;
@@ -36,6 +37,7 @@ export const taskNodeDefaultData: TaskNodeData = {
   parameterKeys: [],
   totpVerificationUrl: null,
   totpIdentifier: null,
+  continueOnFailure: false,
 } as const;
 
 export function isTaskNode(node: Node): node is TaskNode {

diff --git a/skyvern-frontend/src/routes/workflows/editor/workflowEditorUtils.ts b/skyvern-frontend/src/routes/workflows/editor/workflowEditorUtils.ts
@@ -147,6 +147,7 @@ function convertToNode(
           parameterKeys: block.parameters.map((p) => p.key),
           totpIdentifier: block.totp_identifier ?? null,
           totpVerificationUrl: block.totp_verification_url ?? null,
+          continueOnFailure: block.continue_on_failure,
         },
       };
     }
@@ -518,6 +519,7 @@ function getWorkflowBlock(
         parameter_keys: node.data.parameterKeys,
         totp_identifier: node.data.totpIdentifier,
         totp_verification_url: node.data.totpVerificationUrl,
+        continue_on_failure: node.data.continueOnFailure,
       };
     }
     case "sendEmail": {

diff --git a/skyvern/exceptions.py b/skyvern/exceptions.py
@@ -490,8 +490,3 @@ class IllegitComplete(SkyvernException):
     def __init__(self, data: dict | None = None) -> None:
         data_str = f", data={data}" if data else ""
         super().__init__(f"Illegit complete{data_str}")
-
-
-class CachedActionPlanError(SkyvernException):
-    def __init__(self, message: str) -> None:
-        super().__init__(message)
diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py
@@ -51,14 +51,9 @@
     WebAction,
     parse_actions,
 )
-from skyvern.webeye.actions.caching import retrieve_action_plan
-from skyvern.webeye.actions.handler import (
-    ActionHandler,
-    extract_information_for_navigation_goal,
-    poll_verification_code,
-)
+from skyvern.webeye.actions.handler import ActionHandler, handle_complete_action, poll_verification_code
 from skyvern.webeye.actions.models import AgentStepOutput, DetailedAgentStepOutput
-from skyvern.webeye.actions.responses import ActionResult, ActionSuccess
+from skyvern.webeye.actions.responses import ActionResult
 from skyvern.webeye.browser_factory import BrowserState
 from skyvern.webeye.scraper.scraper import ElementTreeFormat, ScrapedPage, scrape_website
 from skyvern.webeye.utils.page import SkyvernFrame
@@ -558,22 +553,7 @@ async def agent_step(
             detailed_agent_step_output.extract_action_prompt = extract_action_prompt
             json_response = None
             actions: list[Action]
-
-            using_cached_action_plan = False
-            if not task.navigation_goal:
-                actions = [
-                    CompleteAction(
-                        reasoning="Task has no navigation goal.",
-                        data_extraction_goal=task.data_extraction_goal,
-                    )
-                ]
-            elif (
-                task_block
-                and task_block.cache_actions
-                and (actions := await retrieve_action_plan(task, step, scraped_page))
-            ):
-                using_cached_action_plan = True
-            else:
+            if task.navigation_goal:
                 self.async_operation_pool.run_operation(task.task_id, AgentPhase.llm)
                 json_response = await app.LLM_API_HANDLER(
                     prompt=extract_action_prompt,
@@ -589,8 +569,14 @@ async def agent_step(
                 )
                 detailed_agent_step_output.llm_response = json_response
 
-                actions = parse_actions(task, step.step_id, step.order, scraped_page, json_response["actions"])
-
+                actions = parse_actions(task, json_response["actions"])
+            else:
+                actions = [
+                    CompleteAction(
+                        reasoning="Task has no navigation goal.",
+                        data_extraction_goal=task.data_extraction_goal,
+                    )
+                ]
             detailed_agent_step_output.actions = actions
             if len(actions) == 0:
                 LOG.info(
@@ -635,8 +621,7 @@ async def agent_step(
                 wait_actions_to_skip = [action for action in actions if action.action_type == ActionType.WAIT]
                 wait_actions_len = len(wait_actions_to_skip)
                 # if there are wait actions and there are other actions in the list, skip wait actions
-                # if we are using cached action plan, we don't skip wait actions
-                if wait_actions_len > 0 and wait_actions_len < len(actions) and not using_cached_action_plan:
+                if wait_actions_len > 0 and wait_actions_len < len(actions):
                     actions = [action for action in actions if action.action_type != ActionType.WAIT]
                     LOG.info(
                         "Skipping wait actions",
@@ -886,10 +871,12 @@ async def check_user_goal_success(
                 navigation_payload=task.navigation_payload,
                 elements=scraped_page.build_element_tree(ElementTreeFormat.HTML),
             )
+            screenshots = await SkyvernFrame.take_split_screenshots(page=page, url=page.url)
+
             verification_llm_api_handler = app.SECONDARY_LLM_API_HANDLER
 
             verification_response = await verification_llm_api_handler(
-                prompt=verification_prompt, step=step, screenshots=None
+                prompt=verification_prompt, step=step, screenshots=screenshots
             )
             if "user_goal_achieved" not in verification_response or "reasoning" not in verification_response:
                 LOG.error(
@@ -908,16 +895,9 @@ async def check_user_goal_success(
                 return None
 
             LOG.info("User goal achieved, executing complete action")
-            extracted_data = None
-            if complete_action.data_extraction_goal:
-                scrape_action_result = await extract_information_for_navigation_goal(
-                    scraped_page=scraped_page,
-                    task=task,
-                    step=step,
-                )
-                extracted_data = scrape_action_result.scraped_data
+            action_results = await handle_complete_action(complete_action, page, scraped_page, task, step)
 
-            return complete_action, [ActionSuccess(data=extracted_data)]
+            return complete_action, action_results
 
         except Exception:
             LOG.error("LLM verification failed for complete action, skipping LLM verification", exc_info=True)

diff --git a/skyvern/forge/prompts/skyvern/answer-user-detail-questions.j2 b/skyvern/forge/prompts/skyvern/answer-user-detail-questions.j2
diff --git a/skyvern/forge/prompts/skyvern/check-user-goal.j2 b/skyvern/forge/prompts/skyvern/check-user-goal.j2
@@ -1,4 +1,4 @@
-Based on the content of the elements on the page, determine whether the user goal has been successfully completed or not.
+Based on the content of the screenshot and the elements on the page, determine whether the user goal has been successfully completed or not.
 
 The JSON object should be in this format:
 ```json
@@ -7,15 +7,15 @@ The JSON object should be in this format:
   "user_goal_achieved": bool // True if the user goal has been completed, False otherwise.
 }
 
-Make sure to ONLY return the JSON object, with no additional text before or after it. Do not make any assumptions, return a response solely based on the elements on the page.
+Make sure to ONLY return the JSON object, with no additional text before or after it. Do not make any assumptions; return a response based solely on what you observe in the screenshot and the elements on the page.
 
 Examples:
 {
-  "reasoning": "There is a success message for a file upload field. Since the user's goal is to upload a file, it has been successfully completed.",
+  "reasoning": "The screenshot shows a success message for a file upload field. Since the user's goal is to upload a file, it has been successfully completed.",
   "user_goal_achieved": true
 }
 {
-  "reasoning": "This is a job application form with fields. Since the user's goal is to submit a job application, it has not been successfully completed.",
+  "reasoning": "The screenshot shows a job application form with fields. Since the user's goal is to submit a job application, it has not been successfully completed.",
   "user_goal_achieved": false
 }
 

diff --git a/skyvern/forge/prompts/skyvern/extract-action.j2 b/skyvern/forge/prompts/skyvern/extract-action.j2
@@ -14,9 +14,7 @@ Reply in JSON format with the following keys:
     "action_plan": str, // A string that describes the plan of actions you're going to take. Be specific and to the point. Use this as a quick summary of the actions you're going to take, and what order you're going to take them in, and how that moves you towards your overall goal. Output "COMPLETE" action in the "actions" if user_goal_achieved is True.
     "actions": array // An array of actions. Here's the format of each action:
     [{
-        "reasoning": str, // The reasoning behind the action. This reasoning must be user information agnostic. Mention why you chose the action type, and why you chose the element id. Keep the reasoning short and to the point.
-        "user_detail_query": str, // Think of this value as a Jeopardy question. Ask the user for the details you need for executing this action. Ask the question even if the details are disclosed in user goal or user details. If it's a text field, ask for the text. If it's a file upload, ask for the file. If it's a dropdown, ask for the relevant information. If you are clicking on something specific, ask about what to click on. If you're downloading a file and you have multiple options, ask the user which one to download. Otherwise, use null. Examples are: "What product ID should I input into the search bar?", "What file should I upload?", "What is the previous insurance provider of the user?", "Which invoice should I download?", "Does the user have any pets?". If the action doesn't require any user details, use null.
-        "user_detail_answer": str, // The answer to the `user_detail_query`. The source of this answer can be user goal or user details.
+        "reasoning": str, // The reasoning behind the action. Be specific, referencing any user information and their fields and element ids in your reasoning. Mention why you chose the action type, and why you chose the element id. Keep the reasoning short and to the point.
         "confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
         "action_type": str, // It's a string enum: "CLICK", "INPUT_TEXT", "UPLOAD_FILE", "SELECT_OPTION", "WAIT", "SOLVE_CAPTCHA", "COMPLETE", "TERMINATE". "CLICK" is an element you'd like to click. "INPUT_TEXT" is an element you'd like to input text into. "UPLOAD_FILE" is an element you'd like to upload a file into. "SELECT_OPTION" is an element you'd like to select an option from. "WAIT" action should be used if there are no actions to take and there is some indication on screen that waiting could yield more actions. "WAIT" should not be used if there are actions to take. "SOLVE_CAPTCHA" should be used if there's a captcha to solve on the screen. "COMPLETE" is used when the user goal has been achieved AND if there's any data extraction goal, you should be able to get data from the page. Never return a COMPLETE action unless the user goal is achieved. "TERMINATE" is used to terminate the whole task with a failure when it doesn't seem like the user goal can be achieved. Do not use "TERMINATE" if waiting could lead the user towards the goal. Only return "TERMINATE" if you are on a page where the user goal cannot be achieved. All other actions are ignored when "TERMINATE" is returned.
         "id": str, // The id of the element to take action on. The id has to be one from the elements list

diff --git a/skyvern/forge/sdk/api/crypto.py b/skyvern/forge/sdk/api/crypto.py
diff --git a/skyvern/forge/sdk/api/files.py b/skyvern/forge/sdk/api/files.py
@@ -113,7 +113,7 @@ def rename_file(file_path: str, new_file_name: str) -> str:
         return file_path
 
 
-def calculate_sha256_for_file(file_path: str) -> str:
+def calculate_sha256(file_path: str) -> str:
     """Helper function to calculate SHA256 hash of a file."""
     sha256_hash = hashlib.sha256()
     with open(file_path, "rb") as f:

diff --git a/skyvern/forge/sdk/db/client.py b/skyvern/forge/sdk/db/client.py
@@ -13,7 +13,6 @@
 from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType
 from skyvern.forge.sdk.db.exceptions import NotFoundError
 from skyvern.forge.sdk.db.models import (
-    ActionModel,
     ArtifactModel,
     AWSSecretParameterModel,
     BitwardenCreditCardDataParameterModel,
@@ -69,7 +68,6 @@
     WorkflowRunParameter,
     WorkflowRunStatus,
 )
-from skyvern.webeye.actions.actions import Action
 from skyvern.webeye.actions.models import AgentStepOutput
 
 LOG = structlog.get_logger()
@@ -1573,59 +1571,3 @@ async def get_totp_codes(
             )
             totp_code = (await session.scalars(query)).all()
             return [TOTPCode.model_validate(totp_code) for totp_code in totp_code]
-
-    async def create_action(self, action: Action) -> Action:
-        async with self.Session() as session:
-            new_action = ActionModel(
-                action_type=action.action_type,
-                source_action_id=action.source_action_id,
-                organization_id=action.organization_id,
-                workflow_run_id=action.workflow_run_id,
-                task_id=action.task_id,
-                step_id=action.step_id,
-                step_order=action.step_order,
-                action_order=action.action_order,
-                status=action.status,
-                reasoning=action.reasoning,
-                intention=action.intention,
-                response=action.response,
-                element_id=action.element_id,
-                skyvern_element_hash=action.skyvern_element_hash,
-                skyvern_element_data=action.skyvern_element_data,
-                action_json=action.model_dump(),
-            )
-            session.add(new_action)
-            await session.commit()
-            await session.refresh(new_action)
-            return Action.model_validate(new_action)
-
-    async def retrieve_action_plan(self, task: Task) -> list[Action]:
-        async with self.Session() as session:
-            subquery = (
-                select(TaskModel.task_id)
-                .filter(TaskModel.url == task.url)
-                .filter(TaskModel.navigation_goal == task.navigation_goal)
-                .filter(TaskModel.status == TaskStatus.completed)
-                .order_by(TaskModel.created_at.desc())
-                .limit(1)
-                .subquery()
-            )
-
-            query = (
-                select(ActionModel)
-                .filter(ActionModel.task_id == subquery.c.task_id)
-                .order_by(ActionModel.step_order, ActionModel.action_order, ActionModel.created_at)
-            )
-
-            actions = (await session.scalars(query)).all()
-            return [Action.model_validate(action) for action in actions]
-
-    async def get_previous_actions_for_task(self, task_id: str) -> list[Action]:
-        async with self.Session() as session:
-            query = (
-                select(ActionModel)
-                .filter_by(task_id=task_id)
-                .order_by(ActionModel.step_order, ActionModel.action_order, ActionModel.created_at)
-            )
-            actions = (await session.scalars(query)).all()
-            return [Action.model_validate(action) for action in actions]
diff --git a/skyvern/forge/sdk/db/id.py b/skyvern/forge/sdk/db/id.py
@@ -130,11 +130,6 @@ def generate_totp_code_id() -> str:
     return f"totp_{int_id}"
 
 
-def generate_action_id() -> str:
-    int_id = generate_id()
-    return f"a_{int_id}"
-
-
 def generate_id() -> int:
     """
     generate a 64-bit int ID

diff --git a/skyvern/forge/sdk/db/models.py b/skyvern/forge/sdk/db/models.py
@@ -19,7 +19,6 @@
 
 from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType
 from skyvern.forge.sdk.db.id import (
-    generate_action_id,
     generate_artifact_id,
     generate_aws_secret_parameter_id,
     generate_bitwarden_credit_card_data_parameter_id,
@@ -438,29 +437,3 @@ class TOTPCodeModel(Base):
     created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False, index=True)
     modified_at = Column(DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow, nullable=False)
     expired_at = Column(DateTime, index=True)
-
-
-class ActionModel(Base):
-    __tablename__ = "actions"
-    __table_args__ = (Index("action_org_task_step_index", "organization_id", "task_id", "step_id"),)
-
-    action_id = Column(String, primary_key=True, index=True, default=generate_action_id)
-    action_type = Column(String, nullable=False)
-    source_action_id = Column(String, ForeignKey("actions.action_id"), nullable=True, index=True)
-    organization_id = Column(String, ForeignKey("organizations.organization_id"), nullable=True)
-    workflow_run_id = Column(String, ForeignKey("workflow_runs.workflow_run_id"), nullable=True)
-    task_id = Column(String, ForeignKey("tasks.task_id"), nullable=False, index=True)
-    step_id = Column(String, ForeignKey("steps.step_id"), nullable=False)
-    step_order = Column(Integer, nullable=False)
-    action_order = Column(Integer, nullable=False)
-    status = Column(String, nullable=False)
-    reasoning = Column(String, nullable=True)
-    intention = Column(String, nullable=True)
-    response = Column(String, nullable=True)
-    element_id = Column(String, nullable=True)
-    skyvern_element_hash = Column(String, nullable=True)
-    skyvern_element_data = Column(JSON, nullable=True)
-    action_json = Column(JSON, nullable=True)
-
-    created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False)
-    modified_at = Column(DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow, nullable=False)