Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add continue on failure in task block #981

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ function TaskNode({ id, data }: NodeProps<TaskNode>) {
maxRetries: data.maxRetries,
maxStepsOverride: data.maxStepsOverride,
allowDownloads: data.allowDownloads,
continueOnFailure: data.continueOnFailure,
downloadSuffix: data.downloadSuffix,
errorCodeMapping: data.errorCodeMapping,
totpVerificationUrl: data.totpVerificationUrl,
Expand Down Expand Up @@ -333,6 +334,24 @@ function TaskNode({ id, data }: NodeProps<TaskNode>) {
/>
</div>
</div>
<div className="flex items-center justify-between">
<div className="flex gap-2">
<Label className="text-xs font-normal text-slate-300">
Continue on Failure
</Label>
</div>
<div className="w-52">
<Switch
checked={inputs.continueOnFailure}
onCheckedChange={(checked) => {
if (!editable) {
return;
}
handleChange("continueOnFailure", checked);
}}
/>
</div>
</div>
<div className="flex items-center justify-between">
<div className="flex gap-2">
<Label className="text-xs font-normal text-slate-300">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ export type TaskNodeData = {
parameterKeys: Array<string>;
totpVerificationUrl: string | null;
totpIdentifier: string | null;
continueOnFailure: boolean;
};

export type TaskNode = Node<TaskNodeData, "task">;
Expand All @@ -36,6 +37,7 @@ export const taskNodeDefaultData: TaskNodeData = {
parameterKeys: [],
totpVerificationUrl: null,
totpIdentifier: null,
continueOnFailure: false,
} as const;

export function isTaskNode(node: Node): node is TaskNode {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ function convertToNode(
parameterKeys: block.parameters.map((p) => p.key),
totpIdentifier: block.totp_identifier ?? null,
totpVerificationUrl: block.totp_verification_url ?? null,
continueOnFailure: block.continue_on_failure,
},
};
}
Expand Down Expand Up @@ -518,6 +519,7 @@ function getWorkflowBlock(
parameter_keys: node.data.parameterKeys,
totp_identifier: node.data.totpIdentifier,
totp_verification_url: node.data.totpVerificationUrl,
continue_on_failure: node.data.continueOnFailure,
};
}
case "sendEmail": {
Expand Down
5 changes: 0 additions & 5 deletions skyvern/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,8 +490,3 @@ class IllegitComplete(SkyvernException):
def __init__(self, data: dict | None = None) -> None:
data_str = f", data={data}" if data else ""
super().__init__(f"Illegit complete{data_str}")


class CachedActionPlanError(SkyvernException):
def __init__(self, message: str) -> None:
super().__init__(message)
54 changes: 17 additions & 37 deletions skyvern/forge/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,9 @@
WebAction,
parse_actions,
)
from skyvern.webeye.actions.caching import retrieve_action_plan
from skyvern.webeye.actions.handler import (
ActionHandler,
extract_information_for_navigation_goal,
poll_verification_code,
)
from skyvern.webeye.actions.handler import ActionHandler, handle_complete_action, poll_verification_code
from skyvern.webeye.actions.models import AgentStepOutput, DetailedAgentStepOutput
from skyvern.webeye.actions.responses import ActionResult, ActionSuccess
from skyvern.webeye.actions.responses import ActionResult
from skyvern.webeye.browser_factory import BrowserState
from skyvern.webeye.scraper.scraper import ElementTreeFormat, ScrapedPage, scrape_website
from skyvern.webeye.utils.page import SkyvernFrame
Expand Down Expand Up @@ -558,22 +553,7 @@ async def agent_step(
detailed_agent_step_output.extract_action_prompt = extract_action_prompt
json_response = None
actions: list[Action]

using_cached_action_plan = False
if not task.navigation_goal:
actions = [
CompleteAction(
reasoning="Task has no navigation goal.",
data_extraction_goal=task.data_extraction_goal,
)
]
elif (
task_block
and task_block.cache_actions
and (actions := await retrieve_action_plan(task, step, scraped_page))
):
using_cached_action_plan = True
else:
if task.navigation_goal:
self.async_operation_pool.run_operation(task.task_id, AgentPhase.llm)
json_response = await app.LLM_API_HANDLER(
prompt=extract_action_prompt,
Expand All @@ -589,8 +569,14 @@ async def agent_step(
)
detailed_agent_step_output.llm_response = json_response

actions = parse_actions(task, step.step_id, step.order, scraped_page, json_response["actions"])

actions = parse_actions(task, json_response["actions"])
else:
actions = [
CompleteAction(
reasoning="Task has no navigation goal.",
data_extraction_goal=task.data_extraction_goal,
)
]
detailed_agent_step_output.actions = actions
if len(actions) == 0:
LOG.info(
Expand Down Expand Up @@ -635,8 +621,7 @@ async def agent_step(
wait_actions_to_skip = [action for action in actions if action.action_type == ActionType.WAIT]
wait_actions_len = len(wait_actions_to_skip)
# if there are wait actions and there are other actions in the list, skip wait actions
# if we are using cached action plan, we don't skip wait actions
if wait_actions_len > 0 and wait_actions_len < len(actions) and not using_cached_action_plan:
if wait_actions_len > 0 and wait_actions_len < len(actions):
actions = [action for action in actions if action.action_type != ActionType.WAIT]
LOG.info(
"Skipping wait actions",
Expand Down Expand Up @@ -886,10 +871,12 @@ async def check_user_goal_success(
navigation_payload=task.navigation_payload,
elements=scraped_page.build_element_tree(ElementTreeFormat.HTML),
)
screenshots = await SkyvernFrame.take_split_screenshots(page=page, url=page.url)

verification_llm_api_handler = app.SECONDARY_LLM_API_HANDLER

verification_response = await verification_llm_api_handler(
prompt=verification_prompt, step=step, screenshots=None
prompt=verification_prompt, step=step, screenshots=screenshots
)
if "user_goal_achieved" not in verification_response or "reasoning" not in verification_response:
LOG.error(
Expand All @@ -908,16 +895,9 @@ async def check_user_goal_success(
return None

LOG.info("User goal achieved, executing complete action")
extracted_data = None
if complete_action.data_extraction_goal:
scrape_action_result = await extract_information_for_navigation_goal(
scraped_page=scraped_page,
task=task,
step=step,
)
extracted_data = scrape_action_result.scraped_data
action_results = await handle_complete_action(complete_action, page, scraped_page, task, step)

return complete_action, [ActionSuccess(data=extracted_data)]
return complete_action, action_results

except Exception:
LOG.error("LLM verification failed for complete action, skipping LLM verification", exc_info=True)
Expand Down
25 changes: 0 additions & 25 deletions skyvern/forge/prompts/skyvern/answer-user-detail-questions.j2

This file was deleted.

8 changes: 4 additions & 4 deletions skyvern/forge/prompts/skyvern/check-user-goal.j2
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Based on the content of the elements on the page, determine whether the user goal has been successfully completed or not.
Based on the content of the screenshot and the elements on the page, determine whether the user goal has been successfully completed or not.

The JSON object should be in this format:
```json
Expand All @@ -7,15 +7,15 @@ The JSON object should be in this format:
"user_goal_achieved": bool // True if the user goal has been completed, False otherwise.
}

Make sure to ONLY return the JSON object, with no additional text before or after it. Do not make any assumptions, return a response solely based on the elements on the page.
Make sure to ONLY return the JSON object, with no additional text before or after it. Do not make any assumptions based on the screenshot, return a response solely based on what you observe in the screenshot and nothing else.

Examples:
{
"reasoning": "There is a success message for a file upload field. Since the user's goal is to upload a file, it has been successfully completed.",
"reasoning": "The screenshot shows a success message for a file upload field. Since the user's goal is to upload a file, it has been successfully completed.",
"user_goal_achieved": true
}
{
"reasoning": "This is a job application form with fields. Since the user's goal is to submit a job application, it has not been successfully completed.",
"reasoning": "The screenshot shows a job application form with fields. Since the user's goal is to submit a job application, it has not been successfully completed.",
"user_goal_achieved": false
}

Expand Down
4 changes: 1 addition & 3 deletions skyvern/forge/prompts/skyvern/extract-action.j2
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@ Reply in JSON format with the following keys:
"action_plan": str, // A string that describes the plan of actions you're going to take. Be specific and to the point. Use this as a quick summary of the actions you're going to take, and what order you're going to take them in, and how that moves you towards your overall goal. Output "COMPLETE" action in the "actions" if user_goal_achieved is True.
"actions": array // An array of actions. Here's the format of each action:
[{
"reasoning": str, // The reasoning behind the action. This reasoning must be user information agnostic. Mention why you chose the action type, and why you chose the element id. Keep the reasoning short and to the point.
"user_detail_query": str, // Think of this value as a Jeopardy question. Ask the user for the details you need for executing this action. Ask the question even if the details are disclosed in user goal or user details. If it's a text field, ask for the text. If it's a file upload, ask for the file. If it's a dropdown, ask for the relevant information. If you are clicking on something specific, ask about what to click on. If you're downloading a file and you have multiple options, ask the user which one to download. Otherwise, use null. Examples are: "What product ID should I input into the search bar?", "What file should I upload?", "What is the previous insurance provider of the user?", "Which invoice should I download?", "Does the user have any pets?". If the action doesn't require any user details, use null.
"user_detail_answer": str, // The answer to the `user_detail_query`. The source of this answer can be user goal or user details.
"reasoning": str, // The reasoning behind the action. Be specific, referencing any user information and their fields and element ids in your reasoning. Mention why you chose the action type, and why you chose the element id. Keep the reasoning short and to the point.
"confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
"action_type": str, // It's a string enum: "CLICK", "INPUT_TEXT", "UPLOAD_FILE", "SELECT_OPTION", "WAIT", "SOLVE_CAPTCHA", "COMPLETE", "TERMINATE". "CLICK" is an element you'd like to click. "INPUT_TEXT" is an element you'd like to input text into. "UPLOAD_FILE" is an element you'd like to upload a file into. "SELECT_OPTION" is an element you'd like to select an option from. "WAIT" action should be used if there are no actions to take and there is some indication on screen that waiting could yield more actions. "WAIT" should not be used if there are actions to take. "SOLVE_CAPTCHA" should be used if there's a captcha to solve on the screen. "COMPLETE" is used when the user goal has been achieved AND if there's any data extraction goal, you should be able to get data from the page. Never return a COMPLETE action unless the user goal is achieved. "TERMINATE" is used to terminate the whole task with a failure when it doesn't seem like the user goal can be achieved. Do not use "TERMINATE" if waiting could lead the user towards the goal. Only return "TERMINATE" if you are on a page where the user goal cannot be achieved. All other actions are ignored when "TERMINATE" is returned.
"id": str, // The id of the element to take action on. The id has to be one from the elements list
Expand Down
8 changes: 0 additions & 8 deletions skyvern/forge/sdk/api/crypto.py

This file was deleted.

2 changes: 1 addition & 1 deletion skyvern/forge/sdk/api/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def rename_file(file_path: str, new_file_name: str) -> str:
return file_path


def calculate_sha256_for_file(file_path: str) -> str:
def calculate_sha256(file_path: str) -> str:
"""Helper function to calculate SHA256 hash of a file."""
sha256_hash = hashlib.sha256()
with open(file_path, "rb") as f:
Expand Down
58 changes: 0 additions & 58 deletions skyvern/forge/sdk/db/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType
from skyvern.forge.sdk.db.exceptions import NotFoundError
from skyvern.forge.sdk.db.models import (
ActionModel,
ArtifactModel,
AWSSecretParameterModel,
BitwardenCreditCardDataParameterModel,
Expand Down Expand Up @@ -69,7 +68,6 @@
WorkflowRunParameter,
WorkflowRunStatus,
)
from skyvern.webeye.actions.actions import Action
from skyvern.webeye.actions.models import AgentStepOutput

LOG = structlog.get_logger()
Expand Down Expand Up @@ -1573,59 +1571,3 @@ async def get_totp_codes(
)
totp_code = (await session.scalars(query)).all()
return [TOTPCode.model_validate(totp_code) for totp_code in totp_code]

async def create_action(self, action: Action) -> Action:
async with self.Session() as session:
new_action = ActionModel(
action_type=action.action_type,
source_action_id=action.source_action_id,
organization_id=action.organization_id,
workflow_run_id=action.workflow_run_id,
task_id=action.task_id,
step_id=action.step_id,
step_order=action.step_order,
action_order=action.action_order,
status=action.status,
reasoning=action.reasoning,
intention=action.intention,
response=action.response,
element_id=action.element_id,
skyvern_element_hash=action.skyvern_element_hash,
skyvern_element_data=action.skyvern_element_data,
action_json=action.model_dump(),
)
session.add(new_action)
await session.commit()
await session.refresh(new_action)
return Action.model_validate(new_action)

async def retrieve_action_plan(self, task: Task) -> list[Action]:
async with self.Session() as session:
subquery = (
select(TaskModel.task_id)
.filter(TaskModel.url == task.url)
.filter(TaskModel.navigation_goal == task.navigation_goal)
.filter(TaskModel.status == TaskStatus.completed)
.order_by(TaskModel.created_at.desc())
.limit(1)
.subquery()
)

query = (
select(ActionModel)
.filter(ActionModel.task_id == subquery.c.task_id)
.order_by(ActionModel.step_order, ActionModel.action_order, ActionModel.created_at)
)

actions = (await session.scalars(query)).all()
return [Action.model_validate(action) for action in actions]

async def get_previous_actions_for_task(self, task_id: str) -> list[Action]:
async with self.Session() as session:
query = (
select(ActionModel)
.filter_by(task_id=task_id)
.order_by(ActionModel.step_order, ActionModel.action_order, ActionModel.created_at)
)
actions = (await session.scalars(query)).all()
return [Action.model_validate(action) for action in actions]
5 changes: 0 additions & 5 deletions skyvern/forge/sdk/db/id.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,6 @@ def generate_totp_code_id() -> str:
return f"totp_{int_id}"


def generate_action_id() -> str:
int_id = generate_id()
return f"a_{int_id}"


def generate_id() -> int:
"""
generate a 64-bit int ID
Expand Down
27 changes: 0 additions & 27 deletions skyvern/forge/sdk/db/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType
from skyvern.forge.sdk.db.id import (
generate_action_id,
generate_artifact_id,
generate_aws_secret_parameter_id,
generate_bitwarden_credit_card_data_parameter_id,
Expand Down Expand Up @@ -438,29 +437,3 @@ class TOTPCodeModel(Base):
created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False, index=True)
modified_at = Column(DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow, nullable=False)
expired_at = Column(DateTime, index=True)


class ActionModel(Base):
__tablename__ = "actions"
__table_args__ = (Index("action_org_task_step_index", "organization_id", "task_id", "step_id"),)

action_id = Column(String, primary_key=True, index=True, default=generate_action_id)
action_type = Column(String, nullable=False)
source_action_id = Column(String, ForeignKey("actions.action_id"), nullable=True, index=True)
organization_id = Column(String, ForeignKey("organizations.organization_id"), nullable=True)
workflow_run_id = Column(String, ForeignKey("workflow_runs.workflow_run_id"), nullable=True)
task_id = Column(String, ForeignKey("tasks.task_id"), nullable=False, index=True)
step_id = Column(String, ForeignKey("steps.step_id"), nullable=False)
step_order = Column(Integer, nullable=False)
action_order = Column(Integer, nullable=False)
status = Column(String, nullable=False)
reasoning = Column(String, nullable=True)
intention = Column(String, nullable=True)
response = Column(String, nullable=True)
element_id = Column(String, nullable=True)
skyvern_element_hash = Column(String, nullable=True)
skyvern_element_data = Column(JSON, nullable=True)
action_json = Column(JSON, nullable=True)

created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False)
modified_at = Column(DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow, nullable=False)
Loading
Loading