diff --git a/.github/workflows/ai.yml b/.github/workflows/ai.yml index ec12a0052..0b4ea0c79 100644 --- a/.github/workflows/ai.yml +++ b/.github/workflows/ai.yml @@ -22,7 +22,7 @@ jobs: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }} MIDSCENE_MODEL_NAME: gpt-4o-2024-08-06 - MIDSCENE_DEBUG_AI_PROFILE: 1 + # MIDSCENE_DEBUG_AI_PROFILE: 1 steps: - uses: actions/checkout@v4 diff --git a/packages/cli/src/printer.ts b/packages/cli/src/printer.ts index 2da87bf2c..920422511 100644 --- a/packages/cli/src/printer.ts +++ b/packages/cli/src/printer.ts @@ -30,8 +30,9 @@ export const flowItemBrief = (flowItem?: MidsceneYamlFlowItem) => { } const sliceText = (text?: string) => { - if (text && text.length > 12) { - return `${text.slice(0, 12)}...`; + const lengthLimit = 60; + if (text && text.length > lengthLimit) { + return `${text.slice(0, lengthLimit)}...`; } return text || ''; @@ -42,7 +43,8 @@ export const flowItemBrief = (flowItem?: MidsceneYamlFlowItem) => { (flowItem as MidsceneYamlFlowItemAIAction).ai ) { return `aiAction: ${sliceText( - (flowItem as MidsceneYamlFlowItemAIAction).aiAction || + (flowItem as MidsceneYamlFlowItemAIAction).aiActionProgressTip || + (flowItem as MidsceneYamlFlowItemAIAction).aiAction || (flowItem as MidsceneYamlFlowItemAIAction).ai, )}`; } @@ -104,7 +106,7 @@ export const contextInfo = (context: MidsceneYamlFileContext) => { const reportFile = context.player.reportFile; const reportFileToShow = relative(process.cwd(), reportFile || ''); const reportText = reportFile - ? `\n${indent}${chalk.gray(`report: ${reportFileToShow}`)}` + ? `\n${indent}${chalk.gray(`report: ./${reportFileToShow}`)}` : ''; const mergedText = diff --git a/packages/cli/src/types.d.ts b/packages/cli/src/types.d.ts index 87623bc72..2c7716605 100644 --- a/packages/cli/src/types.d.ts +++ b/packages/cli/src/types.d.ts @@ -16,6 +16,7 @@ export interface MidsceneYamlScriptEnv { export interface MidsceneYamlFlowItemAIAction { ai?: string; // this is the shortcut for aiAction aiAction?: string; + aiActionProgressTip?: string; } export interface MidsceneYamlFlowItemAIAssert { diff --git a/packages/cli/src/yaml-player.ts b/packages/cli/src/yaml-player.ts index a1a29382e..98d4228ce 100644 --- a/packages/cli/src/yaml-player.ts +++ b/packages/cli/src/yaml-player.ts @@ -7,6 +7,8 @@ import assert from 'node:assert'; import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs'; import { basename, dirname, extname, join } from 'node:path'; import { PuppeteerAgent } from '@midscene/web/puppeteer'; +import { paramStr, typeStr } from '@midscene/web/ui-utils'; + import { contextInfo, contextTaskListSummary, @@ -116,14 +118,14 @@ export async function playYamlFiles( ttyRenderer.start(); for (const context of fileContextList) { - await context.player.play(); + await context.player.run(); } ttyRenderer.stop(); } else { for (const context of fileContextList) { const { mergedText } = contextInfo(context); console.log(mergedText); - await context.player.play(); + await context.player.run(); console.log(contextTaskListSummary(context.player.taskStatus, context)); } } @@ -220,7 +222,13 @@ export class ScriptPlayer { typeof prompt === 'string', 'prompt for aiAction must be a string', ); - await agent.aiAction(prompt); + await agent.aiAction(prompt, { + onTaskStart(task) { + const tip = `${typeStr(task)} - ${paramStr(task)}`; + (flowItem as MidsceneYamlFlowItemAIAction).aiActionProgressTip = + tip; + }, + }); } else if ((flowItem as MidsceneYamlFlowItemAIAssert).aiAssert) { const assertTask = flowItem as MidsceneYamlFlowItemAIAssert; const prompt = assertTask.aiAssert; @@ -273,7 +281,7 @@ export class ScriptPlayer { this.reportFile = agent.reportFile; } - async play() { + async run() { const { target, tasks } = this.script; this.setPlayerStatus('running'); diff --git a/packages/cli/tests/__snapshots__/printer.test.ts.snap b/packages/cli/tests/__snapshots__/printer.test.ts.snap index f4ed4a758..df5b2fb02 100644 --- a/packages/cli/tests/__snapshots__/printer.test.ts.snap +++ b/packages/cli/tests/__snapshots__/printer.test.ts.snap @@ -1,7 +1,7 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html -exports[`printer > action brief text 1`] = `"aiAction: search for w..."`; +exports[`printer > action brief text 1`] = `"aiAction: search for weather"`; exports[`printer > action brief text 2`] = `"sleep: 1000"`; -exports[`printer > action brief text 3`] = `"aiWaitFor: wait for som..."`; +exports[`printer > action brief text 3`] = `"aiWaitFor: wait for something"`; diff --git a/packages/cli/tests/midscene_scripts/sub/bing.yaml b/packages/cli/tests/midscene_scripts/sub/bing.yaml index bb70e5b1c..81cad5ab7 100644 --- a/packages/cli/tests/midscene_scripts/sub/bing.yaml +++ b/packages/cli/tests/midscene_scripts/sub/bing.yaml @@ -1,5 +1,5 @@ target: - url: https://www.baidu.com + url: https://www.bing.com tasks: - name: search weather flow: diff --git a/packages/cli/tests/yaml.test.ts b/packages/cli/tests/yaml.test.ts index de6eb0daa..f95eeee48 100644 --- a/packages/cli/tests/yaml.test.ts +++ b/packages/cli/tests/yaml.test.ts @@ -7,7 +7,7 @@ import { assert, describe, expect, test, vi } from 'vitest'; const runYaml = async (yamlString: string) => { const script = loadYamlScript(yamlString); const player = new ScriptPlayer(script); - await player.play(); + await player.run(); assert( player.status === 'done', player.errorInSetup?.message || 'unknown error', diff --git a/packages/midscene/src/action/executor.ts b/packages/midscene/src/action/executor.ts index 02a4dbd60..1b74aed55 100644 --- a/packages/midscene/src/action/executor.ts +++ b/packages/midscene/src/action/executor.ts @@ -5,6 +5,7 @@ import type { ExecutionTask, ExecutionTaskApply, ExecutionTaskInsightLocateOutput, + ExecutionTaskProgressOptions, ExecutionTaskReturn, ExecutorContext, } from '@/types'; @@ -20,19 +21,19 @@ export class Executor { // status of executor status: 'init' | 'pending' | 'running' | 'completed' | 'error'; - onFlushUpdate?: () => void; + onTaskStart?: ExecutionTaskProgressOptions['onTaskStart']; constructor( name: string, description?: string, tasks?: ExecutionTaskApply[], - onFlushUpdate?: () => void, + options?: ExecutionTaskProgressOptions, ) { this.status = tasks && tasks.length > 0 ? 'pending' : 'init'; this.name = name; this.description = description; this.tasks = (tasks || []).map((item) => this.markTaskAsPending(item)); - this.onFlushUpdate = onFlushUpdate; + this.onTaskStart = options?.onTaskStart; } private markTaskAsPending(task: ExecutionTaskApply): ExecutionTask { @@ -84,13 +85,6 @@ export class Executor { while (taskIndex < this.tasks.length) { const task = this.tasks[taskIndex]; - try { - if (this.onFlushUpdate) { - this.onFlushUpdate(); - } - } catch (e) { - // console.error('error in onFlushUpdate', e); - } assert( task.status === 'pending', `task status should be pending, but got: ${task.status}`, @@ -100,6 +94,13 @@ export class Executor { }; try { task.status = 'running'; + try { + if (this.onTaskStart) { + await this.onTaskStart(task); + } + } catch (e) { + // console.error('error in onTaskStart', e); + } assert( ['Insight', 'Action', 'Planning'].indexOf(task.type) >= 0, `unsupported task type: ${task.type}`, @@ -162,9 +163,7 @@ export class Executor { } else { this.status = 'error'; } - if (this.onFlushUpdate) { - await this.onFlushUpdate(); - } + if (this.tasks.length) { // return the last output const outputIndex = Math.min(taskIndex, this.tasks.length - 1); diff --git a/packages/midscene/src/insight/index.ts b/packages/midscene/src/insight/index.ts index cdf72c451..495199455 100644 --- a/packages/midscene/src/insight/index.ts +++ b/packages/midscene/src/insight/index.ts @@ -96,7 +96,7 @@ export default class Insight< const startTime = Date.now(); const { parseResult, elementById, rawResponse, usage } = await AiInspectElement({ - callAI, + callAI: callAI || this.aiVendorFn, context, multi: Boolean(multi), targetElementDescription: queryPrompt, diff --git a/packages/midscene/src/types.ts b/packages/midscene/src/types.ts index 5aacf977c..355871775 100644 --- a/packages/midscene/src/types.ts +++ b/packages/midscene/src/types.ts @@ -313,6 +313,9 @@ export interface PlaywrightParserOpt extends BaseAgentParserOpt {} /* action */ +export interface ExecutionTaskProgressOptions { + onTaskStart?: (task: ExecutionTask) => Promise | void; +} export interface ExecutionRecorderItem { type: 'screenshot'; diff --git a/packages/midscene/tests/ai/extract/extract.test.ts b/packages/midscene/tests/ai/extract/extract.test.ts index 27aab9885..6b023a471 100644 --- a/packages/midscene/tests/ai/extract/extract.test.ts +++ b/packages/midscene/tests/ai/extract/extract.test.ts @@ -8,8 +8,6 @@ vi.setConfig({ hookTimeout: 30 * 1000, }); -const useModel = undefined; - const modelList: Array<'openAI' | 'coze'> = ['openAI']; if (preferCozeModel('coze')) { diff --git a/packages/midscene/tests/ai/executor/__snapshots__/index.test.ts.snap b/packages/midscene/tests/unit-test/executor/__snapshots__/index.test.ts.snap similarity index 100% rename from packages/midscene/tests/ai/executor/__snapshots__/index.test.ts.snap rename to packages/midscene/tests/unit-test/executor/__snapshots__/index.test.ts.snap diff --git a/packages/midscene/tests/ai/executor/index.test.ts b/packages/midscene/tests/unit-test/executor/index.test.ts similarity index 68% rename from packages/midscene/tests/ai/executor/index.test.ts rename to packages/midscene/tests/unit-test/executor/index.test.ts index 5ad4bbe23..27d10bed6 100644 --- a/packages/midscene/tests/ai/executor/index.test.ts +++ b/packages/midscene/tests/unit-test/executor/index.test.ts @@ -23,6 +23,7 @@ const insightFindTask = (shouldThrow?: boolean) => { param: { prompt: 'test', }, + locate: null, async executor(param, taskContext) { if (shouldThrow) { const { task } = taskContext; @@ -46,70 +47,58 @@ const insightFindTask = (shouldThrow?: boolean) => { return insightFindTask; }; -vi.setConfig({ - testTimeout: 40 * 1000, -}); - describe('executor', () => { - it( - 'insight - basic run', - async () => { - const insightTask1 = insightFindTask(); - const flushResultData = 'abcdef'; - const taskParam = { - action: 'tap', - anything: 'acceptable', - }; - const tapperFn = vi.fn(); - const actionTask: ExecutionTaskActionApply = { - type: 'Action', - param: taskParam, - executor: tapperFn, - }; - const actionTask2: ExecutionTaskActionApply = { - type: 'Action', - param: taskParam, - executor: async () => { - return { - output: flushResultData, - } as any; - }, - }; + it('insight - basic run', async () => { + const insightTask1 = insightFindTask(); + const flushResultData = 'abcdef'; + const taskParam = { + action: 'tap', + anything: 'acceptable', + }; + const tapperFn = vi.fn(); + const actionTask: ExecutionTaskActionApply = { + type: 'Action', + param: taskParam, + locate: null, + executor: tapperFn, + }; + const actionTask2: ExecutionTaskActionApply = { + type: 'Action', + param: taskParam, + locate: null, + executor: async () => { + return { + output: flushResultData, + } as any; + }, + }; - const inputTasks = [insightTask1, actionTask, actionTask2]; - - const executor = new Executor( - 'test', - 'hello, this is a test', - inputTasks, - ); - const flushResult = await executor.flush(); - const tasks = executor.tasks as ExecutionTaskInsightLocate[]; - const { element } = tasks[0].output || {}; - expect(element).toBeTruthy(); - - expect(tasks.length).toBe(inputTasks.length); - expect(tasks[0].status).toBe('finished'); - expect(tasks[0].output).toMatchSnapshot(); - expect(tasks[0].log?.dump).toBeTruthy(); - expect(tasks[0].timing?.end).toBeTruthy(); - expect(tasks[0].cache).toBeTruthy(); - expect(tasks[0].cache?.hit).toEqual(false); - - expect(tapperFn).toBeCalledTimes(1); - expect(tapperFn.mock.calls[0][0]).toBe(taskParam); - expect(tapperFn.mock.calls[0][1].element).toBe(element); - expect(tapperFn.mock.calls[0][1].task).toBeTruthy(); - - const dump = executor.dump(); - expect(dump.logTime).toBeTruthy(); - - expect(flushResult).toBe(flushResultData); - }, - { - timeout: 999 * 1000, - }, - ); + const inputTasks = [insightTask1, actionTask, actionTask2]; + + const executor = new Executor('test', 'hello, this is a test', inputTasks); + const flushResult = await executor.flush(); + const tasks = executor.tasks as ExecutionTaskInsightLocate[]; + expect(executor.isInErrorState()).toBeFalsy(); + const { element } = tasks[0].output || {}; + expect(element).toBeTruthy(); + + expect(tasks.length).toBe(inputTasks.length); + expect(tasks[0].status).toBe('finished'); + expect(tasks[0].output).toMatchSnapshot(); + expect(tasks[0].log?.dump).toBeTruthy(); + expect(tasks[0].timing?.end).toBeTruthy(); + expect(tasks[0].cache).toBeTruthy(); + expect(tasks[0].cache?.hit).toEqual(false); + + expect(tapperFn).toBeCalledTimes(1); + expect(tapperFn.mock.calls[0][0]).toBe(taskParam); + expect(tapperFn.mock.calls[0][1].task).toBeTruthy(); + + const dump = executor.dump(); + expect(dump.logTime).toBeTruthy(); + + expect(flushResult).toBe(flushResultData); + }); it('insight - init and append', async () => { const initExecutor = new Executor('test'); @@ -123,6 +112,7 @@ describe('executor', () => { action: 'tap', element: 'previous', }, + locate: null, executor: async () => { // delay 500 await new Promise((resolve) => setTimeout(resolve, 500)); diff --git a/packages/midscene/tests/utils.ts b/packages/midscene/tests/utils.ts index 41255d9ef..ab01ba982 100644 --- a/packages/midscene/tests/utils.ts +++ b/packages/midscene/tests/utils.ts @@ -1,13 +1,13 @@ import { readFileSync, writeFileSync } from 'node:fs'; -/* eslint-disable @typescript-eslint/no-magic-numbers */ import path, { join } from 'node:path'; +import type { callAiFn } from '@/ai-model/common'; import { base64Encoded, imageInfoOfBase64, transformImgPathToBase64, } from '@/image'; import Insight from '@/insight'; -import type { BaseElement, UIContext } from '@/types'; +import type { AIElementIdResponse, BaseElement, UIContext } from '@/types'; import { vi } from 'vitest'; export function getFixture(name: string) { @@ -45,16 +45,18 @@ export function fakeInsight(content: string) { center: [250, 250], tap: vi.fn() as unknown, }, - // describer: basicPa ] as unknown as BaseElement[], }; const context: UIContext = { ...basicContext, }; - const aiVendor = () => ({ - elements: [{ id: '0' }], - errors: [], + const aiVendor: typeof callAiFn = async () => ({ + content: { + elements: [{ id: '0', reason: '', text: '' }], + errors: [], + }, + usage: undefined, }); const insight = new Insight(context, { diff --git a/packages/visualizer/src/component/detail-side.tsx b/packages/visualizer/src/component/detail-side.tsx index ee60bea3b..4a37fbed3 100644 --- a/packages/visualizer/src/component/detail-side.tsx +++ b/packages/visualizer/src/component/detail-side.tsx @@ -1,7 +1,9 @@ /* eslint-disable max-lines */ 'use client'; import './detail-side.less'; -import { paramStr, timeStr, typeStr } from '@/utils'; +import { timeStr } from '@/utils'; +import { paramStr, typeStr } from '@midscene/web/ui-utils'; + import { RadiusSettingOutlined } from '@ant-design/icons'; import type { BaseElement, diff --git a/packages/visualizer/src/component/player.tsx b/packages/visualizer/src/component/player.tsx index 4096e6abe..fef3a9d6b 100644 --- a/packages/visualizer/src/component/player.tsx +++ b/packages/visualizer/src/component/player.tsx @@ -692,10 +692,26 @@ export default function Player(props?: { } }, [replayMark]); + const [mouseOverStatusIcon, setMouseOverStatusIcon] = useState(false); const progressString = Math.round(animationProgress * 100); const transitionStyle = animationProgress === 0 ? 'none' : '0.3s'; - const [mouseOverStatusIcon, setMouseOverStatusIcon] = useState(false); + // if the animation can be replay now, listen to the "" + const canReplayNow = animationProgress === 1; + useEffect(() => { + if (canReplayNow) { + const listener = (event: KeyboardEvent) => { + if (event.key === ' ') { + setReplayMark(Date.now()); + } + }; + window.addEventListener('keydown', listener); + return () => { + window.removeEventListener('keydown', listener); + }; + } + }, [canReplayNow]); + let statusIconElement; const statusStyle: React.CSSProperties = {}; let statusOnClick: () => void = () => {}; diff --git a/packages/visualizer/src/component/playground-component.less b/packages/visualizer/src/component/playground-component.less index 2b778bd38..d655dccb1 100644 --- a/packages/visualizer/src/component/playground-component.less +++ b/packages/visualizer/src/component/playground-component.less @@ -86,7 +86,24 @@ body { left: 12px; } } - + + + .loading-container { + display: flex; + flex-direction: column; + align-items: center; + box-sizing: border-box; + padding: @layout-extension-space-vertical @layout-extension-space-horizontal; + + .loading-progress-text { + text-align: center; + width: 100%; + color: @weak-text; + margin-top: 16px; + height: 60px; + } + } + .result-wrapper { width: 100%; height: 100%; diff --git a/packages/visualizer/src/component/playground-component.tsx b/packages/visualizer/src/component/playground-component.tsx index c609bd5d3..48078325d 100644 --- a/packages/visualizer/src/component/playground-component.tsx +++ b/packages/visualizer/src/component/playground-component.tsx @@ -22,6 +22,8 @@ import './playground-component.less'; import Logo from './logo'; import { serverBase, useServerValid } from './open-in-playground'; +import { paramStr, typeStr } from '@midscene/web/ui-utils'; + import { overrideAIConfig } from '@midscene/core'; import type { ChromeExtensionProxyPageAgent } from '@midscene/web/chrome-extension'; import { @@ -163,6 +165,7 @@ export function Playground({ >(undefined); const [loading, setLoading] = useState(false); + const [loadingProgressText, setLoadingProgressText] = useState(''); const [result, setResult] = useState(null); const [form] = Form.useForm(); const { config, serviceMode, setServiceMode } = useEnvConfig(); @@ -247,7 +250,13 @@ export function Playground({ value.prompt, ); } else if (value.type === 'aiAction') { - result.result = await activeAgent?.aiAction(value.prompt); + result.result = await activeAgent?.aiAction(value.prompt, { + onTaskStart: (task) => { + const type = typeStr(task); + const param = paramStr(task); + setLoadingProgressText(`${type}: ${param}`); + }, + }); } else if (value.type === 'aiQuery') { result.result = await activeAgent?.aiQuery(value.prompt); } else if (value.type === 'aiAssert') { @@ -321,10 +330,10 @@ export function Playground({ resultDataToShow = serverLaunchTip; } else if (loading) { resultDataToShow = ( - } - /> +
+ } /> +
{loadingProgressText}
+
); } else if (replayScriptsInfo) { resultDataToShow = ( diff --git a/packages/visualizer/src/component/replay-scripts.tsx b/packages/visualizer/src/component/replay-scripts.tsx index 6ebbaf9d0..76fdde076 100644 --- a/packages/visualizer/src/component/replay-scripts.tsx +++ b/packages/visualizer/src/component/replay-scripts.tsx @@ -1,6 +1,8 @@ 'use client'; import './player.less'; -import { mousePointer, paramStr, typeStr } from '@/utils'; +import { mousePointer } from '@/utils'; +import { paramStr, typeStr } from '@midscene/web/ui-utils'; + import type { ExecutionDump, ExecutionTask, diff --git a/packages/visualizer/src/component/sidebar.tsx b/packages/visualizer/src/component/sidebar.tsx index a090d0759..ba4c60514 100644 --- a/packages/visualizer/src/component/sidebar.tsx +++ b/packages/visualizer/src/component/sidebar.tsx @@ -1,8 +1,8 @@ import './sidebar.less'; import { useAllCurrentTasks, useExecutionDump } from '@/component/store'; -import { typeStr } from '@/utils'; import { MessageOutlined, VideoCameraOutlined } from '@ant-design/icons'; import type { ExecutionTask } from '@midscene/core'; +import { typeStr } from '@midscene/web/ui-utils'; import { useEffect } from 'react'; import { iconForStatus, timeCostStrElement } from './misc'; import PanelTitle from './panel-title'; diff --git a/packages/visualizer/src/utils.ts b/packages/visualizer/src/utils.ts index bd7e2abb1..89b520dd3 100644 --- a/packages/visualizer/src/utils.ts +++ b/packages/visualizer/src/utils.ts @@ -1,14 +1,8 @@ import type { ExecutionDump, - ExecutionTask, - ExecutionTaskAction, - ExecutionTaskInsightAssertion, ExecutionTaskInsightLocate, - ExecutionTaskInsightQuery, - ExecutionTaskPlanning, InsightDump, } from '@midscene/core'; -/* eslint-disable @typescript-eslint/no-empty-function */ import dayjs from 'dayjs'; export function insightDumpToExecutionDump( @@ -68,41 +62,6 @@ export function timeStr(timestamp?: number) { return timestamp ? dayjs(timestamp).format('YYYY-MM-DD HH:mm:ss') : '-'; } -export function typeStr(task: ExecutionTask) { - return task.subType ? `${task.type} / ${task.subType || ''}` : task.type; -} - -export function paramStr(task: ExecutionTask) { - let value: string | undefined | object; - if (task.type === 'Planning') { - value = (task as ExecutionTaskPlanning)?.param?.userPrompt; - } - - if (task.type === 'Insight') { - value = - (task as ExecutionTaskInsightLocate)?.param?.prompt || - (task as ExecutionTaskInsightLocate)?.param?.id || - (task as ExecutionTaskInsightQuery)?.param?.dataDemand || - (task as ExecutionTaskInsightAssertion)?.param?.assertion; - } - - if (task.type === 'Action') { - const sleepMs = (task as ExecutionTaskAction)?.param?.timeMs; - if (sleepMs) { - value = `${sleepMs}ms`; - } else { - value = - (task as ExecutionTaskAction)?.param?.value || - (task as ExecutionTaskAction)?.param?.scrollType; - } - } - - if (typeof value === 'undefined') return ''; - return typeof value === 'string' - ? value - : JSON.stringify(value, undefined, 2); -} - export function filterBase64Value(input: string) { return input.replace(/data:image\/[^"]+"/g, 'data:image..."'); } diff --git a/packages/web-integration/modern.config.ts b/packages/web-integration/modern.config.ts index 5e4beda3e..8043457ba 100644 --- a/packages/web-integration/modern.config.ts +++ b/packages/web-integration/modern.config.ts @@ -8,6 +8,7 @@ export default defineConfig({ input: { index: 'src/index.ts', utils: 'src/common/utils.ts', + 'ui-utils': 'src/common/ui-utils.ts', debug: 'src/debug/index.ts', puppeteer: 'src/puppeteer/index.ts', playwright: 'src/playwright/index.ts', diff --git a/packages/web-integration/package.json b/packages/web-integration/package.json index 7e2d17309..321047d83 100644 --- a/packages/web-integration/package.json +++ b/packages/web-integration/package.json @@ -13,6 +13,7 @@ "exports": { ".": "./dist/lib/index.js", "./utils": "./dist/lib/utils.js", + "./ui-utils": "./dist/lib/ui-utils.js", "./puppeteer": "./dist/lib/puppeteer.js", "./playwright": "./dist/lib/playwright.js", "./playwright-report": "./dist/lib/playwright-report.js", @@ -26,6 +27,7 @@ "*": { ".": ["./dist/types/index.d.ts"], "utils": ["./dist/types/utils.d.ts"], + "ui-utils": ["./dist/types/ui-utils.d.ts"], "puppeteer": ["./dist/types/puppeteer.d.ts"], "playwright": ["./dist/types/playwright.d.ts"], "playwright-report": ["./dist/types/playwright-report.d.ts"], diff --git a/packages/web-integration/src/common/agent.ts b/packages/web-integration/src/common/agent.ts index 60e88ece4..2a9b5460d 100644 --- a/packages/web-integration/src/common/agent.ts +++ b/packages/web-integration/src/common/agent.ts @@ -3,6 +3,7 @@ import { type AgentAssertOpt, type AgentWaitForOpt, type ExecutionDump, + type ExecutionTaskProgressOptions, type GroupedActionDump, Insight, type InsightAction, @@ -141,8 +142,8 @@ export class PageAgent { } } - async aiAction(taskPrompt: string) { - const { executor } = await this.taskExecutor.action(taskPrompt); + async aiAction(taskPrompt: string, options?: ExecutionTaskProgressOptions) { + const { executor } = await this.taskExecutor.action(taskPrompt, options); this.appendExecutionDump(executor.dump()); this.writeOutActionDumps(); diff --git a/packages/web-integration/src/common/tasks.ts b/packages/web-integration/src/common/tasks.ts index 3a05f6e39..760da6ac4 100644 --- a/packages/web-integration/src/common/tasks.ts +++ b/packages/web-integration/src/common/tasks.ts @@ -10,6 +10,7 @@ import { type ExecutionTaskInsightLocateApply, type ExecutionTaskInsightQueryApply, type ExecutionTaskPlanningApply, + type ExecutionTaskProgressOptions, Executor, type Insight, type InsightAssertionResponse, @@ -489,8 +490,13 @@ export class PageTaskExecutor { return task; } - async action(userPrompt: string): Promise { - const taskExecutor = new Executor(userPrompt); + async action( + userPrompt: string, + options?: ExecutionTaskProgressOptions, + ): Promise { + const taskExecutor = new Executor(userPrompt, undefined, undefined, { + onTaskStart: options?.onTaskStart, + }); const cacheGroup = this.taskCache.getCacheGroupByPrompt(userPrompt); const originalPrompt = userPrompt; diff --git a/packages/web-integration/src/common/ui-utils.ts b/packages/web-integration/src/common/ui-utils.ts new file mode 100644 index 000000000..cc8666320 --- /dev/null +++ b/packages/web-integration/src/common/ui-utils.ts @@ -0,0 +1,47 @@ +import type { + ExecutionTask, + ExecutionTaskAction, + ExecutionTaskInsightAssertion, + ExecutionTaskInsightLocate, + ExecutionTaskInsightQuery, + ExecutionTaskPlanning, +} from '@midscene/core'; + +export function typeStr(task: ExecutionTask) { + return task.subType ? `${task.type} / ${task.subType || ''}` : task.type; +} + +export function paramStr(task: ExecutionTask) { + let value: string | undefined | object; + if (task.type === 'Planning') { + value = (task as ExecutionTaskPlanning)?.param?.userPrompt; + } + + if (task.type === 'Insight') { + value = + (task as ExecutionTaskInsightLocate)?.param?.prompt || + (task as ExecutionTaskInsightLocate)?.param?.id || + (task as ExecutionTaskInsightQuery)?.param?.dataDemand || + (task as ExecutionTaskInsightAssertion)?.param?.assertion; + } + + if (task.type === 'Action') { + const sleepMs = (task as ExecutionTaskAction)?.param?.timeMs; + if (sleepMs) { + value = `${sleepMs}ms`; + } else { + value = + (task as ExecutionTaskAction)?.param?.value || + (task as ExecutionTaskAction)?.param?.scrollType; + } + + if (!value) { + value = task.thought; + } + } + + if (typeof value === 'undefined') return ''; + return typeof value === 'string' + ? value + : JSON.stringify(value, undefined, 2); +} diff --git a/packages/web-integration/src/extractor/debug.ts b/packages/web-integration/src/extractor/debug.ts index a436431e7..737d660a8 100644 --- a/packages/web-integration/src/extractor/debug.ts +++ b/packages/web-integration/src/extractor/debug.ts @@ -1,6 +1,10 @@ import { webExtractTextWithPosition } from '.'; -import { setExtractTextWithPositionOnWindow } from './util'; +import { + setExtractTextWithPositionOnWindow, + setMidsceneVisibleRectOnWindow, +} from './util'; console.log(webExtractTextWithPosition(document.body, true)); console.log(JSON.stringify(webExtractTextWithPosition(document.body, true))); setExtractTextWithPositionOnWindow(); +setMidsceneVisibleRectOnWindow(); diff --git a/packages/web-integration/src/extractor/util.ts b/packages/web-integration/src/extractor/util.ts index e2e000618..5e6edd142 100644 --- a/packages/web-integration/src/extractor/util.ts +++ b/packages/web-integration/src/extractor/util.ts @@ -69,24 +69,26 @@ function isElementPartiallyInViewport(rect: ReturnType) { const elementHeight = rect.height; const elementWidth = rect.width; - const viewportHeight = - window.innerHeight || document.documentElement.clientHeight; - const viewportWidth = - window.innerWidth || document.documentElement.clientWidth; + const viewportRect = { + left: 0, + top: 0, + width: window.innerWidth || document.documentElement.clientWidth, + height: window.innerHeight || document.documentElement.clientHeight, + right: window.innerWidth || document.documentElement.clientWidth, + bottom: window.innerHeight || document.documentElement.clientHeight, + x: 0, + y: 0, + zoom: 1, + }; - const visibleHeight = Math.max( - 0, - Math.min(rect.bottom, viewportHeight) - Math.max(rect.top, 0), - ); - const visibleWidth = Math.max( - 0, - Math.min(rect.right, viewportWidth) - Math.max(rect.left, 0), - ); + const overlapRect = overlappedRect(rect, viewportRect); + if (!overlapRect) { + return false; + } - const visibleArea = visibleHeight * visibleWidth; + const visibleArea = overlapRect.width * overlapRect.height; const totalArea = elementHeight * elementWidth; - - return visibleArea / totalArea >= 2 / 3; + return visibleArea > 30 * 30 || visibleArea / totalArea >= 2 / 3; } export function getPseudoElementContent(element: Node): { @@ -129,6 +131,31 @@ export interface ExtractedRect { zoom: number; } +// tell if two rects are overlapped, return the overlapped rect. If not, return null +export function overlappedRect( + rect1: ExtractedRect, + rect2: ExtractedRect, +): ExtractedRect | null { + const left = Math.max(rect1.left, rect2.left); + const top = Math.max(rect1.top, rect2.top); + const right = Math.min(rect1.right, rect2.right); + const bottom = Math.min(rect1.bottom, rect2.bottom); + if (left < right && top < bottom) { + return { + left, + top, + right, + bottom, + width: right - left, + height: bottom - top, + x: left, + y: top, + zoom: 1, + }; + } + return null; +} + export function getRect(el: HTMLElement | Node, baseZoom = 1): ExtractedRect { let originalRect: DOMRect; let newZoom = 1; @@ -166,16 +193,36 @@ const isElementCovered = (el: HTMLElement | Node, rect: ExtractedRect) => { // Gets the element above that point const topElement = document.elementFromPoint(x, y); + if (!topElement) { + return false; // usually because it's outside the screen + } + if (topElement === el) { return false; } if (el?.contains(topElement)) { return false; } + if ((topElement as HTMLElement)?.contains(el)) { return false; } + const rectOfTopElement = getRect(topElement as HTMLElement, 1); + + // get the remaining area of the base element + const overlapRect = overlappedRect(rect, rectOfTopElement); + if (!overlapRect) { + return false; + } + + const remainingArea = + rect.width * rect.height - overlapRect.width * overlapRect.height; + + if (remainingArea > 100) { + return false; + } + logger(el, 'Element is covered by another element', { topElement, el, @@ -230,9 +277,9 @@ export function visibleRect( // check if the element is covered by another element // if the element is zoomed, the coverage check should be done with the original zoom - if (baseZoom === 1 && isElementCovered(el, rect)) { - return false; - } + // if (baseZoom === 1 && isElementCovered(el, rect)) { + // return false; + // } const scrollLeft = window.pageXOffset || document.documentElement.scrollLeft; const scrollTop = window.pageYOffset || document.documentElement.scrollTop; diff --git a/packages/web-integration/src/extractor/web-extractor.ts b/packages/web-integration/src/extractor/web-extractor.ts index d308ed55c..07b3e0da6 100644 --- a/packages/web-integration/src/extractor/web-extractor.ts +++ b/packages/web-integration/src/extractor/web-extractor.ts @@ -36,19 +36,15 @@ let indexId = 0; function collectElementInfo( node: Node, nodePath: string, + rect: { + left: number; + top: number; + width: number; + height: number; + zoom: number; + }, baseZoom = 1, ): WebElementInfo | null { - const rect = visibleRect(node, baseZoom); - logger('collectElementInfo', node, node.nodeName, rect); - if ( - !rect || - rect.width < CONTAINER_MINI_WIDTH || - rect.height < CONTAINER_MINI_HEIGHT - ) { - logger('Element is not visible', node); - return null; - } - if (isFormElement(node)) { const attributes = getNodeAttributes(node); let valueContent = @@ -250,7 +246,21 @@ export function extractTextWithPosition( return null; } - const elementInfo = collectElementInfo(node, nodePath, baseZoom); + const nodeVisibleRect = visibleRect(node, baseZoom); + if ( + !nodeVisibleRect || + nodeVisibleRect.width < CONTAINER_MINI_WIDTH || + nodeVisibleRect.height < CONTAINER_MINI_HEIGHT + ) { + return null; + } + + const elementInfo = collectElementInfo( + node, + nodePath, + nodeVisibleRect, + baseZoom, + ); // stop collecting if the node is a Button or Image if ( elementInfo?.nodeType === NodeType.BUTTON || diff --git a/packages/web-integration/tests/ai/web/puppeteer/showcase.test.ts b/packages/web-integration/tests/ai/web/puppeteer/showcase.test.ts index f36e0831d..e925eb627 100644 --- a/packages/web-integration/tests/ai/web/puppeteer/showcase.test.ts +++ b/packages/web-integration/tests/ai/web/puppeteer/showcase.test.ts @@ -13,10 +13,15 @@ describe( cacheId: 'puppeteer(Sauce Demo by Swag Lab)', }); + const onTaskStart = vi.fn(); + await mid.aiAction( 'type "standard_user" in user name input, type "secret_sauce" in password, click "Login", sleep 1s', + { onTaskStart: onTaskStart as any }, ); + expect(onTaskStart.mock.calls.length).toBeGreaterThan(1); + await expect(async () => { await mid.aiWaitFor('there is a cookie prompt in the UI', { timeoutMs: 10 * 1000, diff --git a/packages/web-integration/tests/unit-test/__snapshots__/web-extractor.test.ts.snap b/packages/web-integration/tests/unit-test/__snapshots__/web-extractor.test.ts.snap index 872a0d814..8776c3c0c 100644 --- a/packages/web-integration/tests/unit-test/__snapshots__/web-extractor.test.ts.snap +++ b/packages/web-integration/tests/unit-test/__snapshots__/web-extractor.test.ts.snap @@ -474,6 +474,18 @@ exports[`extractor > basic 1`] = ` }, "content": "", }, + { + "attributes": { + "nodeType": "TEXT Node", + }, + "content": "content Left", + }, + { + "attributes": { + "nodeType": "TEXT Node", + }, + "content": "content Right", + }, { "attributes": { "frameborder": "0", diff --git a/packages/web-integration/tests/unit-test/fixtures/web-extractor/index.html b/packages/web-integration/tests/unit-test/fixtures/web-extractor/index.html index 99c767025..2f1b1c7c7 100644 --- a/packages/web-integration/tests/unit-test/fixtures/web-extractor/index.html +++ b/packages/web-integration/tests/unit-test/fixtures/web-extractor/index.html @@ -272,6 +272,36 @@

Form

+ + + + + + +
+
+ content Left +
+
+ content Right +
+
+ diff --git a/packages/web-integration/tests/unit-test/fixtures/web-extractor/input.png b/packages/web-integration/tests/unit-test/fixtures/web-extractor/input.png index 392adc9e0..39a6e1913 100644 Binary files a/packages/web-integration/tests/unit-test/fixtures/web-extractor/input.png and b/packages/web-integration/tests/unit-test/fixtures/web-extractor/input.png differ diff --git a/packages/web-integration/tests/unit-test/fixtures/web-extractor/output.png b/packages/web-integration/tests/unit-test/fixtures/web-extractor/output.png index db278b42f..a58902110 100644 Binary files a/packages/web-integration/tests/unit-test/fixtures/web-extractor/output.png and b/packages/web-integration/tests/unit-test/fixtures/web-extractor/output.png differ diff --git a/packages/web-integration/tests/unit-test/fixtures/web-extractor/scroll/input.png b/packages/web-integration/tests/unit-test/fixtures/web-extractor/scroll/input.png index 753b1d86c..7660709e6 100644 Binary files a/packages/web-integration/tests/unit-test/fixtures/web-extractor/scroll/input.png and b/packages/web-integration/tests/unit-test/fixtures/web-extractor/scroll/input.png differ diff --git a/packages/web-integration/tests/unit-test/fixtures/web-extractor/scroll/output.png b/packages/web-integration/tests/unit-test/fixtures/web-extractor/scroll/output.png index 42dea3688..7ba11d78a 100644 Binary files a/packages/web-integration/tests/unit-test/fixtures/web-extractor/scroll/output.png and b/packages/web-integration/tests/unit-test/fixtures/web-extractor/scroll/output.png differ