web-infra-dev · yuyutaotao · Dec 10, 2024 · Dec 10, 2024 · Dec 10, 2024 · Dec 10, 2024
diff --git a/.github/workflows/ai.yml b/.github/workflows/ai.yml
@@ -22,7 +22,7 @@ jobs:
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
       MIDSCENE_MODEL_NAME: gpt-4o-2024-08-06
-      MIDSCENE_DEBUG_AI_PROFILE: 1
+      # MIDSCENE_DEBUG_AI_PROFILE: 1
 
     steps:
     - uses: actions/checkout@v4

diff --git a/packages/cli/src/printer.ts b/packages/cli/src/printer.ts
@@ -30,8 +30,9 @@ export const flowItemBrief = (flowItem?: MidsceneYamlFlowItem) => {
   }
 
   const sliceText = (text?: string) => {
-    if (text && text.length > 12) {
-      return `${text.slice(0, 12)}...`;
+    const lengthLimit = 60;
+    if (text && text.length > lengthLimit) {
+      return `${text.slice(0, lengthLimit)}...`;
     }
 
     return text || '';
@@ -42,7 +43,8 @@ export const flowItemBrief = (flowItem?: MidsceneYamlFlowItem) => {
     (flowItem as MidsceneYamlFlowItemAIAction).ai
   ) {
     return `aiAction: ${sliceText(
-      (flowItem as MidsceneYamlFlowItemAIAction).aiAction ||
+      (flowItem as MidsceneYamlFlowItemAIAction).aiActionProgressTip ||
+        (flowItem as MidsceneYamlFlowItemAIAction).aiAction ||
         (flowItem as MidsceneYamlFlowItemAIAction).ai,
     )}`;
   }
@@ -104,7 +106,7 @@ export const contextInfo = (context: MidsceneYamlFileContext) => {
   const reportFile = context.player.reportFile;
   const reportFileToShow = relative(process.cwd(), reportFile || '');
   const reportText = reportFile
-    ? `\n${indent}${chalk.gray(`report: ${reportFileToShow}`)}`
+    ? `\n${indent}${chalk.gray(`report: ./${reportFileToShow}`)}`
     : '';
 
   const mergedText =

diff --git a/packages/cli/src/types.d.ts b/packages/cli/src/types.d.ts
@@ -16,6 +16,7 @@ export interface MidsceneYamlScriptEnv {
 export interface MidsceneYamlFlowItemAIAction {
   ai?: string; // this is the shortcut for aiAction
   aiAction?: string;
+  aiActionProgressTip?: string;
 }
 
 export interface MidsceneYamlFlowItemAIAssert {

diff --git a/packages/cli/src/yaml-player.ts b/packages/cli/src/yaml-player.ts
@@ -7,6 +7,8 @@ import assert from 'node:assert';
 import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
 import { basename, dirname, extname, join } from 'node:path';
 import { PuppeteerAgent } from '@midscene/web/puppeteer';
+import { paramStr, typeStr } from '@midscene/web/ui-utils';
+
 import {
   contextInfo,
   contextTaskListSummary,
@@ -116,14 +118,14 @@ export async function playYamlFiles(
 
     ttyRenderer.start();
     for (const context of fileContextList) {
-      await context.player.play();
+      await context.player.run();
     }
     ttyRenderer.stop();
   } else {
     for (const context of fileContextList) {
       const { mergedText } = contextInfo(context);
       console.log(mergedText);
-      await context.player.play();
+      await context.player.run();
       console.log(contextTaskListSummary(context.player.taskStatus, context));
     }
   }
@@ -220,7 +222,13 @@ export class ScriptPlayer {
           typeof prompt === 'string',
           'prompt for aiAction must be a string',
         );
-        await agent.aiAction(prompt);
+        await agent.aiAction(prompt, {
+          onTaskStart(task) {
+            const tip = `${typeStr(task)} - ${paramStr(task)}`;
+            (flowItem as MidsceneYamlFlowItemAIAction).aiActionProgressTip =
+              tip;
+          },
+        });
       } else if ((flowItem as MidsceneYamlFlowItemAIAssert).aiAssert) {
         const assertTask = flowItem as MidsceneYamlFlowItemAIAssert;
         const prompt = assertTask.aiAssert;
@@ -273,7 +281,7 @@ export class ScriptPlayer {
     this.reportFile = agent.reportFile;
   }
 
-  async play() {
+  async run() {
     const { target, tasks } = this.script;
     this.setPlayerStatus('running');
 

diff --git a/packages/cli/tests/__snapshots__/printer.test.ts.snap b/packages/cli/tests/__snapshots__/printer.test.ts.snap
@@ -1,7 +1,7 @@
 // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
 
-exports[`printer > action brief text 1`] = `"aiAction: search for w..."`;
+exports[`printer > action brief text 1`] = `"aiAction: search for weather"`;
 
 exports[`printer > action brief text 2`] = `"sleep: 1000"`;
 
-exports[`printer > action brief text 3`] = `"aiWaitFor: wait for som..."`;
+exports[`printer > action brief text 3`] = `"aiWaitFor: wait for something"`;
diff --git a/packages/cli/tests/midscene_scripts/sub/bing.yaml b/packages/cli/tests/midscene_scripts/sub/bing.yaml
@@ -1,5 +1,5 @@
 target:
-  url: https://www.baidu.com
+  url: https://www.bing.com
 tasks:
   - name: search weather
     flow:

diff --git a/packages/cli/tests/yaml.test.ts b/packages/cli/tests/yaml.test.ts
@@ -7,7 +7,7 @@ import { assert, describe, expect, test, vi } from 'vitest';
 const runYaml = async (yamlString: string) => {
   const script = loadYamlScript(yamlString);
   const player = new ScriptPlayer(script);
-  await player.play();
+  await player.run();
   assert(
     player.status === 'done',
     player.errorInSetup?.message || 'unknown error',

diff --git a/packages/midscene/src/action/executor.ts b/packages/midscene/src/action/executor.ts
@@ -5,6 +5,7 @@ import type {
   ExecutionTask,
   ExecutionTaskApply,
   ExecutionTaskInsightLocateOutput,
+  ExecutionTaskProgressOptions,
   ExecutionTaskReturn,
   ExecutorContext,
 } from '@/types';
@@ -20,19 +21,19 @@ export class Executor {
   // status of executor
   status: 'init' | 'pending' | 'running' | 'completed' | 'error';
 
-  onFlushUpdate?: () => void;
+  onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];
 
   constructor(
     name: string,
     description?: string,
     tasks?: ExecutionTaskApply[],
-    onFlushUpdate?: () => void,
+    options?: ExecutionTaskProgressOptions,
   ) {
     this.status = tasks && tasks.length > 0 ? 'pending' : 'init';
     this.name = name;
     this.description = description;
     this.tasks = (tasks || []).map((item) => this.markTaskAsPending(item));
-    this.onFlushUpdate = onFlushUpdate;
+    this.onTaskStart = options?.onTaskStart;
   }
 
   private markTaskAsPending(task: ExecutionTaskApply): ExecutionTask {
@@ -84,13 +85,6 @@ export class Executor {
 
     while (taskIndex < this.tasks.length) {
       const task = this.tasks[taskIndex];
-      try {
-        if (this.onFlushUpdate) {
-          this.onFlushUpdate();
-        }
-      } catch (e) {
-        // console.error('error in onFlushUpdate', e);
-      }
       assert(
         task.status === 'pending',
         `task status should be pending, but got: ${task.status}`,
@@ -100,6 +94,13 @@ export class Executor {
       };
       try {
         task.status = 'running';
+        try {
+          if (this.onTaskStart) {
+            await this.onTaskStart(task);
+          }
+        } catch (e) {
+          // console.error('error in onTaskStart', e);
+        }
         assert(
           ['Insight', 'Action', 'Planning'].indexOf(task.type) >= 0,
           `unsupported task type: ${task.type}`,
@@ -162,9 +163,7 @@ export class Executor {
     } else {
       this.status = 'error';
     }
-    if (this.onFlushUpdate) {
-      await this.onFlushUpdate();
-    }
+
     if (this.tasks.length) {
       // return the last output
       const outputIndex = Math.min(taskIndex, this.tasks.length - 1);

diff --git a/packages/midscene/src/insight/index.ts b/packages/midscene/src/insight/index.ts
@@ -96,7 +96,7 @@ export default class Insight<
     const startTime = Date.now();
     const { parseResult, elementById, rawResponse, usage } =
       await AiInspectElement({
-        callAI,
+        callAI: callAI || this.aiVendorFn,
         context,
         multi: Boolean(multi),
         targetElementDescription: queryPrompt,

diff --git a/packages/midscene/src/types.ts b/packages/midscene/src/types.ts
@@ -313,6 +313,9 @@ export interface PlaywrightParserOpt extends BaseAgentParserOpt {}
 /*
 action
 */
+export interface ExecutionTaskProgressOptions {
+  onTaskStart?: (task: ExecutionTask) => Promise<void> | void;
+}
 
 export interface ExecutionRecorderItem {
   type: 'screenshot';

diff --git a/packages/midscene/tests/ai/extract/extract.test.ts b/packages/midscene/tests/ai/extract/extract.test.ts
@@ -8,8 +8,6 @@ vi.setConfig({
   hookTimeout: 30 * 1000,
 });
 
-const useModel = undefined;
-
 const modelList: Array<'openAI' | 'coze'> = ['openAI'];
 
 if (preferCozeModel('coze')) {

diff --git a/packages/midscene/tests/ai/executor/__snapshots__/index.test.ts.snap b/packages/midscene/tests/unit-test/executor/__snapshots__/index.test.ts.snap
diff --git a/packages/midscene/tests/ai/executor/index.test.ts b/packages/midscene/tests/unit-test/executor/index.test.ts
@@ -23,6 +23,7 @@ const insightFindTask = (shouldThrow?: boolean) => {
     param: {
       prompt: 'test',
     },
+    locate: null,
     async executor(param, taskContext) {
       if (shouldThrow) {
         const { task } = taskContext;
@@ -46,70 +47,58 @@ const insightFindTask = (shouldThrow?: boolean) => {
   return insightFindTask;
 };
 
-vi.setConfig({
-  testTimeout: 40 * 1000,
-});
-
 describe('executor', () => {
-  it(
-    'insight - basic run',
-    async () => {
-      const insightTask1 = insightFindTask();
-      const flushResultData = 'abcdef';
-      const taskParam = {
-        action: 'tap',
-        anything: 'acceptable',
-      };
-      const tapperFn = vi.fn();
-      const actionTask: ExecutionTaskActionApply = {
-        type: 'Action',
-        param: taskParam,
-        executor: tapperFn,
-      };
-      const actionTask2: ExecutionTaskActionApply = {
-        type: 'Action',
-        param: taskParam,
-        executor: async () => {
-          return {
-            output: flushResultData,
-          } as any;
-        },
-      };
+  it('insight - basic run', async () => {
+    const insightTask1 = insightFindTask();
+    const flushResultData = 'abcdef';
+    const taskParam = {
+      action: 'tap',
+      anything: 'acceptable',
+    };
+    const tapperFn = vi.fn();
+    const actionTask: ExecutionTaskActionApply = {
+      type: 'Action',
+      param: taskParam,
+      locate: null,
+      executor: tapperFn,
+    };
+    const actionTask2: ExecutionTaskActionApply = {
+      type: 'Action',
+      param: taskParam,
+      locate: null,
+      executor: async () => {
+        return {
+          output: flushResultData,
+        } as any;
+      },
+    };
 
-      const inputTasks = [insightTask1, actionTask, actionTask2];
-
-      const executor = new Executor(
-        'test',
-        'hello, this is a test',
-        inputTasks,
-      );
-      const flushResult = await executor.flush();
-      const tasks = executor.tasks as ExecutionTaskInsightLocate[];
-      const { element } = tasks[0].output || {};
-      expect(element).toBeTruthy();
-
-      expect(tasks.length).toBe(inputTasks.length);
-      expect(tasks[0].status).toBe('finished');
-      expect(tasks[0].output).toMatchSnapshot();
-      expect(tasks[0].log?.dump).toBeTruthy();
-      expect(tasks[0].timing?.end).toBeTruthy();
-      expect(tasks[0].cache).toBeTruthy();
-      expect(tasks[0].cache?.hit).toEqual(false);
-
-      expect(tapperFn).toBeCalledTimes(1);
-      expect(tapperFn.mock.calls[0][0]).toBe(taskParam);
-      expect(tapperFn.mock.calls[0][1].element).toBe(element);
-      expect(tapperFn.mock.calls[0][1].task).toBeTruthy();
-
-      const dump = executor.dump();
-      expect(dump.logTime).toBeTruthy();
-
-      expect(flushResult).toBe(flushResultData);
-    },
-    {
-      timeout: 999 * 1000,
-    },
-  );
+    const inputTasks = [insightTask1, actionTask, actionTask2];
+
+    const executor = new Executor('test', 'hello, this is a test', inputTasks);
+    const flushResult = await executor.flush();
+    const tasks = executor.tasks as ExecutionTaskInsightLocate[];
+    expect(executor.isInErrorState()).toBeFalsy();
+    const { element } = tasks[0].output || {};
+    expect(element).toBeTruthy();
+
+    expect(tasks.length).toBe(inputTasks.length);
+    expect(tasks[0].status).toBe('finished');
+    expect(tasks[0].output).toMatchSnapshot();
+    expect(tasks[0].log?.dump).toBeTruthy();
+    expect(tasks[0].timing?.end).toBeTruthy();
+    expect(tasks[0].cache).toBeTruthy();
+    expect(tasks[0].cache?.hit).toEqual(false);
+
+    expect(tapperFn).toBeCalledTimes(1);
+    expect(tapperFn.mock.calls[0][0]).toBe(taskParam);
+    expect(tapperFn.mock.calls[0][1].task).toBeTruthy();
+
+    const dump = executor.dump();
+    expect(dump.logTime).toBeTruthy();
+
+    expect(flushResult).toBe(flushResultData);
+  });
 
   it('insight - init and append', async () => {
     const initExecutor = new Executor('test');
@@ -123,6 +112,7 @@ describe('executor', () => {
         action: 'tap',
         element: 'previous',
       },
+      locate: null,
       executor: async () => {
         // delay 500
         await new Promise((resolve) => setTimeout(resolve, 500));

diff --git a/packages/midscene/tests/utils.ts b/packages/midscene/tests/utils.ts
@@ -1,13 +1,13 @@
 import { readFileSync, writeFileSync } from 'node:fs';
-/* eslint-disable @typescript-eslint/no-magic-numbers */
 import path, { join } from 'node:path';
+import type { callAiFn } from '@/ai-model/common';
 import {
   base64Encoded,
   imageInfoOfBase64,
   transformImgPathToBase64,
 } from '@/image';
 import Insight from '@/insight';
-import type { BaseElement, UIContext } from '@/types';
+import type { AIElementIdResponse, BaseElement, UIContext } from '@/types';
 import { vi } from 'vitest';
 
 export function getFixture(name: string) {
@@ -45,16 +45,18 @@ export function fakeInsight(content: string) {
         center: [250, 250],
         tap: vi.fn() as unknown,
       },
-      // describer: basicPa
     ] as unknown as BaseElement[],
   };
   const context: UIContext = {
     ...basicContext,
   };
 
-  const aiVendor = () => ({
-    elements: [{ id: '0' }],
-    errors: [],
+  const aiVendor: typeof callAiFn<AIElementIdResponse> = async () => ({
+    content: {
+      elements: [{ id: '0', reason: '', text: '' }],
+      errors: [],
+    },
+    usage: undefined,
   });
 
   const insight = new Insight(context, {