Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: reveal overlapped content #178

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ai.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
MIDSCENE_MODEL_NAME: gpt-4o-2024-08-06
MIDSCENE_DEBUG_AI_PROFILE: 1
# MIDSCENE_DEBUG_AI_PROFILE: 1

steps:
- uses: actions/checkout@v4
Expand Down
10 changes: 6 additions & 4 deletions packages/cli/src/printer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ export const flowItemBrief = (flowItem?: MidsceneYamlFlowItem) => {
}

const sliceText = (text?: string) => {
if (text && text.length > 12) {
return `${text.slice(0, 12)}...`;
const lengthLimit = 60;
if (text && text.length > lengthLimit) {
return `${text.slice(0, lengthLimit)}...`;
}

return text || '';
Expand All @@ -42,7 +43,8 @@ export const flowItemBrief = (flowItem?: MidsceneYamlFlowItem) => {
(flowItem as MidsceneYamlFlowItemAIAction).ai
) {
return `aiAction: ${sliceText(
(flowItem as MidsceneYamlFlowItemAIAction).aiAction ||
(flowItem as MidsceneYamlFlowItemAIAction).aiActionProgressTip ||
(flowItem as MidsceneYamlFlowItemAIAction).aiAction ||
(flowItem as MidsceneYamlFlowItemAIAction).ai,
)}`;
}
Expand Down Expand Up @@ -104,7 +106,7 @@ export const contextInfo = (context: MidsceneYamlFileContext) => {
const reportFile = context.player.reportFile;
const reportFileToShow = relative(process.cwd(), reportFile || '');
const reportText = reportFile
? `\n${indent}${chalk.gray(`report: ${reportFileToShow}`)}`
? `\n${indent}${chalk.gray(`report: ./${reportFileToShow}`)}`
: '';

const mergedText =
Expand Down
1 change: 1 addition & 0 deletions packages/cli/src/types.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export interface MidsceneYamlScriptEnv {
/**
 * Shape of a YAML flow step that asks the agent to perform an AI-driven action.
 * Every field is optional.
 */
export interface MidsceneYamlFlowItemAIAction {
ai?: string; // this is the shortcut for aiAction
// Prompt text for the action.
aiAction?: string;
// Progress text for the step. NOTE(review): appears to be filled in at runtime
// while the action executes rather than authored in the YAML file — confirm.
aiActionProgressTip?: string;
}

export interface MidsceneYamlFlowItemAIAssert {
Expand Down
16 changes: 12 additions & 4 deletions packages/cli/src/yaml-player.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import assert from 'node:assert';
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
import { basename, dirname, extname, join } from 'node:path';
import { PuppeteerAgent } from '@midscene/web/puppeteer';
import { paramStr, typeStr } from '@midscene/web/ui-utils';

import {
contextInfo,
contextTaskListSummary,
Expand Down Expand Up @@ -116,14 +118,14 @@ export async function playYamlFiles(

ttyRenderer.start();
for (const context of fileContextList) {
await context.player.play();
await context.player.run();
}
ttyRenderer.stop();
} else {
for (const context of fileContextList) {
const { mergedText } = contextInfo(context);
console.log(mergedText);
await context.player.play();
await context.player.run();
console.log(contextTaskListSummary(context.player.taskStatus, context));
}
}
Expand Down Expand Up @@ -220,7 +222,13 @@ export class ScriptPlayer {
typeof prompt === 'string',
'prompt for aiAction must be a string',
);
await agent.aiAction(prompt);
await agent.aiAction(prompt, {
onTaskStart(task) {
const tip = `${typeStr(task)} - ${paramStr(task)}`;
(flowItem as MidsceneYamlFlowItemAIAction).aiActionProgressTip =
tip;
},
});
} else if ((flowItem as MidsceneYamlFlowItemAIAssert).aiAssert) {
const assertTask = flowItem as MidsceneYamlFlowItemAIAssert;
const prompt = assertTask.aiAssert;
Expand Down Expand Up @@ -273,7 +281,7 @@ export class ScriptPlayer {
this.reportFile = agent.reportFile;
}

async play() {
async run() {
const { target, tasks } = this.script;
this.setPlayerStatus('running');

Expand Down
4 changes: 2 additions & 2 deletions packages/cli/tests/__snapshots__/printer.test.ts.snap
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html

exports[`printer > action brief text 1`] = `"aiAction: search for w..."`;
exports[`printer > action brief text 1`] = `"aiAction: search for weather"`;

exports[`printer > action brief text 2`] = `"sleep: 1000"`;

exports[`printer > action brief text 3`] = `"aiWaitFor: wait for som..."`;
exports[`printer > action brief text 3`] = `"aiWaitFor: wait for something"`;
2 changes: 1 addition & 1 deletion packages/cli/tests/midscene_scripts/sub/bing.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
target:
url: https://www.baidu.com
url: https://www.bing.com
tasks:
- name: search weather
flow:
Expand Down
2 changes: 1 addition & 1 deletion packages/cli/tests/yaml.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import { assert, describe, expect, test, vi } from 'vitest';
const runYaml = async (yamlString: string) => {
const script = loadYamlScript(yamlString);
const player = new ScriptPlayer(script);
await player.play();
await player.run();
assert(
player.status === 'done',
player.errorInSetup?.message || 'unknown error',
Expand Down
25 changes: 12 additions & 13 deletions packages/midscene/src/action/executor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import type {
ExecutionTask,
ExecutionTaskApply,
ExecutionTaskInsightLocateOutput,
ExecutionTaskProgressOptions,
ExecutionTaskReturn,
ExecutorContext,
} from '@/types';
Expand All @@ -20,19 +21,19 @@ export class Executor {
// status of executor
status: 'init' | 'pending' | 'running' | 'completed' | 'error';

onFlushUpdate?: () => void;
onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];

constructor(
name: string,
description?: string,
tasks?: ExecutionTaskApply[],
onFlushUpdate?: () => void,
options?: ExecutionTaskProgressOptions,
) {
this.status = tasks && tasks.length > 0 ? 'pending' : 'init';
this.name = name;
this.description = description;
this.tasks = (tasks || []).map((item) => this.markTaskAsPending(item));
this.onFlushUpdate = onFlushUpdate;
this.onTaskStart = options?.onTaskStart;
}

private markTaskAsPending(task: ExecutionTaskApply): ExecutionTask {
Expand Down Expand Up @@ -84,13 +85,6 @@ export class Executor {

while (taskIndex < this.tasks.length) {
const task = this.tasks[taskIndex];
try {
if (this.onFlushUpdate) {
this.onFlushUpdate();
}
} catch (e) {
// console.error('error in onFlushUpdate', e);
}
assert(
task.status === 'pending',
`task status should be pending, but got: ${task.status}`,
Expand All @@ -100,6 +94,13 @@ export class Executor {
};
try {
task.status = 'running';
try {
if (this.onTaskStart) {
await this.onTaskStart(task);
}
} catch (e) {
// console.error('error in onTaskStart', e);
}
assert(
['Insight', 'Action', 'Planning'].indexOf(task.type) >= 0,
`unsupported task type: ${task.type}`,
Expand Down Expand Up @@ -162,9 +163,7 @@ export class Executor {
} else {
this.status = 'error';
}
if (this.onFlushUpdate) {
await this.onFlushUpdate();
}

if (this.tasks.length) {
// return the last output
const outputIndex = Math.min(taskIndex, this.tasks.length - 1);
Expand Down
2 changes: 1 addition & 1 deletion packages/midscene/src/insight/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ export default class Insight<
const startTime = Date.now();
const { parseResult, elementById, rawResponse, usage } =
await AiInspectElement({
callAI,
callAI: callAI || this.aiVendorFn,
context,
multi: Boolean(multi),
targetElementDescription: queryPrompt,
Expand Down
3 changes: 3 additions & 0 deletions packages/midscene/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,9 @@ export interface PlaywrightParserOpt extends BaseAgentParserOpt {}
/*
action
*/
/**
 * Optional progress hooks that a caller can supply when tasks are executed.
 */
export interface ExecutionTaskProgressOptions {
// Optional callback that receives an ExecutionTask; it may be synchronous
// or return a promise.
onTaskStart?: (task: ExecutionTask) => Promise<void> | void;
}

export interface ExecutionRecorderItem {
type: 'screenshot';
Expand Down
2 changes: 0 additions & 2 deletions packages/midscene/tests/ai/extract/extract.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@ vi.setConfig({
hookTimeout: 30 * 1000,
});

const useModel = undefined;

const modelList: Array<'openAI' | 'coze'> = ['openAI'];

if (preferCozeModel('coze')) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ const insightFindTask = (shouldThrow?: boolean) => {
param: {
prompt: 'test',
},
locate: null,
async executor(param, taskContext) {
if (shouldThrow) {
const { task } = taskContext;
Expand All @@ -46,70 +47,58 @@ const insightFindTask = (shouldThrow?: boolean) => {
return insightFindTask;
};

vi.setConfig({
testTimeout: 40 * 1000,
});

describe('executor', () => {
it(
'insight - basic run',
async () => {
const insightTask1 = insightFindTask();
const flushResultData = 'abcdef';
const taskParam = {
action: 'tap',
anything: 'acceptable',
};
const tapperFn = vi.fn();
const actionTask: ExecutionTaskActionApply = {
type: 'Action',
param: taskParam,
executor: tapperFn,
};
const actionTask2: ExecutionTaskActionApply = {
type: 'Action',
param: taskParam,
executor: async () => {
return {
output: flushResultData,
} as any;
},
};
it('insight - basic run', async () => {
const insightTask1 = insightFindTask();
const flushResultData = 'abcdef';
const taskParam = {
action: 'tap',
anything: 'acceptable',
};
const tapperFn = vi.fn();
const actionTask: ExecutionTaskActionApply = {
type: 'Action',
param: taskParam,
locate: null,
executor: tapperFn,
};
const actionTask2: ExecutionTaskActionApply = {
type: 'Action',
param: taskParam,
locate: null,
executor: async () => {
return {
output: flushResultData,
} as any;
},
};

const inputTasks = [insightTask1, actionTask, actionTask2];

const executor = new Executor(
'test',
'hello, this is a test',
inputTasks,
);
const flushResult = await executor.flush();
const tasks = executor.tasks as ExecutionTaskInsightLocate[];
const { element } = tasks[0].output || {};
expect(element).toBeTruthy();

expect(tasks.length).toBe(inputTasks.length);
expect(tasks[0].status).toBe('finished');
expect(tasks[0].output).toMatchSnapshot();
expect(tasks[0].log?.dump).toBeTruthy();
expect(tasks[0].timing?.end).toBeTruthy();
expect(tasks[0].cache).toBeTruthy();
expect(tasks[0].cache?.hit).toEqual(false);

expect(tapperFn).toBeCalledTimes(1);
expect(tapperFn.mock.calls[0][0]).toBe(taskParam);
expect(tapperFn.mock.calls[0][1].element).toBe(element);
expect(tapperFn.mock.calls[0][1].task).toBeTruthy();

const dump = executor.dump();
expect(dump.logTime).toBeTruthy();

expect(flushResult).toBe(flushResultData);
},
{
timeout: 999 * 1000,
},
);
const inputTasks = [insightTask1, actionTask, actionTask2];

const executor = new Executor('test', 'hello, this is a test', inputTasks);
const flushResult = await executor.flush();
const tasks = executor.tasks as ExecutionTaskInsightLocate[];
expect(executor.isInErrorState()).toBeFalsy();
const { element } = tasks[0].output || {};
expect(element).toBeTruthy();

expect(tasks.length).toBe(inputTasks.length);
expect(tasks[0].status).toBe('finished');
expect(tasks[0].output).toMatchSnapshot();
expect(tasks[0].log?.dump).toBeTruthy();
expect(tasks[0].timing?.end).toBeTruthy();
expect(tasks[0].cache).toBeTruthy();
expect(tasks[0].cache?.hit).toEqual(false);

expect(tapperFn).toBeCalledTimes(1);
expect(tapperFn.mock.calls[0][0]).toBe(taskParam);
expect(tapperFn.mock.calls[0][1].task).toBeTruthy();

const dump = executor.dump();
expect(dump.logTime).toBeTruthy();

expect(flushResult).toBe(flushResultData);
});

it('insight - init and append', async () => {
const initExecutor = new Executor('test');
Expand All @@ -123,6 +112,7 @@ describe('executor', () => {
action: 'tap',
element: 'previous',
},
locate: null,
executor: async () => {
// delay 500
await new Promise((resolve) => setTimeout(resolve, 500));
Expand Down
14 changes: 8 additions & 6 deletions packages/midscene/tests/utils.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import { readFileSync, writeFileSync } from 'node:fs';
/* eslint-disable @typescript-eslint/no-magic-numbers */
import path, { join } from 'node:path';
import type { callAiFn } from '@/ai-model/common';
import {
base64Encoded,
imageInfoOfBase64,
transformImgPathToBase64,
} from '@/image';
import Insight from '@/insight';
import type { BaseElement, UIContext } from '@/types';
import type { AIElementIdResponse, BaseElement, UIContext } from '@/types';
import { vi } from 'vitest';

export function getFixture(name: string) {
Expand Down Expand Up @@ -45,16 +45,18 @@ export function fakeInsight(content: string) {
center: [250, 250],
tap: vi.fn() as unknown,
},
// describer: basicPa
] as unknown as BaseElement[],
};
const context: UIContext = {
...basicContext,
};

const aiVendor = () => ({
elements: [{ id: '0' }],
errors: [],
const aiVendor: typeof callAiFn<AIElementIdResponse> = async () => ({
content: {
elements: [{ id: '0', reason: '', text: '' }],
errors: [],
},
usage: undefined,
});

const insight = new Insight(context, {
Expand Down
Loading
Loading