Skip to content

Commit

Permalink
stagehand.act -> page.act (#326)
Browse files Browse the repository at this point in the history
* need to actually move to act to page now

* move act -> page

* fix e2e

* fix tests

* readme

* changeset

* package json and changeset

* don't fail on combo evals
  • Loading branch information
kamath authored Dec 22, 2024
1 parent d5e662d commit d8ab6e5
Show file tree
Hide file tree
Showing 9 changed files with 261 additions and 195 deletions.
5 changes: 5 additions & 0 deletions .changeset/dirty-apples-pay.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@browserbasehq/stagehand": patch
---

Move stagehand.act() -> stagehand.page.act() and deprecate stagehand.act()
99 changes: 49 additions & 50 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,53 @@ jobs:
- name: Run E2E Tests
run: npm run e2e

run-act-evals:
runs-on: ubuntu-latest
timeout-minutes: 25
needs: [run-text-extract-evals]
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
HEADLESS: true
EVAL_ENV: browserbase

steps:
- name: Check out repository code
uses: actions/checkout@v4

- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: "20"

- name: Install dependencies
run: npm install --no-frozen-lockfile

- name: Install Playwright browsers
run: npm exec playwright install --with-deps

- name: Run Act Evals
run: npm run evals category act

- name: Log Act Evals Performance
run: |
experimentName=$(jq -r '.experimentName' eval-summary.json)
echo "View results at https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentName}"
if [ -f eval-summary.json ]; then
act_score=$(jq '.categories.act' eval-summary.json)
echo "Act category score: $act_score%"
if (( $(echo "$act_score < 80" | bc -l) )); then
echo "Act category score is below 80%. Failing CI."
exit 1
fi
else
echo "Eval summary not found for act category. Failing CI."
exit 1
fi
run-extract-evals:
needs: [run-lint, run-build, run-e2e-tests]
runs-on: ubuntu-latest
Expand Down Expand Up @@ -201,52 +248,7 @@ jobs:
exit 1
fi
run-act-evals:
runs-on: ubuntu-latest
timeout-minutes: 25
needs: [run-text-extract-evals]
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
HEADLESS: true
EVAL_ENV: browserbase

steps:
- name: Check out repository code
uses: actions/checkout@v4

- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: "20"

- name: Install dependencies
run: npm install --no-frozen-lockfile

- name: Install Playwright browsers
run: npm exec playwright install --with-deps

- name: Run Act Evals
run: npm run evals category act

- name: Log Act Evals Performance
run: |
experimentName=$(jq -r '.experimentName' eval-summary.json)
echo "View results at https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentName}"
if [ -f eval-summary.json ]; then
act_score=$(jq '.categories.act' eval-summary.json)
echo "Act category score: $act_score%"
if (( $(echo "$act_score < 80" | bc -l) )); then
echo "Act category score is below 80%. Failing CI."
exit 1
fi
else
echo "Eval summary not found for act category. Failing CI."
exit 1
fi

run-observe-evals:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -333,10 +335,7 @@ jobs:
if [ -f eval-summary.json ]; then
combination_score=$(jq '.categories.combination' eval-summary.json)
echo "Combination category score: $combination_score%"
if (( $(echo "$combination_score < 85" | bc -l) )); then
echo "Combination category score is below 85%. Failing CI."
exit 1
fi
exit 0
else
echo "Eval summary not found for combination category. Failing CI."
exit 1
Expand Down
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,9 @@ This constructor is used to create an instance of Stagehand.

`act()` allows Stagehand to interact with a web page. Provide an `action` like `"search for 'x'"`, or `"select the cheapest flight presented"` (small atomic goals perform the best).

> [!NOTE]
> `act()` on the Stagehand instance is deprecated and will be removed in the next major version. Use `stagehand.page.act()` instead.
- **Arguments:**

- `action`: a `string` describing the action to perform
Expand All @@ -229,18 +232,18 @@ This constructor is used to create an instance of Stagehand.

```javascript
// Basic usage
await stagehand.act({ action: "click on add to cart" });
await stagehand.page.act({ action: "click on add to cart" });

// Using variables
await stagehand.act({
await stagehand.page.act({
action: "enter %username% into the username field",
variables: {
username: "[email protected]",
},
});

// Multiple variables
await stagehand.act({
await stagehand.page.act({
action: "fill in the form with %username% and %password%",
variables: {
username: "john.doe",
Expand Down
4 changes: 2 additions & 2 deletions evals/deterministic/stagehand.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ import type { ConstructorParams, LogLine } from "../../lib";

const StagehandConfig: ConstructorParams = {
env: "BROWSERBASE" /* Environment to run Stagehand in */,
apiKey: process.env.BROWSERBASE_API_KEY /* API key for authentication */,
projectId: process.env.BROWSERBASE_PROJECT_ID /* Project identifier */,
apiKey: process.env.BROWSERBASE_API_KEY! /* API key for authentication */,
projectId: process.env.BROWSERBASE_PROJECT_ID! /* Project identifier */,
verbose: 1 /* Logging verbosity level (0=quiet, 1=normal, 2=verbose) */,
debugDom: true /* Enable DOM debugging features */,
headless: false /* Run browser in headless mode */,
Expand Down
4 changes: 2 additions & 2 deletions evals/deterministic/tests/contexts.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ import StagehandConfig from "../stagehand.config";

// Configuration
const CONTEXT_TEST_URL = "https://docs.browserbase.com";
const BROWSERBASE_PROJECT_ID = process.env["BROWSERBASE_PROJECT_ID"]!;
const BROWSERBASE_API_KEY = process.env["BROWSERBASE_API_KEY"]!;
const BROWSERBASE_PROJECT_ID = process.env.BROWSERBASE_PROJECT_ID!;
const BROWSERBASE_API_KEY = process.env.BROWSERBASE_API_KEY!;

const bb = new Browserbase({
apiKey: BROWSERBASE_API_KEY,
Expand Down
122 changes: 117 additions & 5 deletions lib/StagehandPage.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,39 @@
import type { Page as PlaywrightPage } from "@playwright/test";
import { GotoOptions, Stagehand } from "./index";
import type {
Page as PlaywrightPage,
BrowserContext as PlaywrightContext,
} from "@playwright/test";
import { LLMClient } from "./llm/LLMClient";
import { ActOptions, ActResult, GotoOptions, Stagehand } from "./index";
import { StagehandActHandler } from "./handlers/actHandler";
import { StagehandContext } from "./StagehandContext";
import { Page } from "../types/page";

export class StagehandPage {
private stagehand: Stagehand;
private intPage: PlaywrightPage;
private intPage: Page;
private intContext: StagehandContext;
private actHandler: StagehandActHandler;
private llmClient: LLMClient;

constructor(page: PlaywrightPage, stagehand: Stagehand) {
constructor(
page: PlaywrightPage,
stagehand: Stagehand,
context: StagehandContext,
llmClient: LLMClient,
) {
this.intPage = page;
this.stagehand = stagehand;
this.intContext = context;
this.actHandler = new StagehandActHandler({
verbose: this.stagehand.verbose,
llmProvider: this.stagehand.llmProvider,
enableCaching: this.stagehand.enableCaching,
logger: this.stagehand.logger,
stagehandPage: this,
stagehandContext: this.intContext,
llmClient: llmClient,
});
this.llmClient = llmClient;
}

async init(
Expand All @@ -31,17 +57,27 @@ export class StagehandPage {
return result;
};

if (prop === "act") {
return async (options: ActOptions) => {
return this.act(options);
};
}

return target[prop as keyof PlaywrightPage];
},
});
await this._waitForSettledDom();
return this;
}

public get page(): PlaywrightPage {
public get page(): Page {
return this.intPage;
}

public get context(): PlaywrightContext {
return this.intContext.context;
}

// We can make methods public because StagehandPage is private to the Stagehand class.
// When a user gets stagehand.page, they are getting a proxy to the Playwright page.
// We can override the methods on the proxy to add our own behavior
Expand Down Expand Up @@ -150,4 +186,80 @@ export class StagehandPage {
await this.page.evaluate(() => window.cleanupDebug()).catch(() => {});
}
}

async act({
action,
modelName,
modelClientOptions,
useVision = "fallback",
variables = {},
domSettleTimeoutMs,
}: ActOptions): Promise<ActResult> {
if (!this.actHandler) {
throw new Error("Act handler not initialized");
}

useVision = useVision ?? "fallback";
const requestId = Math.random().toString(36).substring(2);
const llmClient: LLMClient = modelName
? this.stagehand.llmProvider.getClient(modelName, modelClientOptions)
: this.llmClient;

this.stagehand.log({
category: "act",
message: "running act",
level: 1,
auxiliary: {
action: {
value: action,
type: "string",
},
requestId: {
value: requestId,
type: "string",
},
modelName: {
value: llmClient.modelName,
type: "string",
},
},
});

return this.actHandler
.act({
action,
llmClient,
chunksSeen: [],
useVision,
verifierUseVision: useVision !== false,
requestId,
variables,
previousSelectors: [],
skipActionCacheForThisStep: false,
domSettleTimeoutMs,
})
.catch((e) => {
this.stagehand.log({
category: "act",
message: "error acting",
level: 1,
auxiliary: {
error: {
value: e.message,
type: "string",
},
trace: {
value: e.stack,
type: "string",
},
},
});

return {
success: false,
message: `Internal error: Error acting: ${e.message}`,
action: action,
};
});
}
}
Loading

0 comments on commit d8ab6e5

Please sign in to comment.