Skip to content

Commit

Permalink
package json and changeset
Browse files (browse the repository at this point in the history)
  • Loading branch information
kamath committed Dec 22, 2024
1 parent d075cc5 commit 1e9ee1c
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 50 deletions.
4 changes: 2 additions & 2 deletions .changeset/dirty-apples-pay.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
"@browserbasehq/stagehand": major
"@browserbasehq/stagehand": patch
---

Move stagehand.act() -> stagehand.page.act()
Move stagehand.act() -> stagehand.page.act() and deprecate stagehand.act()
94 changes: 48 additions & 46 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,53 @@ jobs:
- name: Run E2E Tests
run: npm run e2e

# Runs the "act" eval category and fails CI when the act score reported in
# eval-summary.json is below 80%. Starts only after run-text-extract-evals.
run-act-evals:
  runs-on: ubuntu-latest
  timeout-minutes: 25
  needs: [run-text-extract-evals]
  env:
    OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
    BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
    BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
    BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
    # Environment variables are always strings; quote to make that explicit.
    HEADLESS: "true"
    EVAL_ENV: browserbase

  steps:
    - name: Check out repository code
      uses: actions/checkout@v4

    - name: Set up Node.js
      uses: actions/setup-node@v4
      with:
        node-version: "20"

    - name: Install dependencies
      run: npm install --no-frozen-lockfile

    - name: Install Playwright browsers
      run: npm exec playwright install --with-deps

    - name: Run Act Evals
      run: npm run evals category act

    - name: Log Act Evals Performance
      run: |
        # Test for the summary file BEFORE reading it. The runner's default
        # bash is fail-fast (-e), so a jq call on a missing file would abort
        # the step before the "not found" message below could be printed.
        if [ -f eval-summary.json ]; then
          experimentName=$(jq -r '.experimentName' eval-summary.json)
          echo "View results at https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentName}"
          act_score=$(jq '.categories.act' eval-summary.json)
          echo "Act category score: $act_score%"
          # bc prints 1 when the comparison holds, which (( )) treats as true.
          if (( $(echo "$act_score < 80" | bc -l) )); then
            echo "Act category score is below 80%. Failing CI."
            exit 1
          fi
        else
          echo "Eval summary not found for act category. Failing CI."
          exit 1
        fi
run-extract-evals:
needs: [run-lint, run-build, run-e2e-tests]
runs-on: ubuntu-latest
Expand Down Expand Up @@ -201,52 +248,7 @@ jobs:
exit 1
fi
# Runs the "act" eval category and fails CI when the act score reported in
# eval-summary.json is below 80%. Starts only after run-text-extract-evals.
run-act-evals:
  runs-on: ubuntu-latest
  timeout-minutes: 25
  needs: [run-text-extract-evals]
  env:
    OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
    BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
    BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
    BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
    # Environment variables are always strings; quote to make that explicit.
    HEADLESS: "true"
    EVAL_ENV: browserbase

  steps:
    - name: Check out repository code
      uses: actions/checkout@v4

    - name: Set up Node.js
      uses: actions/setup-node@v4
      with:
        node-version: "20"

    - name: Install dependencies
      run: npm install --no-frozen-lockfile

    - name: Install Playwright browsers
      run: npm exec playwright install --with-deps

    - name: Run Act Evals
      run: npm run evals category act

    - name: Log Act Evals Performance
      run: |
        # Test for the summary file BEFORE reading it. The runner's default
        # bash is fail-fast (-e), so a jq call on a missing file would abort
        # the step before the "not found" message below could be printed.
        if [ -f eval-summary.json ]; then
          experimentName=$(jq -r '.experimentName' eval-summary.json)
          echo "View results at https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentName}"
          act_score=$(jq '.categories.act' eval-summary.json)
          echo "Act category score: $act_score%"
          # bc prints 1 when the comparison holds, which (( )) treats as true.
          if (( $(echo "$act_score < 80" | bc -l) )); then
            echo "Act category score is below 80%. Failing CI."
            exit 1
          fi
        else
          echo "Eval summary not found for act category. Failing CI."
          exit 1
        fi

run-observe-evals:
runs-on: ubuntu-latest
Expand Down
3 changes: 1 addition & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,5 @@
"bugs": {
"url": "https://github.com/browserbase/stagehand/issues"
},
"homepage": "https://github.com/browserbase/stagehand#readme",
"packageManager": "[email protected]+sha512.76e2379760a4328ec4415815bcd6628dee727af3779aaa4c914e3944156c4299921a89f976381ee107d41f12cfa4b66681ca9c718f0668fa0831ed4c6d8ba56c"
"homepage": "https://github.com/browserbase/stagehand#readme"
}

0 comments on commit 1e9ee1c

Please sign in to comment.