From 1e9ee1c93dfbcb48033a32b688397d27ba4aaec0 Mon Sep 17 00:00:00 2001 From: Anirudh Kamath Date: Sat, 21 Dec 2024 23:08:41 -0800 Subject: [PATCH] package json and changeset --- .changeset/dirty-apples-pay.md | 4 +- .github/workflows/ci.yml | 94 +++++++++++++++++----------------- package.json | 3 +- 3 files changed, 51 insertions(+), 50 deletions(-) diff --git a/.changeset/dirty-apples-pay.md b/.changeset/dirty-apples-pay.md index 93aa10d4..38be23d6 100644 --- a/.changeset/dirty-apples-pay.md +++ b/.changeset/dirty-apples-pay.md @@ -1,5 +1,5 @@ --- -"@browserbasehq/stagehand": major +"@browserbasehq/stagehand": patch --- -Move stagehand.act() -> stagehand.page.act() +Move stagehand.act() -> stagehand.page.act() and deprecate stagehand.act() diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 99a46700..93377ddf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -85,6 +85,53 @@ jobs: - name: Run E2E Tests run: npm run e2e +run-act-evals: + runs-on: ubuntu-latest + timeout-minutes: 25 + needs: [run-text-extract-evals] + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }} + BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }} + BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }} + HEADLESS: true + EVAL_ENV: browserbase + + steps: + - name: Check out repository code + uses: actions/checkout@v4 + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Install dependencies + run: npm install --no-frozen-lockfile + + - name: Install Playwright browsers + run: npm exec playwright install --with-deps + + - name: Run Act Evals + run: npm run evals category act + + - name: Log Act Evals Performance + run: | + experimentName=$(jq -r '.experimentName' eval-summary.json) + echo "View results at https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentName}" + if [ -f eval-summary.json ]; then + act_score=$(jq '.categories.act' eval-summary.json) + echo "Act category score: $act_score%" + if (( $(echo "$act_score < 80" | bc -l) )); then + echo "Act category score is below 80%. Failing CI." + exit 1 + fi + else + echo "Eval summary not found for act category. Failing CI." + exit 1 + fi + run-extract-evals: needs: [run-lint, run-build, run-e2e-tests] runs-on: ubuntu-latest @@ -201,52 +248,7 @@ jobs: exit 1 fi - run-act-evals: - runs-on: ubuntu-latest - timeout-minutes: 25 - needs: [run-text-extract-evals] - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }} - BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }} - BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }} - HEADLESS: true - EVAL_ENV: browserbase - - steps: - - name: Check out repository code - uses: actions/checkout@v4 - - - name: Set up Node.js - uses: actions/setup-node@v4 - with: - node-version: "20" - - - name: Install dependencies - run: npm install --no-frozen-lockfile - - - name: Install Playwright browsers - run: npm exec playwright install --with-deps - - - name: Run Act Evals - run: npm run evals category act - - - name: Log Act Evals Performance - run: | - experimentName=$(jq -r '.experimentName' eval-summary.json) - echo "View results at https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentName}" - if [ -f eval-summary.json ]; then - act_score=$(jq '.categories.act' eval-summary.json) - echo "Act category score: $act_score%" - if (( $(echo "$act_score < 80" | bc -l) )); then - echo "Act category score is below 80%. Failing CI." - exit 1 - fi - else - echo "Eval summary not found for act category. Failing CI." - exit 1 - fi + run-observe-evals: runs-on: ubuntu-latest diff --git a/package.json b/package.json index 9941326a..e0eeaee7 100644 --- a/package.json +++ b/package.json @@ -81,6 +81,5 @@ "bugs": { "url": "https://github.com/browserbase/stagehand/issues" }, - "homepage": "https://github.com/browserbase/stagehand#readme", - "packageManager": "pnpm@9.15.0+sha512.76e2379760a4328ec4415815bcd6628dee727af3779aaa4c914e3944156c4299921a89f976381ee107d41f12cfa4b66681ca9c718f0668fa0831ed4c6d8ba56c" + "homepage": "https://github.com/browserbase/stagehand#readme" }