Skip to content

Commit

Permalink
test: add gpt-4o-mini to smoke test GitHub workflow
Browse files Browse the repository at this point in the history
- Add gpt-4o-mini to smoke test GitHub workflow
- Add gpt-4o-mini smoke test golden files
- Remove the outdated gpt-4o-2024-05-13 and gpt-4-turbo-2024-04-09 smoke test
  config and golden files
- Add golden files for gpt-4o-2024-08-06
- Regenerate golden files for existing models to drop callProgress
  events (these were not being compared anyway)

Signed-off-by: Nick Hale <[email protected]>
  • Loading branch information
njhale committed Oct 14, 2024
1 parent 89a9398 commit 2380ce1
Show file tree
Hide file tree
Showing 15 changed files with 2,644 additions and 12,216 deletions.
16 changes: 8 additions & 8 deletions .github/workflows/smoke.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ jobs:
echo "run_smoke_tests=false" >> $GITHUB_OUTPUT
gpt-4o-2024-05-13:
gpt-4o-2024-08-06:
needs: check-label
if: ${{ needs.check-label.outputs.run_smoke_tests == 'true' }}
runs-on: ubuntu-22.04
Expand All @@ -81,14 +81,14 @@ jobs:
go-version: "1.21"
- env:
OPENAI_API_KEY: ${{ secrets.SMOKE_OPENAI_API_KEY }}
GPTSCRIPT_DEFAULT_MODEL: gpt-4o-2024-05-13
name: Run smoke test for gpt-4o-2024-05-13
GPTSCRIPT_DEFAULT_MODEL: gpt-4o-2024-08-06
name: Run smoke test for gpt-4o-2024-08-06
run: |
echo "Running smoke test for model gpt-4o-2024-05-13"
echo "Running smoke test for model gpt-4o-2024-08-06"
export PATH="$(pwd)/bin:${PATH}"
make smoke
gpt-4-turbo-2024-04-09:
gpt-4o-mini-2024-07-18:
needs: check-label
if: ${{ needs.check-label.outputs.run_smoke_tests == 'true' }}
runs-on: ubuntu-22.04
Expand All @@ -110,10 +110,10 @@ jobs:
go-version: "1.21"
- env:
OPENAI_API_KEY: ${{ secrets.SMOKE_OPENAI_API_KEY }}
GPTSCRIPT_DEFAULT_MODEL: gpt-4-turbo-2024-04-09
name: Run smoke test for gpt-4-turbo-2024-04-09
GPTSCRIPT_DEFAULT_MODEL: gpt-4o-mini-2024-07-18
name: Run smoke test for gpt-4o-mini-2024-07-18
run: |
echo "Running smoke test for model gpt-4-turbo-2024-04-09"
echo "Running smoke test for model gpt-4o-mini-2024-07-18"
export PATH="$(pwd)/bin:${PATH}"
make smoke
Expand Down
4 changes: 2 additions & 2 deletions pkg/tests/judge/judge.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,10 @@ func New[T any](client *openai.Client) (*Judge[T], error) {
}

func (j *Judge[T]) Equal(ctx context.Context, expected, actual T, criteria string) (equal bool, reasoning string, err error) {
comparisonJSON, err := json.MarshalIndent(&comparison[T]{
comparisonJSON, err := json.Marshal(&comparison[T]{
Expected: expected,
Actual: actual,
}, "", " ")
})
if err != nil {
return false, "", fmt.Errorf("failed to marshal judge testcase JSON: %w", err)
}
Expand Down
5 changes: 5 additions & 0 deletions pkg/tests/smoke/smoke_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,11 @@ func getActualEvents(t *testing.T, eventsFile string) []event {

var e event
require.NoError(t, json.Unmarshal([]byte(line), &e))

if e.Type == runner.EventTypeCallProgress {
continue
}

events = append(events, e)
}

Expand Down
364 changes: 66 additions & 298 deletions pkg/tests/smoke/testdata/Bob/claude-3-5-sonnet-20240620-expected.json

Large diffs are not rendered by default.

Loading

0 comments on commit 2380ce1

Please sign in to comment.