gptscript-ai
diff --git a/‎.github/workflows/smoke.yaml
+8-8 b/‎.github/workflows/smoke.yaml
+8-8
diff --git a/‎pkg/tests/judge/judge.go
+2-2 b/‎pkg/tests/judge/judge.go
+2-2
diff --git a/‎pkg/tests/smoke/smoke_test.go
+5 b/‎pkg/tests/smoke/smoke_test.go
+5
diff --git a/‎pkg/tests/smoke/testdata/Bob/claude-3-5-sonnet-20240620-expected.json
+69-379 b/‎pkg/tests/smoke/testdata/Bob/claude-3-5-sonnet-20240620-expected.json
+69-379
@@ -59,7 +59,7 @@ jobs:
 
           echo "run_smoke_tests=false" >> $GITHUB_OUTPUT
 
-  gpt-4o-2024-05-13:
+  gpt-4o-2024-08-06:
     needs: check-label
     if: ${{ needs.check-label.outputs.run_smoke_tests == 'true' }}
     runs-on: ubuntu-22.04
@@ -81,14 +81,14 @@ jobs:
           go-version: "1.21"
       - env:
           OPENAI_API_KEY: ${{ secrets.SMOKE_OPENAI_API_KEY }}
-          GPTSCRIPT_DEFAULT_MODEL: gpt-4o-2024-05-13
-        name: Run smoke test for gpt-4o-2024-05-13
+          GPTSCRIPT_DEFAULT_MODEL: gpt-4o-2024-08-06
+        name: Run smoke test for gpt-4o-2024-08-06
         run: |
-          echo "Running smoke test for model gpt-4o-2024-05-13"
+          echo "Running smoke test for model gpt-4o-2024-08-06"
           export PATH="$(pwd)/bin:${PATH}"
           make smoke
 
-  gpt-4-turbo-2024-04-09:
+  gpt-4o-mini-2024-07-18:
     needs: check-label
     if: ${{ needs.check-label.outputs.run_smoke_tests == 'true' }}
     runs-on: ubuntu-22.04
@@ -110,10 +110,10 @@ jobs:
           go-version: "1.21"
       - env:
           OPENAI_API_KEY: ${{ secrets.SMOKE_OPENAI_API_KEY }}
-          GPTSCRIPT_DEFAULT_MODEL: gpt-4-turbo-2024-04-09
-        name: Run smoke test for gpt-4-turbo-2024-04-09
+          GPTSCRIPT_DEFAULT_MODEL: gpt-4o-mini-2024-07-18
+        name: Run smoke test for gpt-4o-mini-2024-07-18
         run: |
-          echo "Running smoke test for model gpt-4-turbo-2024-04-09"
+          echo "Running smoke test for model gpt-4o-mini-2024-07-18"
           export PATH="$(pwd)/bin:${PATH}"
           make smoke
 
 
@@ -86,10 +86,10 @@ func New[T any](client *openai.Client) (*Judge[T], error) {
 }
 
 func (j *Judge[T]) Equal(ctx context.Context, expected, actual T, criteria string) (equal bool, reasoning string, err error) {
-	comparisonJSON, err := json.MarshalIndent(&comparison[T]{
+	comparisonJSON, err := json.Marshal(&comparison[T]{
 		Expected: expected,
 		Actual:   actual,
-	}, "", "    ")
+	})
 	if err != nil {
 		return false, "", fmt.Errorf("failed to marshal judge testcase JSON: %w", err)
 	}
 
@@ -175,6 +175,11 @@ func getActualEvents(t *testing.T, eventsFile string) []event {
 
 		var e event
 		require.NoError(t, json.Unmarshal([]byte(line), &e))
+
+		if e.Type == runner.EventTypeCallProgress {
+			continue
+		}
+
 		events = append(events, e)
 	}
Original file line number	Diff line number	Diff line change
`@@ -86,10 +86,10 @@ func New[T any](client *openai.Client) (*Judge[T], error) {`
`86`	`86`	`}`
`87`	`87`
`88`	`88`	`func (j *Judge[T]) Equal(ctx context.Context, expected, actual T, criteria string) (equal bool, reasoning string, err error) {`
`89`		`- comparisonJSON, err := json.MarshalIndent(&comparison[T]{`
	`89`	`+ comparisonJSON, err := json.Marshal(&comparison[T]{`
`90`	`90`	`Expected: expected,`
`91`	`91`	`Actual: actual,`
`92`		`- }, "", "    ")`
	`92`	`+ })`
`93`	`93`	`if err != nil {`
`94`	`94`	`return false, "", fmt.Errorf("failed to marshal judge testcase JSON: %w", err)`
`95`	`95`	`}`