From 5e102482d256e6e0dd638db718fdd7215963da8b Mon Sep 17 00:00:00 2001
From: Colin Wang <zw1300@princeton.edu>
Date: Sun, 18 Aug 2024 18:11:39 -0400
Subject: [PATCH] Update grading functions: 1. Pin the gpt-4o grader to the
 gpt-4o-2024-05-13 snapshot to avoid using the recently released one. 2. Fix
 the grading API call logic for reasoning Qs to increase robustness.

---
 src/descriptive_utils.py | 2 +-
 src/reasoning_utils.py   | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/descriptive_utils.py b/src/descriptive_utils.py
index e6fcb05..1da1be9 100644
--- a/src/descriptive_utils.py
+++ b/src/descriptive_utils.py
@@ -33,7 +33,7 @@ def get_descriptive_result_gpt(client, prompt, length, max_retries=10):
                         "content": prompt,
                     }
                 ],
-                model="gpt-4o",
+                model="gpt-4o-2024-05-13",
                 response_format={"type": "json_object"},
                 n=1,
                 max_tokens=max_tokens,
diff --git a/src/reasoning_utils.py b/src/reasoning_utils.py
index fa4f1a7..7958509 100644
--- a/src/reasoning_utils.py
+++ b/src/reasoning_utils.py
@@ -16,7 +16,7 @@ def get_reasoning_result_gpt(client, prompt, max_retries=10):
                         "content": prompt,
                     }
                 ],
-                model="gpt-4o",
+                model="gpt-4o-2024-05-13",
                 response_format={"type": "json_object"},
                 n=1,
                 max_tokens=max_tokens,
@@ -33,6 +33,7 @@ def get_reasoning_result_gpt(client, prompt, max_retries=10):
             if 'Unterminated string starting at' in str(e):
                 if max_tokens >= 1024:
                     print(f"Failed to get response for prompt: {prompt}")
+                    ext, scr = 'Failed to parse response', -1
                     break
                 else:
                     max_tokens = min(1024, max_tokens * 2) # double the max_tokens