From 5e102482d256e6e0dd638db718fdd7215963da8b Mon Sep 17 00:00:00 2001 From: Colin Wang <zw1300@princeton.edu> Date: Sun, 18 Aug 2024 18:11:39 -0400 Subject: [PATCH] Update grading functions: 1. Fix the gpt-4o grader API version to avoid using the recently released one. 2. Fix grading API calling logic for reasoning Qs to increase robustness. --- src/descriptive_utils.py | 2 +- src/reasoning_utils.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/descriptive_utils.py b/src/descriptive_utils.py index e6fcb05..1da1be9 100644 --- a/src/descriptive_utils.py +++ b/src/descriptive_utils.py @@ -33,7 +33,7 @@ def get_descriptive_result_gpt(client, prompt, length, max_retries=10): "content": prompt, } ], - model="gpt-4o", + model="gpt-4o-2024-05-13", response_format={"type": "json_object"}, n=1, max_tokens=max_tokens, diff --git a/src/reasoning_utils.py b/src/reasoning_utils.py index fa4f1a7..7958509 100644 --- a/src/reasoning_utils.py +++ b/src/reasoning_utils.py @@ -16,7 +16,7 @@ def get_reasoning_result_gpt(client, prompt, max_retries=10): "content": prompt, } ], - model="gpt-4o", + model="gpt-4o-2024-05-13", response_format={"type": "json_object"}, n=1, max_tokens=max_tokens, @@ -33,6 +33,7 @@ def get_reasoning_result_gpt(client, prompt, max_retries=10): if 'Unterminated string starting at' in str(e): if max_tokens >= 1024: print(f"Failed to get response for prompt: {prompt}") + ext, scr = 'Failed to parse response', -1 break else: max_tokens = min(1024, max_tokens * 2) # double the max_tokens