@@ -14,8 +14,8 @@ def run_openai_eval(args):
14
14
question_query_df ["generated_query" ] = ""
15
15
question_query_df ["reason" ] = ""
16
16
question_query_df ["error_msg" ] = ""
17
+ question_query_df ["exact_match" ] = 0
17
18
question_query_df ["correct" ] = 0
18
- question_query_df ["subset" ] = 0
19
19
question_query_df ["error_query_gen" ] = 0
20
20
question_query_df ["error_db_exec" ] = 0
21
21
question_query_df ["timeout" ] = 0
@@ -84,7 +84,7 @@ def run_openai_eval(args):
84
84
db_name = row ["db_name" ]
85
85
question = row ["question" ]
86
86
query_category = row ["query_category" ]
87
- correct = subset = 0
87
+ exact_match = correct = 0
88
88
generated_result = expected_result = None
89
89
db_creds = {
90
90
"host" : "localhost" ,
@@ -103,23 +103,23 @@ def run_openai_eval(args):
103
103
query_gen , db_name , db_creds , args .timeout_exec
104
104
)
105
105
generated_result = generated_result .rename (columns = str .lower )
106
- correct = subset = int (
106
+ exact_match = correct = int (
107
107
compare_df (
108
108
expected_result , generated_result , query_category , question
109
109
)
110
110
)
111
- if not correct :
112
- subset = subset_df (
111
+ if not exact_match :
112
+ correct = subset_df (
113
113
df_sub = expected_result ,
114
114
df_super = generated_result ,
115
115
query_category = query_category ,
116
116
question = question ,
117
117
verbose = args .verbose ,
118
118
)
119
+ row ["exact_match" ] = int (exact_match )
119
120
row ["correct" ] = int (correct )
120
- row ["subset" ] = int (subset )
121
121
row ["error_msg" ] = ""
122
- if subset :
122
+ if correct :
123
123
total_correct += 1
124
124
except QueryCanceledError as e :
125
125
row ["timeout" ] = 1
@@ -136,8 +136,8 @@ def run_openai_eval(args):
136
136
output_df .to_csv (args .output_file , index = False , float_format = "%.2f" )
137
137
138
138
# get average exact-match accuracy (rows where the compare_df check alone passed)
139
- avg_acc = output_df ["correct " ].sum () / len (output_df )
139
+ avg_acc = output_df ["exact_match " ].sum () / len (output_df )
140
140
print (f"Average accuracy: { avg_acc :.2f} " )
141
141
# get average accuracy counting a row when either the compare_df check or the subset_df fallback passed
142
- avg_subset = output_df ["subset " ].sum () / len (output_df )
142
+ avg_subset = output_df ["correct " ].sum () / len (output_df )
143
143
print (f"Average subset accuracy: { avg_subset :.2f} " )
0 commit comments