-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexperimental_data_generation.py
398 lines (341 loc) · 28 KB
/
experimental_data_generation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
import json
import os
import statistics
from datetime import datetime as dt
from pathlib import Path
from string import Template
from typing import Any, Optional, Callable
import anthropic
import google.generativeai as google_genai
from anthropic import Anthropic
from google.generativeai import GenerativeModel
from google.generativeai.types import safety_types
from jsonschema import Draft202012Validator
from data_processing.data_loading import load_scenarios, load_objects_for_one_model_and_scenario, \
load_text_passages_for_one_model_and_scenario
from ai_querying.ai_querying_util_funcs import extract_text_passage_from_output, extract_json_doc_from_output, \
generate_with_model
from ai_querying.ai_querying_defs import google_api_key_env, anthropic_api_key_env, google_model_specifier, \
anthropic_model_specifier, anthropic_generation_temp, google_generation_temp, google_reconstruction_temp, \
anthropic_obj_gen_group_size, google_obj_gen_group_size, max_num_api_calls_for_schema_validation_retry_logic, \
ModelProvider, AnthropicClientBundle
from ai_querying.system_prompts import anthropic_object_generation_sys_prompt, google_object_generation_sys_prompt, \
anthropic_text_passage_generation_sys_prompt, google_text_passage_generation_sys_prompt, \
google_object_reconstruction_sys_prompt
from data_processing.data_mngmt_defs import schemas_path, claude_objs_path, gemini_objs_path, claude_texts_path, \
gemini_texts_path
from utils_and_defs.logging_setup import create_logger
from utils_and_defs.trivial_util_funcs import d
from validate_generated_json_objs_and_texts import validate_generated_objects_texts
# Module-level logger (named after this module) used by every function in this file.
logger = create_logger(__name__)
def generate_json_objs(google_client: Optional[GenerativeModel], anthropic_client: Optional[Anthropic], schema_idx: int,
                       schema: dict[str,Any], scenario_domain: str, scenario_texts_label: str,
                       increment_problem_counter: Callable[[bool], None], target_num_objs: int
                       ) -> tuple[list[Optional[dict[str, Any]]], list[str], dict[int, int]]:
    """
    Ask exactly one model (Claude or Gemini) for JSON objects conforming to `schema`, validating each returned object
    against the schema and re-prompting (with feedback about what went wrong) until enough valid objects have been
    collected or the retry limit `max_num_api_calls_for_schema_validation_retry_logic` is reached.

    :param google_client: Gemini client to generate with; must be None when `anthropic_client` is given
    :param anthropic_client: Anthropic client to generate with; must be None when `google_client` is given
    :param schema_idx: index of the scenario's schema; only used in log messages/case ids
    :param schema: the JSON schema that every generated object is validated against
    :param scenario_domain: domain name that is inserted into the prompt
    :param scenario_texts_label: description of the kind of text passage, inserted into the prompt
    :param increment_problem_counter: called once (True for Claude, False for Gemini) when the retry limit is
        exhausted before `target_num_objs` valid objects were collected
    :param target_num_objs: how many schema-compliant objects are wanted
    :return: list of generated objects (padded with None up to `target_num_objs` if the retry limit was hit), list of
        analysis strings for each object generations, and mapping from object index to analysis string index (since an
        analysis string goes with a particular API call and a typical API call will produce more than 1
        schema-compliant object). None fillers are mapped to a final entry holding the last raw model response.
    :raises AssertionError: if not exactly one of the two clients is provided
    """
    assert (google_client is not None) ^ (anthropic_client is not None)#XOR- exactly one of them should be defined
    should_use_claude: bool = google_client is None
    model_nm = "Claude" if should_use_claude else "Gemini"
    provider = ModelProvider.ANTHROPIC if should_use_claude else ModelProvider.GOOGLE_DEEPMIND
    bundled_anthropic_client = AnthropicClientBundle(anthropic_client, anthropic_object_generation_sys_prompt,
                                                     4096, anthropic_generation_temp, anthropic_model_specifier)

    logger.info(f"Generating {target_num_objs} objects with {model_nm} for scenario {scenario_domain} - {scenario_texts_label}")

    generated_objects: list[Optional[dict[str,Any]]] = []
    obj_gen_analysis_strs: list[str] = []
    obj_idx_to_analysis_idx: dict[int, int] = {}

    # The prompt text below is kept flush-left exactly as it appears in the file; `${...}` placeholders are
    # filled in by string.Template after the f-string has inserted the scenario details.
    user_prompt_template = Template(d(f"""
Here is such a JSON schema for the domain "{scenario_domain}":
```json
${{curr_scenario_schema}}
```
This describes the pieces of information that someone might want to extract in a structured way from "{scenario_texts_label}" text passages.
Please generate a JSON array containing {target_num_objs} diverse JSON objects conforming to that schema, following the above instructions while doing so.
"""))
    user_prompt = user_prompt_template.safe_substitute(curr_scenario_schema=json.dumps(schema, indent=2))

    # The validator depends only on the schema, so it is built once rather than once per retry.
    schema_validator = Draft202012Validator(schema, format_checker=Draft202012Validator.FORMAT_CHECKER)

    # Conversation history for the retry dialogue: the i-th follow-up prompt answers the i-th model response.
    ai_responses: list[str] = []
    followup_prompts: list[str] = []
    resp_text: str = ""
    for retry_idx in range(max_num_api_calls_for_schema_validation_retry_logic):
        assert len(ai_responses) == len(followup_prompts)
        obj_gen_analysis: str
        if retry_idx > 0:
            logger.debug(f"Retrying generation of JSON objects for scenario {schema_idx} {scenario_domain} - {scenario_texts_label} ({retry_idx} prior attempts)")
        resp_text = generate_with_model(provider, user_prompt, ai_responses, followup_prompts, google_client,
                                        bundled_anthropic_client)
        curr_generated_objects, obj_gen_analysis, json_doc_problem_explanation = \
            extract_json_doc_from_output(resp_text, is_obj_vs_arr=False)

        valid_idxs_in_curr_round: list[int] = []
        error_feedback: str = ""
        schema_validation_feedback_msgs: list[str] = []
        if curr_generated_objects is None:
            logger.warning(f"Failed to extract JSON objects from {model_nm} output for schema index {schema_idx}")#model response will've been printed out already by the find-json-doc-substring-of-model-output method
            error_feedback = f"The response was not formatted as instructed, and so the JSON document could not be extracted from it. Details:\n{json_doc_problem_explanation}"
        else:
            num_objs_b4_curr_round = len(generated_objects)
            expected_num_objs_for_curr_round = target_num_objs-num_objs_b4_curr_round
            num_objs_in_curr_round = len(curr_generated_objects)
            if num_objs_in_curr_round != expected_num_objs_for_curr_round:
                logger.warning(f"{model_nm} generated {num_objs_in_curr_round} objects instead of the expected {expected_num_objs_for_curr_round} for schema index {schema_idx}\nResponse:{resp_text}")
                # Tell the model which direction it was off in; claiming "not enough" after an over-long
                # response would be misleading feedback.
                if num_objs_in_curr_round < expected_num_objs_for_curr_round:
                    error_feedback = f"There were not enough objects generated; only {num_objs_in_curr_round} were found when {expected_num_objs_for_curr_round} were asked for.\n"
                else:
                    error_feedback = f"Too many objects were generated; {num_objs_in_curr_round} were found when only {expected_num_objs_for_curr_round} were asked for.\n"

            for obj_idx, obj in enumerate(curr_generated_objects):
                # One validation pass: an empty error list is exactly what is_valid() would report as valid.
                validation_errors = list(schema_validator.iter_errors(obj))
                if not validation_errors:
                    generated_objects.append(obj)
                    #this will become a valid analysis string index right after this for loop over current-round objects
                    obj_idx_to_analysis_idx[len(generated_objects)-1] = len(obj_gen_analysis_strs)
                    valid_idxs_in_curr_round.append(obj_idx)
                else:
                    schema_validation_errs = "; ".join([str(err) for err in validation_errors])
                    logger.warning(f"{model_nm}-generated object {obj_idx} for schema index {schema_idx} failed schema validation\nSchema:{schema}\nObject:{json.dumps(obj, indent=4)}\nErrors:{schema_validation_errs}")
                    schema_validation_feedback_msgs.append(f"The {obj_idx}th object from the most recent round failed schema validation:\nHere is the object:\n{json.dumps(obj)}\nHere are the schema validation errors:\n{schema_validation_errs}")

            # Only keep this round's analysis if at least one object from the round was kept (the index
            # recorded above for those objects points at this entry).
            if valid_idxs_in_curr_round:
                obj_gen_analysis_strs.append(obj_gen_analysis)
            if schema_validation_feedback_msgs:
                joined_validation_feedback = "\n---------------\n".join(schema_validation_feedback_msgs)
                error_feedback += f"Some of the objects just generated failed to follow the schema:\n--------------\n{joined_validation_feedback}"
            new_case_ids = ", ".join([f"case id {model_nm}-{schema_idx}-{new_obj_idx}"
                                      for new_obj_idx in range(num_objs_b4_curr_round, len(generated_objects))])
            logger.debug(f"Using {model_nm}, generated {num_objs_in_curr_round} objects for scenario {scenario_domain} - {scenario_texts_label}:\nValid indexes within this round were: {valid_idxs_in_curr_round}\n{json.dumps(curr_generated_objects, indent=4)}\n\nAnalysis of object generation:\n{obj_gen_analysis}\n\nGlobal case ids of objects: {new_case_ids}")

        if len(generated_objects) >= target_num_objs:
            break

        # Not enough valid objects yet: record this exchange and build the follow-up prompt for the next call.
        remaining_obj_quota = target_num_objs - len(generated_objects)
        ai_responses.append(resp_text)
        next_prompt = f"There were problems with that output:\n{error_feedback}\nPlease generate a JSON array containing {remaining_obj_quota} additional diverse JSON objects conforming to that schema, following the system prompt instructions."
        if len(generated_objects) > 0:
            next_prompt += "\nDo not repeat any of the previously-generated objects that conformed to the schema."
        if len(schema_validation_feedback_msgs) > 0:
            # leading space keeps this sentence from running into the previous one
            next_prompt += " You may, however, create schema-compliant versions of any of the objects from this past round that were flagged as failing schema validation."
        followup_prompts.append(next_prompt)
    else:
        # for/else: only reached when the loop ran out of retries without hitting `break`.
        num_none_fillers = target_num_objs - len(generated_objects)
        logger.error(f"Exceeded retry limit when generating json objects with {model_nm} for schema index {schema_idx}; only successfully created {len(generated_objects)} objects that conformed to the schema, so adding {num_none_fillers} None objects to fill the gap")
        increment_problem_counter(should_use_claude)
        # The last raw response stands in as the "analysis" for all of the None fillers.
        obj_gen_analysis_strs.append(resp_text)
        for obj_idx in range(len(generated_objects), target_num_objs):
            generated_objects.append(None)
            obj_idx_to_analysis_idx[obj_idx] = len(obj_gen_analysis_strs)-1

    logger.debug(f"Using {model_nm}, generated {len(generated_objects)} objects for scenario {scenario_domain} - {scenario_texts_label}:\n{json.dumps(generated_objects, indent=4)}")
    return generated_objects, obj_gen_analysis_strs, obj_idx_to_analysis_idx
def generate_text_passages(google_client: Optional[GenerativeModel], anthropic_client: Optional[Anthropic],
                           schema_idx: int, schema: dict[str,Any], scenario_domain: str, scenario_texts_label: str,
                           json_objs: list[Optional[dict[str,Any]]], increment_problem_counter: Callable[[bool], None]
                           ) -> tuple[list[Optional[str]], list[str]]:
    """
    Ask exactly one model (Claude or Gemini) to write one free-text passage per JSON object, each in a fresh
    single-turn conversation.

    :param google_client: Gemini client to generate with; must be None when `anthropic_client` is given
    :param anthropic_client: Anthropic client to generate with; must be None when `google_client` is given
    :param schema_idx: index of the scenario's schema; only used in log messages/case ids
    :param schema: the JSON schema that the objects follow; included in the prompt
    :param scenario_domain: domain name that is inserted into the prompt
    :param scenario_texts_label: description of the kind of text passage, inserted into the prompt
    :param json_objs: objects to write passages for; None entries are skipped without calling the model
    :param increment_problem_counter: called (True for Claude, False for Gemini) each time a passage could not be
        extracted from the model's response
    :return: list of text passages and list of text-generation analyses, both parallel to `json_objs`. A passage is
        None when its object was None or when extraction from the response failed; a skipped object gets a
        placeholder analysis string so that both lists can be indexed by object index.
    :raises AssertionError: if not exactly one of the two clients is provided
    """
    assert (google_client is not None) ^ (anthropic_client is not None)#XOR- exactly one of them should be defined
    should_use_claude: bool = google_client is None
    model_nm = "Claude" if should_use_claude else "Gemini"
    provider = ModelProvider.ANTHROPIC if should_use_claude else ModelProvider.GOOGLE_DEEPMIND
    bundled_anthropic_client = AnthropicClientBundle(anthropic_client, anthropic_text_passage_generation_sys_prompt,
                                                     4096, anthropic_generation_temp, anthropic_model_specifier)

    logger.info(f"Generating text passages with {model_nm} for scenario {scenario_domain} - {scenario_texts_label}")

    # The prompt text below is kept flush-left exactly as it appears in the file; `${...}` placeholders are
    # filled in by string.Template after the f-string has inserted the scenario details.
    user_prompt_template = Template(d(f"""
Here is a JSON schema for the domain "{scenario_domain}":
```json
${{curr_scenario_schema}}
```
This describes the pieces of information that someone might want to extract in a structured way from "{scenario_texts_label}" text passages.
Here is a JSON object that follows that schema:
```json
${{schema_generated_json_instance}}
```
Please generate a “{scenario_texts_label}” free-text document that includes the JSON object's details, following the above instructions while doing so.
"""))

    # The serialized schema is the same for every object, so it is rendered once.
    schema_json = json.dumps(schema, indent=2)
    skipped_obj_analysis_placeholder = "(no text passage generation was attempted because the source object is missing)"

    text_passages: list[Optional[str]] = []
    text_creation_analyses: list[str] = []
    for obj_idx, obj in enumerate(json_objs):
        if obj is None:
            text_passages.append(None)
            # Keep the analyses list index-aligned with the passages list for callers that index both by object index.
            text_creation_analyses.append(skipped_obj_analysis_placeholder)
            logger.debug(f"With {model_nm}, skipping text passage generation for {obj_idx}th of {len(json_objs)} objects for schema index {schema_idx} because that object's generation seems to have gone awry")
            continue

        user_prompt = user_prompt_template.safe_substitute(curr_scenario_schema=schema_json,
                                                           schema_generated_json_instance=json.dumps(obj, indent=2))
        # Empty history lists: every passage is generated in its own single-turn conversation.
        resp_text: str = generate_with_model(provider, user_prompt, [], [], google_client, bundled_anthropic_client)
        text_passage, text_gen_analysis = extract_text_passage_from_output(resp_text)
        if text_passage is None:
            logger.error(f"Failed to extract text passage from {model_nm} output for {obj_idx}th of {len(json_objs)} objects for schema index {schema_idx}\nResponse:{resp_text}")
            increment_problem_counter(should_use_claude)
        else:
            logger.debug(f"Using {model_nm}, generated a text passage from the {obj_idx}'th object for scenario {scenario_domain} - {scenario_texts_label} (case id {model_nm}-{schema_idx}-{obj_idx}):\n{text_passage}\n\nAnalysis of text generation:\n{text_gen_analysis}")
        # Appended even when extraction failed (passage is None) so positions keep matching `json_objs`.
        text_passages.append(text_passage)
        text_creation_analyses.append(text_gen_analysis)
    return text_passages, text_creation_analyses
def main():
    """
    Run the whole data-generation pipeline: for each generating model (Claude first, then Gemini) and each scenario,
    top up the stored objects/text passages to the per-model group size, auto-validate the new pairs with
    `validate_generated_objects_texts` (passing it the *other* provider's client for reconstruction), persist the
    pairs that passed, append the failures to a markdown report under `validation_reports/`, and finally log
    summary statistics.

    Scenarios that already have enough stored objects are skipped; the final summaries print "n/a" for any
    figure that was therefore never computed in this run.
    """
    run_start_ts = dt.now()

    gemini_obj_gen_problem_count = 0
    gemini_text_gen_problem_count = 0
    claude_obj_gen_problem_count = 0
    claude_text_gen_problem_count = 0
    reconstruction_from_gemini_texts_problem_count = 0
    reconstruction_from_claude_texts_problem_count = 0

    # Callbacks handed to the generation/validation functions so they can report problems per source model.
    def increment_obj_gen_problem_count(was_claude_gen: bool):
        nonlocal claude_obj_gen_problem_count, gemini_obj_gen_problem_count
        if was_claude_gen:
            claude_obj_gen_problem_count += 1
        else:
            gemini_obj_gen_problem_count += 1

    def increment_text_gen_problem_count(was_claude_gen: bool):
        nonlocal claude_text_gen_problem_count, gemini_text_gen_problem_count
        if was_claude_gen:
            claude_text_gen_problem_count += 1
        else:
            gemini_text_gen_problem_count += 1

    def increment_reconstruction_problem_count(was_claude_generated_text_passage: bool):
        nonlocal reconstruction_from_claude_texts_problem_count, reconstruction_from_gemini_texts_problem_count
        if was_claude_generated_text_passage:
            reconstruction_from_claude_texts_problem_count += 1
        else:
            reconstruction_from_gemini_texts_problem_count += 1

    def mean_or_na(values_by_scenario: dict[int, float]):
        # statistics.mean raises StatisticsError on empty data, which happens when every scenario was skipped.
        values = list(values_by_scenario.values())
        return statistics.mean(values) if values else "n/a"

    scenario_domains, scenario_text_passage_descriptions, schemas = load_scenarios(schemas_path)

    google_generation_config_for_data_gen={"temperature": google_generation_temp, "max_output_tokens": 8192}
    google_generation_config_for_reconstruction={"temperature": google_reconstruction_temp, "max_output_tokens": 4096}
    google_genai.configure(api_key=os.environ[google_api_key_env])
    # One Gemini client per task because each carries its own system instruction.
    google_client_for_obj_gen = google_genai.GenerativeModel(
        google_model_specifier, generation_config=google_generation_config_for_data_gen,
        safety_settings=safety_types.HarmBlockThreshold.BLOCK_ONLY_HIGH,
        system_instruction=google_object_generation_sys_prompt)
    google_client_for_text_gen = google_genai.GenerativeModel(
        google_model_specifier, generation_config=google_generation_config_for_data_gen,
        safety_settings=safety_types.HarmBlockThreshold.BLOCK_ONLY_HIGH,
        system_instruction=google_text_passage_generation_sys_prompt)
    google_client_for_reconstruction = google_genai.GenerativeModel(
        google_model_specifier, safety_settings=safety_types.HarmBlockThreshold.BLOCK_ONLY_HIGH,
        system_instruction=google_object_reconstruction_sys_prompt, generation_config=google_generation_config_for_reconstruction)
    anthropic_client = anthropic.Anthropic(api_key=os.environ[anthropic_api_key_env])

    extraction_qualities_for_gemini_generated_texts: dict[int, float] = {}
    extraction_qualities_for_claude_generated_texts: dict[int, float] = {}

    # teammates - you can temporarily edit these 2 numbers if you only want to work on certain schemas
    first_scenario_idx = 0
    schema_idx_excl_bound = len(schemas)

    validation_reports_folder_path = Path("validation_reports")
    validation_reports_folder_path.mkdir(exist_ok=True)
    # ':' and '.' are replaced so the timestamp can be used in a file name
    run_start_label = run_start_ts.isoformat().replace(":", "_").replace(".", "_")
    validation_report_filepath = validation_reports_folder_path / f"validation_failures_report_{run_start_label}.md"
    # Same encoding as the later append-mode writes to this file.
    with open(validation_report_filepath, "w", encoding="utf-8") as validation_failures_file:
        validation_failures_file.write(f"Validation failures report for data generation run starting at {run_start_ts.isoformat()} \n"
                                       f"Going from scenario {first_scenario_idx} ({scenario_domains[first_scenario_idx]} - {scenario_text_passage_descriptions[first_scenario_idx]}) \n"
                                       f"through scenario {schema_idx_excl_bound-1} ({scenario_domains[schema_idx_excl_bound-1]} - {scenario_text_passage_descriptions[schema_idx_excl_bound-1]}) \n"
                                       f"Google model specifier: {google_model_specifier} \nAnthropic model specifier: {anthropic_model_specifier}\n\n")

    # Per-scenario bookkeeping; a scenario index is only present if generation actually ran for it.
    target_num_objs_for_gemini_by_scenario: dict[int,int] = {}
    target_num_objs_for_claude_by_scenario: dict[int,int] = {}
    num_objs_generated_with_gemini_by_scenario: dict[int,int] = {}
    num_objs_generated_with_claude_by_scenario: dict[int,int] = {}
    num_valid_objs_generated_with_gemini_by_scenario: dict[int,int] = {}
    num_valid_objs_generated_with_claude_by_scenario: dict[int,int] = {}

    for should_generate_with_claude in [True, False]:
        src_model_nm = "Claude" if should_generate_with_claude else "Gemini"
        for scenario_idx in range(first_scenario_idx, schema_idx_excl_bound):
            schema = schemas[scenario_idx]
            scenario_domain = scenario_domains[scenario_idx]
            scenario_texts_label = scenario_text_passage_descriptions[scenario_idx]

            curr_objs_folder = claude_objs_path if should_generate_with_claude else gemini_objs_path
            curr_case_objs_path = curr_objs_folder / (f"{scenario_idx}_{scenario_domain}__{scenario_texts_label}__objs.json".replace(" ", "_"))
            curr_texts_folder = claude_texts_path if should_generate_with_claude else gemini_texts_path
            curr_case_texts_path = curr_texts_folder / (f"{scenario_idx}_{scenario_domain}__{scenario_texts_label}__texts.json".replace(" ", "_"))

            # Previously stored data for this model+scenario; a falsy loader result is treated as "nothing yet".
            json_objs: list[dict[str,Any]] = load_objects_for_one_model_and_scenario(curr_objs_folder, schema,
                                                                                    scenario_idx) or []
            text_passages: list[str] = load_text_passages_for_one_model_and_scenario(curr_texts_folder, scenario_idx
                                                                                     ) or []
            assert len(json_objs) == len(text_passages)

            num_objs_needed_for_case = (anthropic_obj_gen_group_size if should_generate_with_claude
                                        else google_obj_gen_group_size) - len(json_objs)
            if num_objs_needed_for_case <= 0:
                logger.info(f"Skipping generation of objects and text passages for scenario {scenario_idx} \"{scenario_domain}\" - \"{scenario_texts_label}\" because the needed number of objects has already been generated")
                continue

            if should_generate_with_claude:
                target_num_objs_for_claude_by_scenario[scenario_idx] = num_objs_needed_for_case
            else:
                target_num_objs_for_gemini_by_scenario[scenario_idx] = num_objs_needed_for_case

            # Exactly one provider's client is passed for generation; the other provider does the reconstruction.
            google_client_to_use_for_obj_gen = None if should_generate_with_claude else google_client_for_obj_gen
            google_client_to_use_for_text_gen = None if should_generate_with_claude else google_client_for_text_gen
            google_client_to_use_for_reconstruction = google_client_for_reconstruction if should_generate_with_claude else None
            anthropic_client_to_use_for_gen = anthropic_client if should_generate_with_claude else None
            anthropic_client_to_use_for_reconstruction = None if should_generate_with_claude else anthropic_client

            new_json_objs, obj_gen_analyses, new_obj_to_analysis_map = generate_json_objs(
                google_client_to_use_for_obj_gen, anthropic_client_to_use_for_gen, scenario_idx, schema, scenario_domain,
                scenario_texts_label, increment_obj_gen_problem_count, num_objs_needed_for_case)
            new_text_passages, text_gen_analyses = generate_text_passages(
                google_client_to_use_for_text_gen, anthropic_client_to_use_for_gen, scenario_idx, schema,
                scenario_domain, scenario_texts_label, new_json_objs, increment_text_gen_problem_count)

            # A pair counts as generated only if its text passage exists.
            num_objs_and_texts_generated = sum(1 for text in new_text_passages if text is not None)
            if should_generate_with_claude:
                num_objs_generated_with_claude_by_scenario[scenario_idx] = num_objs_and_texts_generated
            else:
                num_objs_generated_with_gemini_by_scenario[scenario_idx] = num_objs_and_texts_generated

            logger.info(f"Starting auto-validation of {num_objs_needed_for_case} {src_model_nm}-generated objects and text passages for scenario {scenario_idx} \"{scenario_domain}\" - \"{scenario_texts_label}\"")
            # NOTE(review): the val_failed_* results are used below as containers keyed by the index of the failed
            # object within this run (val_failed_objs is iterated for those indexes) - confirm against the validator.
            (avg_extraction_quality_for_case, val_failed_objs, val_failed_extraction_analyses,
             val_failed_extraction_qualities, val_failed_fact_recalls, val_failed_hallucination_counts,
             val_failed_extraction_differences) = validate_generated_objects_texts(
                google_client_to_use_for_reconstruction, anthropic_client_to_use_for_reconstruction, scenario_idx,
                schema, scenario_domain, scenario_texts_label, new_json_objs, new_text_passages,
                increment_reconstruction_problem_count)
            logger.info(f"Extraction quality for {scenario_idx} {scenario_domain} - {scenario_texts_label} was {avg_extraction_quality_for_case} when original object and text passage were generated by {src_model_nm}")
            if should_generate_with_claude:
                extraction_qualities_for_claude_generated_texts[scenario_idx] = avg_extraction_quality_for_case
            else:
                extraction_qualities_for_gemini_generated_texts[scenario_idx] = avg_extraction_quality_for_case

            # Keep only the pairs whose index was not reported as a validation failure.
            validation_passed_new_json_objs = [obj for obj_idx, obj in enumerate(new_json_objs)
                                               if obj_idx not in val_failed_objs]
            validation_passed_new_text_passages = [text for obj_idx, text in enumerate(new_text_passages)
                                                   if obj_idx not in val_failed_objs]
            json_objs.extend(validation_passed_new_json_objs)
            text_passages.extend(validation_passed_new_text_passages)
            with open(curr_case_objs_path, "w", encoding="utf-8") as objs_file:
                json.dump(json_objs, objs_file, indent=4)
            with open(curr_case_texts_path, "w", encoding="utf-8") as texts_file:
                json.dump(text_passages, texts_file, indent=4)

            if should_generate_with_claude:
                num_valid_objs_generated_with_claude_by_scenario[scenario_idx] = len(validation_passed_new_json_objs)
            else:
                num_valid_objs_generated_with_gemini_by_scenario[scenario_idx] = len(validation_passed_new_json_objs)

            with open(validation_report_filepath, "a", encoding="utf-8") as validation_failures_file:
                for failed_obj_idx in val_failed_objs:
                    validation_failures_file.write("\n----------------------------\n----------------------------\n\n"
                                                   f"# Object {failed_obj_idx} for scenario {scenario_idx} \"{scenario_domain}\" - \"{scenario_texts_label}\" failed validation:\n"
                                                   f"case id {src_model_nm}-{scenario_idx}-{failed_obj_idx} \nNote that object index is within current run\n"
                                                   f"## New object:\n```json\n{json.dumps(new_json_objs[failed_obj_idx], indent=4)}\n```\n"
                                                   f"## Extracted object:\n```json\n{json.dumps(val_failed_objs[failed_obj_idx], indent=4)}\n```\n"
                                                   f"## Extraction Evaluation\n"
                                                   f"Extraction quality: {val_failed_extraction_qualities[failed_obj_idx]:.4f} ;"
                                                   f"Fact recall: {val_failed_fact_recalls[failed_obj_idx]:.4f}; "
                                                   f"Hallucination count: {val_failed_hallucination_counts[failed_obj_idx]} \n"
                                                   f"Extraction differences: {val_failed_extraction_differences[failed_obj_idx]}\n"
                                                   f"## Text passage:\n{new_text_passages[failed_obj_idx]}\n"
                                                   f"## Analysis of object generation:\n{obj_gen_analyses[new_obj_to_analysis_map[failed_obj_idx]]}\n"
                                                   f"## Analysis of text generation:\n{text_gen_analyses[failed_obj_idx]}\n"
                                                   f"## Analysis of extraction:\n{val_failed_extraction_analyses[failed_obj_idx]}"
                                                   )

    target_num_objs_for_gemini = sum(target_num_objs_for_gemini_by_scenario.values())
    target_num_objs_for_claude = sum(target_num_objs_for_claude_by_scenario.values())
    num_objs_generated_with_gemini = sum(num_objs_generated_with_gemini_by_scenario.values())
    num_objs_generated_with_claude = sum(num_objs_generated_with_claude_by_scenario.values())
    num_valid_objs_generated_with_gemini = sum(num_valid_objs_generated_with_gemini_by_scenario.values())
    num_valid_objs_generated_with_claude = sum(num_valid_objs_generated_with_claude_by_scenario.values())
    avg_quality_for_gemini_texts = mean_or_na(extraction_qualities_for_gemini_generated_texts)
    avg_quality_for_claude_texts = mean_or_na(extraction_qualities_for_claude_generated_texts)

    logger.info(f"\nProblems encountered with Gemini (out of {num_objs_generated_with_gemini} actually-generated objects & as many passages, where the goal had been {target_num_objs_for_gemini}):\n"
                f"object generation: {gemini_obj_gen_problem_count}; text passage generation: {gemini_text_gen_problem_count}\n"
                f"Problems encountered with Claude (out of {num_objs_generated_with_claude} actually-generated objects & as many passages, where the goal had been {target_num_objs_for_claude}):\n"
                f"object generation: {claude_obj_gen_problem_count}; text passage generation: {claude_text_gen_problem_count}\n"
                f"Problems encountered with object reconstruction from text passages:\n"
                f"When Claude was extracting from Gemini-generated text passages: {reconstruction_from_gemini_texts_problem_count};\n"
                f"When Gemini was extracting from Claude-generated text passages: {reconstruction_from_claude_texts_problem_count}\n"
                f"Extraction qualities averaged across scenarios for texts made by Gemini: {avg_quality_for_gemini_texts};\n"
                f"Extraction qualities averaged across scenarios for texts made by Claude: {avg_quality_for_claude_texts}\n"
                f"Number of valid objects generated by Gemini: {num_valid_objs_generated_with_gemini}; "
                f"Number of valid objects generated by Claude: {num_valid_objs_generated_with_claude}")

    # Scenarios skipped for a model have no entry in that model's dicts, so look values up with a fallback
    # instead of indexing (which would raise KeyError on a partially-complete dataset).
    not_run = "n/a"
    for scenario_idx in range(first_scenario_idx, schema_idx_excl_bound):
        logger.info(f"Extraction quality for scenario {scenario_domains[scenario_idx]} - {scenario_text_passage_descriptions[scenario_idx]}:\n"
                    f"from texts generated by Claude: {extraction_qualities_for_claude_generated_texts.get(scenario_idx, not_run)};\n"
                    f"from texts generated by Gemini: {extraction_qualities_for_gemini_generated_texts.get(scenario_idx, not_run)}\n"
                    f"Number of valid objects generated by Gemini: {num_valid_objs_generated_with_gemini_by_scenario.get(scenario_idx, not_run)} out of {num_objs_generated_with_gemini_by_scenario.get(scenario_idx, not_run)} total objects generated by Gemini for that scenario (and where the target number of objects was {target_num_objs_for_gemini_by_scenario.get(scenario_idx, not_run)})\n"
                    f"Number of valid objects generated by Claude: {num_valid_objs_generated_with_claude_by_scenario.get(scenario_idx, not_run)} out of {num_objs_generated_with_claude_by_scenario.get(scenario_idx, not_run)} total objects generated by Claude for that scenario (and where the target number of objects was {target_num_objs_for_claude_by_scenario.get(scenario_idx, not_run)})")
# Run the pipeline only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()