add arc agi 2 (#642)

NathanHB · web-flow · commit 75f44298a3d7 · 2025-04-07T18:49:30.000+02:00
* add arc agi 2

* add arc agi 2

* add arc agi 2
diff --git a/src/lighteval/tasks/default_prompts.py b/src/lighteval/tasks/default_prompts.py
@@ -90,6 +90,57 @@ def apps(line, task_name: str = None):
     )
 
 
+def arc_agi_2(line, task_name: str = None):
+    """Build the ARC-AGI-2 prompt `Doc` from one dataset row.
+
+    Reads `line["fewshots"]` (input/output pairs of 2D lists) and `line["question"][0]` (the test
+    pair); the single choice, which is also the gold, is `str()` of the test pair's output.
+    """
+
+    def convert_2d_list_to_string(list_of_lists: list[list[int]]) -> str:
+        """Serialize a 2D list as one JSON array per row, each row followed by a newline."""
+        # A single join avoids re-copying the accumulated text on every row.
+        return "".join(json.dumps(row) + "\n" for row in list_of_lists)
+
+    # query from: https://github.com/arcprize/model_baseline/blob/main/src/prompts/system_prompt.txt
+    query = """You are participating in a puzzle solving competition. You are an expert at solving puzzles.
+
+Below is a list of input and output pairs with a pattern. Your goal is to identify the pattern or transformation in the training examples that maps the input to the output, then apply that pattern to the test input to give a final output.
+
+Respond in the format of the training output examples
+
+--Training Examples--
+{training_examples}
+--End of Training Examples--
+
+--Test Input--
+{test_input}
+--End of Test Input--
+
+Your response:""".strip()
+
+    # Example numbering is 0-based; label spacing is kept exactly so rendered prompts do not change.
+    training_examples = "".join(
+        f"--Example {i}-- \n\n INPUT: \n\n{convert_2d_list_to_string(pair['input'])}\n\n"
+        f"OUTPUT: \n\n{convert_2d_list_to_string(pair['output'])}\n\n"
+        for i, pair in enumerate(line["fewshots"])
+    )
+
+    test_input = convert_2d_list_to_string(line["question"][0]["input"])
+
+    # NOTE(review): gold is the Python str() of the whole 2D list ("[[...], [...]]"), while the prompt
+    # renders grids one JSON row per line -- confirm this is the format the metric should compare to.
+    gold = str(line["question"][0]["output"])
+    query = query.format(training_examples=training_examples, test_input=test_input)
+
+    return Doc(
+        task_name=task_name,
+        query=query,
+        choices=[gold],
+        gold_index=0,
+    )
+
+
 def arc(line, task_name: str = None):
     return Doc(
         task_name=task_name,
diff --git a/src/lighteval/tasks/default_tasks.py b/src/lighteval/tasks/default_tasks.py
@@ -442,6 +442,22 @@
     trust_dataset=True,
     version=0,
 )
+arc_agi_2 = LightevalTaskConfig(
+    name="arc_agi_2",
+    suite=["lighteval"],
+    prompt_function=prompt.arc_agi_2,
+    hf_repo="arc-agi-community/arc-agi-2",
+    hf_subset="default",
+    hf_avail_splits=["train", "test"],
+    evaluation_splits=["test"],
+    few_shots_split=None,  # in-prompt examples are read from each row's "fewshots" field by the prompt function
+    few_shots_select=None,
+    generation_size=2048,
+    metric=[Metrics.exact_match],  # gold is the str() of the test output built in prompt.arc_agi_2
+    stop_sequence=None,
+    trust_dataset=False,
+    version=0,
+)
 arc_c_letters_original = LightevalTaskConfig(
     name="arc:c:letters",
     suite=["original", "arc"],