More fixes

stanford-crfm · Jun 27, 2024 · f1056c7 · f1056c7
1 parent 939afd4
commit f1056c7
Show file tree

Hide file tree

Showing 3 changed files with 11 additions and 5 deletions.
diff --git a/src/helm/benchmark/adaptation/common_adapter_specs.py b/src/helm/benchmark/adaptation/common_adapter_specs.py
@@ -67,7 +67,7 @@ def get_multiple_choice_separate_adapter_spec(method: str, empty_input: bool = F
     or
     [reference_i]
     """
-    assert method in {ADAPT_MULTIPLE_CHOICE_SEPARATE_ORIGINAL, ADAPT_MULTIPLE_CHOICE_SEPARATE_CALIBRATED, ADAPT_MULTIPLE_CHOICE_SEPARATE_LANGUAGE_MODELING}
+    assert method in {ADAPT_MULTIPLE_CHOICE_SEPARATE_ORIGINAL, ADAPT_MULTIPLE_CHOICE_SEPARATE_CALIBRATED}
 
     return AdapterSpec(
         method=method,

diff --git a/src/helm/benchmark/presentation/run_entries_bhasa.conf b/src/helm/benchmark/presentation/run_entries_bhasa.conf
@@ -58,10 +58,10 @@ entries: [
 
     ###  1. Syntax: Minimal Pairs
     ###  Use this to run the minimal pairs evaluation as an MCQ task
-    {description: "lindsea_syntax_minimal_pairs:model=openai/gpt-3.5-turbo-0125,method=multiple_choice_joint,subset=npis_and_negation", priority: 1},
-    {description: "lindsea_syntax_minimal_pairs:model=openai/gpt-3.5-turbo-0125,method=multiple_choice_joint,subset=argument_structure", priority: 1},
-    {description: "lindsea_syntax_minimal_pairs:model=openai/gpt-3.5-turbo-0125,method=multiple_choice_joint,subset=filler_gap_dependencies", priority: 1},
-    {description: "lindsea_syntax_minimal_pairs:model=openai/gpt-3.5-turbo-0125,method=multiple_choice_joint,subset=morphology", priority: 1},
+    {description: "lindsea_syntax_minimal_pairs:model=text,method=multiple_choice_joint,subset=npis_and_negation", priority: 1},
+    {description: "lindsea_syntax_minimal_pairs:model=text,method=multiple_choice_joint,subset=argument_structure", priority: 1},
+    {description: "lindsea_syntax_minimal_pairs:model=text,method=multiple_choice_joint,subset=filler_gap_dependencies", priority: 1},
+    {description: "lindsea_syntax_minimal_pairs:model=text,method=multiple_choice_joint,subset=morphology", priority: 1},
 
     ###  Use this instead of the above in order to run the minimal pairs evaluation using logprobs
     # {description: "lindsea_syntax_minimal_pairs:model=text,method=probs,language=id", priority: 1},

diff --git a/src/helm/benchmark/run_specs/bhasa_run_specs.py b/src/helm/benchmark/run_specs/bhasa_run_specs.py
@@ -1,5 +1,6 @@
 from helm.benchmark.adaptation.adapter_spec import (
     ADAPT_MULTIPLE_CHOICE_SEPARATE_ORIGINAL,
+    ADAPT_MULTIPLE_CHOICE_SEPARATE_CALIBRATED,
 )
 from helm.benchmark.adaptation.common_adapter_specs import (
     get_generation_adapter_spec,
@@ -576,6 +577,11 @@ def get_lindsea_syntax_minimal_pairs_spec(language: str = "id", method: str = "m
             method=ADAPT_MULTIPLE_CHOICE_SEPARATE_ORIGINAL,
             empty_input=True,
         )
+    elif method == "multiple_choice_separate_calibrated":
+        adapter_spec = get_multiple_choice_separate_adapter_spec(
+            method=ADAPT_MULTIPLE_CHOICE_SEPARATE_CALIBRATED,
+            empty_input=True,
+        )
     else:
         raise ValueError(f"Unknown method {method}")