Skip to content

Commit

Permalink
Fix Claude 3 run expander
Browse files Browse the repository at this point in the history
yifanmai committed Apr 24, 2024
1 parent ebbb346 commit 818b3d4
Showing 3 changed files with 20 additions and 1 deletion.
5 changes: 4 additions & 1 deletion src/helm/benchmark/model_metadata_registry.py
Original file line number Diff line number Diff line change
@@ -159,7 +159,10 @@ def register_model_metadata(model_metadata: ModelMetadata) -> None:
def get_model_metadata(model_name: str) -> ModelMetadata:
    """Return the `ModelMetadata` for the model name.

    Args:
        model_name: Key to look up in `MODEL_NAME_TO_MODEL_METADATA`.

    Returns:
        The `ModelMetadata` stored under `model_name`.

    Raises:
        ValueError: If `model_name` is not a key of
            `MODEL_NAME_TO_MODEL_METADATA`.
    """
    if model_name not in MODEL_NAME_TO_MODEL_METADATA:
        # A single raise with the actionable message; the older, terser
        # "No model with name" raise would otherwise shadow this one.
        raise ValueError(
            f"No model metadata for model name: {model_name} - "
            "did you remember to add this model to model_metadata.yaml?"
        )

    return MODEL_NAME_TO_MODEL_METADATA[model_name]

10 changes: 10 additions & 0 deletions src/helm/benchmark/run_expander.py
Original file line number Diff line number Diff line change
@@ -322,6 +322,16 @@ class AnthropicClaude3RunExpander(RunExpander):
name = "claude_3"

def expand(self, run_spec: RunSpec) -> List[RunSpec]:
# Remove all stop sequences that do not contain non-whitespace characters.
# This prevents the Anthropic API from returning the following error:
# "stop_sequences: each stop sequence must contain non-whitespace"
stop_sequences_with_non_whitespace = [
stop_sequence for stop_sequence in run_spec.adapter_spec.stop_sequences if stop_sequence.strip()
]
run_spec = replace(
run_spec,
adapter_spec=replace(run_spec.adapter_spec, stop_sequences=stop_sequences_with_non_whitespace),
)
if run_spec.adapter_spec.method == ADAPT_MULTIPLE_CHOICE_JOINT:
instructions = "Answer with only a single letter."
if run_spec.adapter_spec.instructions:
6 changes: 6 additions & 0 deletions src/helm/benchmark/run_spec_factory.py
Original file line number Diff line number Diff line change
@@ -14,6 +14,7 @@
from helm.benchmark.model_metadata_registry import (
ANTHROPIC_CLAUDE_1_MODEL_TAG,
ANTHROPIC_CLAUDE_2_MODEL_TAG,
ANTHROPIC_CLAUDE_3_MODEL_TAG,
BUGGY_TEMP_0_TAG,
CHATML_MODEL_TAG,
GOOGLE_GEMINI_MODEL_TAG,
@@ -30,6 +31,7 @@
from helm.benchmark.run_expander import (
RUN_EXPANDERS,
AnthropicClaude2RunExpander,
AnthropicClaude3RunExpander,
ChatMLRunExpander,
GlobalPrefixRunExpander,
IDEFICSInstructRunExpander,
@@ -125,6 +127,10 @@ def alter_run_spec(run_spec: RunSpec) -> RunSpec:
if ANTHROPIC_CLAUDE_1_MODEL_TAG in model.tags or ANTHROPIC_CLAUDE_2_MODEL_TAG in model.tags:
run_spec = singleton(AnthropicClaude2RunExpander().expand(run_spec))

# Anthropic Claude 3
if ANTHROPIC_CLAUDE_3_MODEL_TAG in model.tags:
run_spec = singleton(AnthropicClaude3RunExpander().expand(run_spec))

# Google Gemini Vision returns an empty completion or throws an error if max_tokens is 1
if (
VISION_LANGUAGE_MODEL_TAG in model.tags

0 comments on commit 818b3d4

Please sign in to comment.