Fix caching and content block handling for Gemini (#2514)
Co-authored-by: Tony Lee <[email protected]>
yifanmai and teetone authored Mar 31, 2024
1 parent b29fb5e commit 60a7a71
Showing 3 changed files with 62 additions and 4 deletions.
2 changes: 1 addition & 1 deletion setup.cfg
@@ -132,7 +132,7 @@ openai =
     tiktoken~=0.3.3
 
 google =
-    google-cloud-aiplatform~=1.38.1
+    google-cloud-aiplatform~=1.44
 
 tsinghua =
     icetk~=0.0.4
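Note on the version pin: ~= is PEP 440's compatible-release operator, so google-cloud-aiplatform~=1.44 accepts any 1.x release at or above 1.44 but excludes 2.0. A quick illustration using the packaging library (not part of this change; shown only to clarify the specifier):

from packaging.specifiers import SpecifierSet

# PEP 440 compatible release: ~=1.44 is equivalent to >=1.44, ==1.*
spec = SpecifierSet("~=1.44")
print("1.44.0" in spec)  # True
print("1.38.1" in spec)  # False: below the new floor
print("2.0.0" in spec)   # False: the next major version is excluded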
10 changes: 10 additions & 0 deletions src/helm/benchmark/run_expander.py
@@ -90,6 +90,16 @@ def expand(self, run_spec: RunSpec) -> List[RunSpec]:
                 instructions="I am an expert AI assistant who is here to help you with the following. "
                 + adapter_spec.instructions,
             )
+        elif self.value == "prefix_1":
+            adapter_spec = replace(
+                adapter_spec,
+                instructions="Answer only with a single letter. " + adapter_spec.instructions,
+            )
+        elif self.value == "suffix_1":
+            adapter_spec = replace(
+                adapter_spec,
+                instructions=adapter_spec.instructions + " Answer only with a single letter.",
+            )
         else:
             raise Exception("Unknown value: {self.value}")
         return [
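The two new expander values wrap the existing instructions with a fixed directive. A minimal sketch of their effect, using a stripped-down stand-in for HELM's AdapterSpec (hypothetical; the real dataclass has many more fields):

from dataclasses import dataclass, replace

@dataclass(frozen=True)
class AdapterSpec:
    """Stand-in for helm.benchmark.adaptation.adapter_spec.AdapterSpec."""
    instructions: str = ""

base = AdapterSpec(instructions="The following are multiple choice questions.")

# prefix_1 prepends the directive; suffix_1 appends it.
prefix_1 = replace(base, instructions="Answer only with a single letter. " + base.instructions)
suffix_1 = replace(base, instructions=base.instructions + " Answer only with a single letter.")

print(prefix_1.instructions)  # Answer only with a single letter. The following are multiple choice questions.
print(suffix_1.instructions)  # The following are multiple choice questions. Answer only with a single letter.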
54 changes: 51 additions & 3 deletions src/helm/clients/vertexai_client.py
@@ -23,6 +23,10 @@
 _models: Dict[str, Any] = {}
 
 
+class VertexAIContentBlockedError(Exception):
+    pass
+
+
 class VertexAIClient(CachingClient, ABC):
     """Client for Vertex AI models"""
 
@@ -133,6 +137,17 @@ class VertexAIChatClient(VertexAIClient):
     # Gemini returns this error for certain valid requests
     CONTENT_HAS_NO_PARTS_ERROR: str = "Content has no parts."
 
+    # Enum taken from:
+    # https://cloud.google.com/vertex-ai/docs/reference/rpc/google.cloud.aiplatform.v1beta1#google.cloud.aiplatform.v1beta1.Candidate.FinishReason
+    # We don't directly import this enum because it can differ between different Vertex AI library versions.
+    CONTENT_BLOCKED_FINISH_REASONS: List[int] = [
+        3,  # SAFETY
+        4,  # RECITATION
+        6,  # BLOCKLIST
+        7,  # PROHIBITED_CONTENT
+        8,  # SPII (Sensitive Personally Identifiable Information)
+    ]
+
     @staticmethod
     def get_model(model_name: str) -> Any:
         global _models_lock
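Hard-coding the integer values keeps the check independent of which enum members a given aiplatform release defines; proto enum fields also compare equal to plain ints. A small self-contained sketch of the membership test (values mirror the list above; 1 is STOP, the normal finish reason):

# Finish reasons that indicate content blocking, as plain ints.
CONTENT_BLOCKED_FINISH_REASONS = [3, 4, 6, 7, 8]

# Proto enum fields compare equal to their integer values, so this test works
# whether finish_reason arrives as an enum member or as an int.
for finish_reason in (1, 3):  # 1 == STOP (normal), 3 == SAFETY (blocked)
    blocked = finish_reason in CONTENT_BLOCKED_FINISH_REASONS
    print(f"finish_reason={finish_reason} blocked={blocked}")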
@@ -186,16 +201,27 @@ def do_it() -> Dict[str, Any]:
                     contents, generation_config=parameters, safety_settings=self.safety_settings
                 )
                 candidates: List[Candidate] = response.candidates
+
+                # Depending on the version of the Vertex AI library and the type of content blocking,
+                # content blocking can show up in many ways, so this defensively handles most of these ways
+                if not response.candidates:
+                    raise VertexAIContentBlockedError("No candidates in response due to content blocking")
+                for candidate in response.candidates:
+                    if candidate.finish_reason in VertexAIChatClient.CONTENT_BLOCKED_FINISH_REASONS:
+                        raise VertexAIContentBlockedError(
+                            f"Content blocked with finish reason {candidate.finish_reason}"
+                        )
                 try:
                     response_dict = {
-                        "predictions": [{"text": completion.text for completion in candidates}],
+                        "predictions": [{"text": completion.text} for completion in candidates],
                     }  # TODO: Extract more information from the response
                 except ValueError as e:
                     if "Content has no parts" in str(e):
                         # The prediction was either blocked due to safety settings or the model stopped and returned
                         # nothing (which also happens when the model is blocked).
-                        # In both cases, we return an empty prediction.
-                        return {"predictions": None}
+                        # For now, we don't cache blocked requests, because we are trying to get the
+                        # content blocking removed.
+                        raise VertexAIContentBlockedError("Content has no parts due to content blocking")
                 return response_dict
 
             # We need to include the engine's name to differentiate among requests made for different model
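Raising VertexAIContentBlockedError instead of returning a placeholder is what keeps blocked responses out of the cache: the compute function only runs on a miss, and the exception propagates before anything is written. A minimal sketch of that read-through pattern (hypothetical Cache class; HELM's real cache also records timing via wrap_request_time and uses persistent storage):

class VertexAIContentBlockedError(Exception):
    pass

class Cache:
    """Hypothetical read-through cache mirroring cache.get(key, compute)."""

    def __init__(self):
        self._store = {}

    def get(self, key, compute):
        if key in self._store:
            return self._store[key], True  # cache hit
        value = compute()  # may raise; in that case nothing is stored
        self._store[key] = value
        return value, False

cache = Cache()

def do_it():
    raise VertexAIContentBlockedError("Content blocked")

try:
    cache.get("prompt-1", do_it)
except VertexAIContentBlockedError:
    pass

print("prompt-1" in cache._store)  # False: the blocked request was never cached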
@@ -211,10 +237,20 @@ def do_it() -> Dict[str, Any]:
             )
 
             response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
+        except VertexAIContentBlockedError:
+            return RequestResult(
+                success=False,
+                cached=False,
+                error="Response was empty due to content moderation filter",
+                completions=[],
+                embedding=[],
+                error_flags=ErrorFlags(is_retriable=False, is_fatal=False),
+            )
         except (requests.exceptions.RequestException, AssertionError) as e:
             error: str = f"VertexAITextClient error: {e}"
             return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
 
+        # Handle cached responses with blocked content from old versions of HELM.
         if response["predictions"] is None:
             return RequestResult(
                 success=False,
@@ -228,6 +264,18 @@
             )
 
         for prediction in response["predictions"]:
+            # Handle cached responses with blocked content from old versions of HELM.
+            if "text" not in prediction:
+                return RequestResult(
+                    success=False,
+                    cached=False,
+                    error="Response was empty due to content moderation filter",
+                    completions=[],
+                    embedding=[],
+                    error_flags=ErrorFlags(is_retriable=False, is_fatal=False),
+                    request_time=response["request_time"],
+                    request_datetime=response["request_datetime"],
+                )
             response_text = prediction["text"]
 
             # The Python SDK does not support echo
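Older versions of HELM did cache blocked content, either as {"predictions": None} or as prediction dicts missing the "text" key; the two guards above convert such stale cache entries into non-retriable errors instead of raising KeyError. A condensed sketch of both checks (hypothetical helper, not part of the diff):

def is_legacy_blocked_response(response: dict) -> bool:
    """True if a cached response matches either legacy blocked-content shape."""
    if response["predictions"] is None:
        return True
    return any("text" not in prediction for prediction in response["predictions"])

assert is_legacy_blocked_response({"predictions": None})
assert is_legacy_blocked_response({"predictions": [{}]})
assert not is_legacy_blocked_response({"predictions": [{"text": "B"}]})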
