Add Amazon Titan (#2165)
yifanmai authored Feb 17, 2024
1 parent 2db78bc commit aa3e20b
Showing 7 changed files with 268 additions and 5 deletions.
6 changes: 6 additions & 0 deletions setup.cfg
@@ -112,6 +112,11 @@ aleph-alpha =
    aleph-alpha-client~=2.14.0
    tokenizers>=0.13.3

amazon =
    boto3~=1.28.57
    awscli~=1.29.57
    botocore~=1.31.57

anthropic =
    anthropic~=0.2.5
    websocket-client~=1.3.2 # For legacy stanford-online-all-v4-s3
@@ -134,6 +139,7 @@ yandex =

models =
    crfm-helm[aleph-alpha]
    crfm-helm[amazon]
    crfm-helm[anthropic]
    crfm-helm[google]
    crfm-helm[mistral]
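With this change, the Bedrock dependencies install as an optional extra, e.g. `pip install "crfm-helm[amazon]"`, and the `crfm-helm[models]` meta-extra now pulls them in as well.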
6 changes: 5 additions & 1 deletion src/helm/benchmark/test_model_deployment_definition.py
@@ -53,7 +53,11 @@ def test_models_has_window_service(self, deployment_name: str):
             return

         # Can't test Vertex AI because it requires Google credentials
-        if "text-bison" in model.name or "text-unicorn" in model.name:
+        if deployment_name.startswith("google/"):
             return

+        # Can't test Bedrock because it requires Amazon credentials
+        if deployment_name.startswith("amazon/"):
+            return
+
         # Loads the model, window service and tokenizer
24 changes: 24 additions & 0 deletions src/helm/config/model_deployments.yaml
@@ -130,6 +130,30 @@ model_deployments:
    window_service_spec:
      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"


  # Amazon
  - name: amazon/titan-text-lite-v1
    model_name: amazon/titan-text-lite-v1
    tokenizer_name: huggingface/gpt2
    max_sequence_length: 4000
    client_spec:
      class_name: "helm.proxy.clients.bedrock_client.BedrockTitanClient"

  - name: amazon/titan-tg1-large
    model_name: amazon/titan-tg1-large
    tokenizer_name: huggingface/gpt2
    max_sequence_length: 8000
    client_spec:
      class_name: "helm.proxy.clients.bedrock_client.BedrockTitanClient"

  - name: amazon/titan-text-express-v1
    model_name: amazon/titan-text-express-v1
    tokenizer_name: huggingface/gpt2
    max_sequence_length: 8000
    client_spec:
      class_name: "helm.proxy.clients.bedrock_client.BedrockTitanClient"


  # Anthropic
  - name: anthropic/claude-v1.3
    model_name: anthropic/claude-v1.3
33 changes: 31 additions & 2 deletions src/helm/config/model_metadata.yaml
@@ -146,8 +146,7 @@ models:
  # release_date: TBD
  # # Does not support echo.
  # tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]


  - name: AlephAlpha/m-vader
    display_name: MultiFusion (13B)
    description: MultiFusion is a multimodal, multilingual diffusion model that extends the capabilities of Stable Diffusion v1.4 by integrating different pre-trained modules, which transfers their capabilities to the downstream model ([paper](https://arxiv.org/abs/2305.15296))
@@ -158,6 +157,36 @@
    tags: [TEXT_TO_IMAGE_MODEL_TAG]


  # Amazon
  # References for Amazon Titan models:
  # - https://aws.amazon.com/bedrock/titan/
  # - https://community.aws/content/2ZUVD3fkNtqEOYIa2iUJAFArS7c/family-of-titan-text-models---cli-demo
  # - https://aws.amazon.com/about-aws/whats-new/2023/11/amazon-titan-models-express-lite-bedrock/
  - name: amazon/titan-text-lite-v1
    display_name: Amazon Titan Text Lite
    description: Amazon Titan Text Lite is a lightweight, efficient model perfect for fine-tuning English-language tasks like summarization and copywriting. It caters to customers seeking a smaller, cost-effective, and highly customizable model. It supports various formats, including text generation, code generation, rich text formatting, and orchestration (agents). Key model attributes encompass fine-tuning, text generation, code generation, and rich text formatting.
    creator_organization_name: Amazon
    access: limited
    release_date: 2023-11-29
    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]

  - name: amazon/titan-tg1-large
    display_name: Amazon Titan Large
    description: Amazon Titan Large is an efficient model well suited for fine-tuning English-language tasks such as summarization, article creation, and marketing campaigns.
    creator_organization_name: Amazon
    access: limited
    release_date: 2023-11-29
    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]

  - name: amazon/titan-text-express-v1
    display_name: Amazon Titan Text Express
    description: Amazon Titan Text Express, with a context length of up to 8,000 tokens, excels in advanced language tasks like open-ended text generation and conversational chat. It's also optimized for Retrieval Augmented Generation (RAG). Initially designed for English, the model offers preview multilingual support for over 100 additional languages.
    creator_organization_name: Amazon
    access: limited
    release_date: 2023-11-29
    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]


  # Anthropic
  - name: anthropic/claude-v1.3
    display_name: Anthropic Claude v1.3
128 changes: 128 additions & 0 deletions src/helm/proxy/clients/bedrock_client.py
@@ -0,0 +1,128 @@
from abc import abstractmethod
from copy import deepcopy
import json
import os
from typing import Any, Dict, List, Mapping, Optional

from helm.common.cache import CacheConfig
from helm.proxy.clients.client import CachingClient, truncate_and_tokenize_response_text
from helm.common.request import Request, RequestResult, Sequence, wrap_request_time
from helm.proxy.clients.bedrock_utils import get_bedrock_client
from helm.proxy.tokenizers.tokenizer import Tokenizer


JSON_CONTENT_TYPE = "application/json"


class BedrockClient(CachingClient):
    @abstractmethod
    def convert_request_to_raw_request(self, request: Request) -> Dict:
        raise NotImplementedError()

    @abstractmethod
    def convert_raw_response_to_completions(self, response: Dict, request: Request) -> List[Sequence]:
        raise NotImplementedError()

    def __init__(
        self,
        cache_config: CacheConfig,
        tokenizer: Tokenizer,
        tokenizer_name: str,
        bedrock_model_id: Optional[str] = None,
        assumed_role: Optional[str] = None,
        region: Optional[str] = None,
    ):
        super().__init__(cache_config=cache_config)
        self.tokenizer = tokenizer
        self.tokenizer_name = tokenizer_name
        self.bedrock_model_id = bedrock_model_id
        self.bedrock_client = get_bedrock_client(
            assumed_role=assumed_role or os.environ.get("BEDROCK_ASSUME_ROLE", None),
            region=region or os.environ.get("AWS_DEFAULT_REGION", None),
        )

    def make_request(self, request: Request) -> RequestResult:
        # model_id should be something like "amazon.titan-tg1-large"
        model_id = self.bedrock_model_id if self.bedrock_model_id else request.model.replace("/", ".")
        raw_request = self.convert_request_to_raw_request(request)

        # modelId isn't part of raw_request, so it must be explicitly included in the cache key.
        raw_request_for_cache: Dict = {"modelId": model_id, **deepcopy(raw_request)}
        cache_key: Mapping = CachingClient.make_cache_key(raw_request_for_cache, request)

        def do_it() -> Dict[Any, Any]:
            response = self.bedrock_client.invoke_model(
                body=json.dumps(raw_request), modelId=model_id, accept=JSON_CONTENT_TYPE, contentType=JSON_CONTENT_TYPE
            )
            return json.loads(response.get("body").read())

        try:
            response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
        except Exception as error:
            return RequestResult(
                success=False,
                cached=False,
                error=str(error),
                completions=[],
                embedding=[],
            )

        completions = self.convert_raw_response_to_completions(response, request)

        return RequestResult(
            success=True,
            cached=cached,
            request_time=response["request_time"],
            request_datetime=response["request_datetime"],
            completions=completions,
            embedding=[],
        )


class BedrockTitanClient(BedrockClient):
    _COMPLETION_REASON_TO_FINISH_REASON = {
        "LENGTH": "length",
        "FINISH": "endoftext",
    }

    def convert_request_to_raw_request(self, request: Request) -> Dict:
        # TODO: Support the following:
        # - top_k_per_token
        # - echo_prompt
        # - num_completions
        return {
            "inputText": request.prompt,
            "textGenerationConfig": {
                "maxTokenCount": request.max_tokens,
                # We ignore stop sequences in the request and always set stop sequences to the empty list.
                # This is because:
                #
                # 1. The only permitted stop sequences are "|" and "User:"
                #    - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-titan-text.html
                #    - https://github.com/boto/boto3/issues/3993
                #    - https://github.com/aws/aws-sdk/issues/692
                #
                # 2. Titan has a tendency to emit "\n" as the first token in the generated text output,
                #    which would cause the output to stop immediately if "\n" were in the stop sequences.
                "stopSequences": [],
                "temperature": request.temperature,
                "topP": request.top_p,
            },
        }

    def convert_raw_response_to_completions(self, response: Dict, request: Request) -> List[Sequence]:
        # TODO: Support the following:
        # - tokens
        # - logprob
        completions: List[Sequence] = []
        for raw_completion in response["results"]:
            output_text = raw_completion["outputText"]
            # Call lstrip() because Titan has a tendency to emit "\n" as the first token
            # in the generated text output.
            finish_reason = BedrockTitanClient._COMPLETION_REASON_TO_FINISH_REASON.get(
                raw_completion["completionReason"], raw_completion["completionReason"].lower()
            )
            completion = truncate_and_tokenize_response_text(
                output_text.lstrip(), request, self.tokenizer, self.tokenizer_name, finish_reason
            )
            completions.append(completion)
        return completions
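For context, a minimal usage sketch of the new client (not part of this commit). It assumes AWS credentials are already configured, and that `BlackHoleCacheConfig` and `HuggingFaceTokenizer` exist at these import paths in this version of HELM; in practice HELM constructs the client from `model_deployments.yaml` rather than by hand:

```python
# Hypothetical usage sketch; import paths and constructor signatures are
# assumptions, not taken from this commit.
from helm.common.cache import BlackHoleCacheConfig
from helm.common.request import Request
from helm.proxy.clients.bedrock_client import BedrockTitanClient
from helm.proxy.tokenizers.huggingface_tokenizer import HuggingFaceTokenizer

cache_config = BlackHoleCacheConfig()  # no caching, for illustration only
client = BedrockTitanClient(
    cache_config=cache_config,
    tokenizer=HuggingFaceTokenizer(cache_config=cache_config),
    tokenizer_name="huggingface/gpt2",
)

# make_request() maps request.model "amazon/titan-text-express-v1" to the
# Bedrock model_id "amazon.titan-text-express-v1".
result = client.make_request(
    Request(
        model="amazon/titan-text-express-v1",
        model_deployment="amazon/titan-text-express-v1",
        prompt="Write a haiku about the ocean.",
        max_tokens=50,
        temperature=0.7,
    )
)
if result.success:
    print(result.completions[0].text)
```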
72 changes: 72 additions & 0 deletions src/helm/proxy/clients/bedrock_utils.py
@@ -0,0 +1,72 @@
"""Helper utilities for working with Amazon Bedrock."""

import os
from typing import Optional

from helm.common.hierarchical_logger import hlog
from helm.common.optional_dependencies import handle_module_not_found_error

try:
import boto3
from botocore.config import Config
except ModuleNotFoundError as e:
handle_module_not_found_error(e, ["aws"])


# From https://github.com/aws-samples/amazon-bedrock-workshop/blob/main/01_Generation/00_generate_w_bedrock.ipynb
# MIT-0 Licensed
def get_bedrock_client(
assumed_role: Optional[str] = None,
region: Optional[str] = None,
runtime: Optional[bool] = True,
):
"""Create a boto3 client for Amazon Bedrock, with optional configuration overrides
Parameters
----------
assumed_role :
Optional ARN of an AWS IAM role to assume for calling the Bedrock service. If not
specified, the current active credentials will be used.
region :
Optional name of the AWS Region in which the service should be called (e.g. "us-east-1").
If not specified, AWS_REGION or AWS_DEFAULT_REGION environment variable will be used.
runtime :
Optional choice of getting different client to perform operations with the Amazon Bedrock service.
"""
if region is None:
target_region = os.environ.get("AWS_REGION", os.environ.get("AWS_DEFAULT_REGION"))
else:
target_region = region

session_kwargs = {"region_name": target_region}
client_kwargs = {**session_kwargs}

profile_name = os.environ.get("AWS_PROFILE")
if profile_name:
session_kwargs["profile_name"] = profile_name

retry_config = Config(
region_name=target_region,
retries={
"max_attempts": 10,
"mode": "standard",
},
)
session = boto3.Session(**session_kwargs)

if assumed_role:
sts = session.client("sts")
response = sts.assume_role(RoleArn=str(assumed_role), RoleSessionName="crfm-helm")
client_kwargs["aws_access_key_id"] = response["Credentials"]["AccessKeyId"]
client_kwargs["aws_secret_access_key"] = response["Credentials"]["SecretAccessKey"]
client_kwargs["aws_session_token"] = response["Credentials"]["SessionToken"]

if runtime:
service_name = "bedrock-runtime"
else:
service_name = "bedrock"

bedrock_client = session.client(service_name=service_name, config=retry_config, **client_kwargs)

hlog(f"Amazon Bedrock client successfully created with endpoint {bedrock_client._endpoint}")
return bedrock_client
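A short usage sketch for the helper (again, not part of the commit), assuming credentials come from the environment or an `AWS_PROFILE`:

```python
# Hypothetical usage sketch for get_bedrock_client().
from helm.proxy.clients.bedrock_utils import get_bedrock_client

# Default: a "bedrock-runtime" client, used for invoking models.
runtime_client = get_bedrock_client(region="us-east-1")

# runtime=False returns a "bedrock" control-plane client instead,
# e.g. for listing the foundation models available to the account.
control_client = get_bedrock_client(region="us-east-1", runtime=False)
for summary in control_client.list_foundation_models()["modelSummaries"]:
    print(summary["modelId"])
```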
4 changes: 2 additions & 2 deletions src/helm/proxy/clients/client.py
@@ -116,7 +116,7 @@ def truncate_sequence(sequence: Sequence, request: Request, print_warning: bool


 def truncate_and_tokenize_response_text(
-    text: str, request: Request, tokenizer: Tokenizer, tokenizer_name: str
+    text: str, request: Request, tokenizer: Tokenizer, tokenizer_name: str, original_finish_reason: str = "endoftext"
 ) -> Sequence:
     """Truncate a string-only response to respect stop_sequences and max_tokens.
@@ -135,7 +135,7 @@ def truncate_and_tokenize_response_text(
     This is because the tokens are derived from the truncated text using the tokenizer,
     so the text and the tokens in the resulting Sequence are guaranteed to match."""
     # Finish reason strings are taken from basic_metrics._compute_finish_reason_metrics()
-    finish_reason: str = "endoftext"
+    finish_reason: str = original_finish_reason
     if request.echo_prompt:
         raise Exception("truncate_and_tokenize_response_text() does not support requests with echo_prompt = True")

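As an illustration of the new parameter (reusing names from the Titan client above), a backend-supplied finish reason now flows through instead of the hard-coded "endoftext":

```python
# Illustrative only: BedrockTitanClient maps Titan's completionReason
# ("LENGTH" -> "length", "FINISH" -> "endoftext") and passes the result in
# as the new original_finish_reason argument.
sequence = truncate_and_tokenize_response_text(
    output_text.lstrip(),  # Titan tends to emit a leading "\n", so strip it
    request,
    tokenizer,
    tokenizer_name,
    "length",  # e.g. the mapped finish reason when Titan stops at maxTokenCount
)
```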
