Skip to content

Commit

Permalink
Add OLMo on Together (#2425)
Browse files Browse the repository at this point in the history
  • Loading branch information
yifanmai authored Mar 5, 2024
1 parent 57e1994 commit 3e2c6a1
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 0 deletions.
4 changes: 4 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ aleph-alpha =
aleph-alpha-client~=2.14.0
tokenizers>=0.13.3

# Optional extra for Allen Institute for AI (OLMo) models.
# Install with `pip install crfm-helm[allenai]`.
allenai =
ai2-olmo~=0.2

amazon =
boto3~=1.28.57
awscli~=1.29.57
Expand Down Expand Up @@ -139,6 +142,7 @@ yandex =

models =
crfm-helm[aleph-alpha]
crfm-helm[allenai]
crfm-helm[amazon]
crfm-helm[anthropic]
crfm-helm[google]
Expand Down
24 changes: 24 additions & 0 deletions src/helm/config/model_deployments.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1501,6 +1501,30 @@ model_deployments:
args:
together_model: zero-one-ai/Yi-34B


# Allen Institute for AI
# Base OLMo 7B deployment, served through the Together client.
# model_name points at the allenai/olmo-7b metadata entry.
- name: together/olmo-7b
model_name: allenai/olmo-7b
tokenizer_name: allenai/olmo-7b
# NOTE(review): 2047 rather than 2048 -- presumably one token is reserved; confirm.
max_sequence_length: 2047
client_spec:
class_name: "helm.clients.together_client.TogetherClient"

# OLMo 7B Twin 2T deployment, served through the Together client.
# Reuses the allenai/olmo-7b tokenizer config rather than defining its own.
- name: together/olmo-7b-twin-2t
model_name: allenai/olmo-7b-twin-2t
tokenizer_name: allenai/olmo-7b
# NOTE(review): 2047 rather than 2048 -- presumably one token is reserved; confirm.
max_sequence_length: 2047
client_spec:
class_name: "helm.clients.together_client.TogetherClient"

# OLMo 7B Instruct deployment, served through the Together client.
# Reuses the allenai/olmo-7b tokenizer config rather than defining its own.
- name: together/olmo-7b-instruct
model_name: allenai/olmo-7b-instruct
tokenizer_name: allenai/olmo-7b
# NOTE(review): 2047 rather than 2048 -- presumably one token is reserved; confirm.
max_sequence_length: 2047
client_spec:
class_name: "helm.clients.together_client.TogetherClient"


## MistralAI
- name: together/mistral-7b-v0.1
model_name: mistralai/mistral-7b-v0.1
Expand Down
31 changes: 31 additions & 0 deletions src/helm/config/model_metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1187,6 +1187,37 @@ models:
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]


# Allen Institute for AI
# OLMo Blog: https://blog.allenai.org/olmo-open-language-model-87ccfc95f580
# Metadata for the base OLMo 7B model.
# The name is what model deployments reference through model_name.
- name: allenai/olmo-7b
display_name: OLMo (7B)
description: OLMo is a series of Open Language Models trained on the Dolma dataset.
creator_organization_name: Allen Institute for AI
access: open
num_parameters: 7000000000
# Unquoted ISO date: YAML loaders typically parse this as a date, not a string.
release_date: 2024-02-01
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]

# Metadata for the OLMo 7B Twin 2T model.
# The name is what model deployments reference through model_name.
- name: allenai/olmo-7b-twin-2t
display_name: OLMo (7B Twin 2T)
description: OLMo is a series of Open Language Models trained on the Dolma dataset.
creator_organization_name: Allen Institute for AI
access: open
num_parameters: 7000000000
# Unquoted ISO date: YAML loaders typically parse this as a date, not a string.
release_date: 2024-02-01
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]

# Metadata for the instruction-tuned OLMo 7B model.
# The name must match the model_name used by the together/olmo-7b-instruct
# deployment; it must not repeat the Twin 2T entry's name.
- name: allenai/olmo-7b-instruct
  display_name: OLMo (7B Instruct)
  description: OLMo is a series of Open Language Models trained on the Dolma dataset. The instruct version was trained on the Tulu SFT mixture and a cleaned version of the UltraFeedback dataset.
  creator_organization_name: Allen Institute for AI
  access: open
  num_parameters: 7000000000
  release_date: 2024-02-01
  # TODO: Add instruct tag.
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]


# Mistral AI
- name: mistralai/mistral-7b-v0.1
display_name: Mistral v0.1 (7B)
Expand Down
14 changes: 14 additions & 0 deletions src/helm/config/tokenizer_configs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,20 @@ tokenizer_configs:
end_of_text_token: "</s>"
prefix_token: "<s>"


# Allen Institute for AI
# The allenai/olmo-7b tokenizer requires Python 3.9 or newer.
# To use the allenai/olmo-7b tokenizer, run `pip install crfm-helm[allenai]` first.
# Tokenizer config shared by all OLMo 7B deployments (each sets tokenizer_name to this name).
- name: allenai/olmo-7b
tokenizer_spec:
class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
args:
# NOTE(review): presumably forwarded to the Hugging Face loader so the
# tokenizer code shipped with the model repo can run -- confirm.
trust_remote_code: true
end_of_text_token: "<|endoftext|>"
# Empty string: no prefix token is configured for this tokenizer.
prefix_token: ""


# Microsoft
- name: microsoft/phi-2
tokenizer_spec:
class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
Expand Down

0 comments on commit 3e2c6a1

Please sign in to comment.