[Docs] Change to llama3.2 for examples
CharlieFRuan committed Nov 22, 2024
1 parent 6485713 commit 33ef421
Showing 3 changed files with 19 additions and 16 deletions.
8 changes: 4 additions & 4 deletions docs/how_to/ebnf_guided_generation.rst
@@ -44,7 +44,7 @@ your choice.
 .. code:: python
 # Get tokenizer info
-model_id = "Qwen/Qwen2.5-0.5B-Instruct"
+model_id = "meta-llama/Llama-3.2-1B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 config = AutoConfig.from_pretrained(model_id)
 # This can be larger than tokenizer.vocab_size due to paddings
@@ -83,8 +83,8 @@ for batched inference.
 .. code:: python
 # Here we simulate a valid sampled response
-sim_sampled_response = '(5+3)*2=16<|endoftext|>'
-sim_sampled_token_ids = tokenizer.encode(sim_sampled_response)
+sim_sampled_response = '(5+3)*2=16<|end_of_text|>'
+sim_sampled_token_ids = tokenizer.encode(sim_sampled_response, add_special_tokens=False)
 # Each loop iteration is a simulated auto-regressive step
 for i, sim_token_id in enumerate(sim_sampled_token_ids):
@@ -107,7 +107,7 @@ for batched inference.
 # assert matcher.accept_token(next_token_id)
 assert matcher.accept_token(sim_token_id)
-# Since we accepted a stop token `<|endoftext|>`, we have terminated
+# Since we accepted a stop token `<|end_of_text|>`, we have terminated
 assert matcher.is_terminated()
 # Reset to be ready for the next auto-regressive generation
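Why `add_special_tokens=False` is needed here (a minimal sketch, not part of the commit): unlike the Qwen2.5 tokenizer, the Llama-3.2 tokenizer prepends a `<|begin_of_text|>` BOS token when encoding by default. That BOS id is never produced by the constrained decoding loop, so feeding it to the matcher via accept_token would fail. The snippet assumes `transformers` is installed and that access to the gated meta-llama/Llama-3.2-1B-Instruct repository has been granted.

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")

text = '(5+3)*2=16<|end_of_text|>'
with_special = tokenizer.encode(text)
without_special = tokenizer.encode(text, add_special_tokens=False)

# Expected: the default encode prepends exactly one BOS token.
print(tokenizer.convert_ids_to_tokens(with_special[:1]))  # ['<|begin_of_text|>']
print(len(with_special) - len(without_special))           # 1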
19 changes: 11 additions & 8 deletions docs/how_to/engine_integration.rst
@@ -49,7 +49,7 @@ logits. To be safe, always pass in the former when instantiating ``xgr.Tokenizer
 .. code:: python
 # Get tokenizer info
-model_id = "Qwen/Qwen2.5-0.5B-Instruct"
+model_id = "meta-llama/Llama-3.2-1B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 config = AutoConfig.from_pretrained(model_id)
 # This can be larger than tokenizer.vocab_size due to paddings
@@ -120,8 +120,8 @@ for the next generation.
 .. code:: python
 # Here we simulate a valid sampled response
-sim_sampled_response = '{ "library": "xgrammar" }<|endoftext|>'
-sim_sampled_token_ids = tokenizer.encode(sim_sampled_response)
+sim_sampled_response = '{ "library": "xgrammar" }<|end_of_text|>'
+sim_sampled_token_ids = tokenizer.encode(sim_sampled_response, add_special_tokens=False)
 # Allocate a token bitmask
 token_bitmask = xgr.allocate_token_bitmask(1, tokenizer_info.vocab_size)
@@ -147,7 +147,7 @@ for the next generation.
 # assert matcher.accept_token(next_token_id)
 assert matcher.accept_token(sim_token_id)
-# Since we accepted a stop token `<|endoftext|>`, we have terminated
+# Since we accepted a stop token `<|end_of_text|>`, we have terminated
 assert matcher.is_terminated()
 # Reset to be ready for the next auto-regressive generation
@@ -174,7 +174,7 @@ to generate a valid JSON.
 from transformers import AutoTokenizer, AutoConfig
 # Get tokenizer info
-model_id = "Qwen/Qwen2.5-0.5B-Instruct"
+model_id = "meta-llama/Llama-3.2-1B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 config = AutoConfig.from_pretrained(model_id)
 # This can be larger than tokenizer.vocab_size due to paddings
@@ -207,8 +207,8 @@ each request has its own ``xgr.GrammarMatcher`` to maintain.

 .. code:: python
-sim_sampled_responses = ['{"name": "a"}<|endoftext|>', '{"name": "b"}<|endoftext|>']
-sim_sampled_token_ids = [tokenizer.encode(response) for response in sim_sampled_responses]
+sim_sampled_responses = ['{"name": "a"}<|end_of_text|>', '{"name": "b"}<|end_of_text|>']
+sim_sampled_token_ids = [
+    tokenizer.encode(response, add_special_tokens=False)
+    for response in sim_sampled_responses
+]
 # Each loop iteration is a simulated auto-regressive step
 for loop_iter in range(len(sim_sampled_token_ids[0])):
@@ -237,7 +240,7 @@ each request has its own ``xgr.GrammarMatcher`` to maintain.
 matchers[i].accept_token(sim_sampled_token_ids[i][loop_iter])
 # In our simulated case, all requests should have terminated since we accepted
-# a stop token `<|endoftext|>`
+# a stop token `<|end_of_text|>`
 for i in range(batch_size):
 assert matchers[i].is_terminated()
 # Reset to be ready for the next generation
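For context, a condensed sketch of the batched flow that the updated engine_integration.rst walks through, using the xgrammar APIs the page describes (TokenizerInfo, GrammarCompiler, GrammarMatcher, allocate_token_bitmask, fill_next_token_bitmask, apply_token_bitmask_inplace). It is illustrative only, not part of this commit, and assumes torch, xgrammar, transformers, and gated access to meta-llama/Llama-3.2-1B-Instruct.

import torch
import xgrammar as xgr
from transformers import AutoConfig, AutoTokenizer

model_id = "meta-llama/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
config = AutoConfig.from_pretrained(model_id)

# config.vocab_size can be larger than tokenizer.vocab_size due to paddings.
tokenizer_info = xgr.TokenizerInfo.from_huggingface(tokenizer, vocab_size=config.vocab_size)
compiler = xgr.GrammarCompiler(tokenizer_info)
compiled_grammar = compiler.compile_builtin_json_grammar()

# One matcher per request; one bitmask row per request.
batch_size = 2
matchers = [xgr.GrammarMatcher(compiled_grammar) for _ in range(batch_size)]
token_bitmask = xgr.allocate_token_bitmask(batch_size, tokenizer_info.vocab_size)

# In a real engine, logits come from the model's forward pass.
logits = torch.randn(batch_size, tokenizer_info.vocab_size)
for i in range(batch_size):
    matchers[i].fill_next_token_bitmask(token_bitmask, i)
xgr.apply_token_bitmask_inplace(logits, token_bitmask)

# Sample (greedily here) and feed each request's token back to its matcher.
next_token_ids = torch.argmax(logits, dim=-1)
for i in range(batch_size):
    assert matchers[i].accept_token(int(next_token_ids[i]))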
8 changes: 4 additions & 4 deletions docs/how_to/json_generation.rst
@@ -45,7 +45,7 @@ your choice.
 .. code:: python
 # Get tokenizer info
-model_id = "Qwen/Qwen2.5-0.5B-Instruct"
+model_id = "meta-llama/Llama-3.2-1B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 config = AutoConfig.from_pretrained(model_id)
 # This can be larger than tokenizer.vocab_size due to paddings
@@ -110,8 +110,8 @@ for batched inference.
 .. code:: python
 # Here we simulate a valid sampled response
-sim_sampled_response = '{ "library": "xgrammar" }<|endoftext|>'
-sim_sampled_token_ids = tokenizer.encode(sim_sampled_response)
+sim_sampled_response = '{ "library": "xgrammar" }<|end_of_text|>'
+sim_sampled_token_ids = tokenizer.encode(sim_sampled_response, add_special_tokens=False)
 # Each loop iteration is a simulated auto-regressive step
 for i, sim_token_id in enumerate(sim_sampled_token_ids):
@@ -134,7 +134,7 @@ for batched inference.
 # assert matcher.accept_token(next_token_id)
 assert matcher.accept_token(sim_token_id)
-# Since we accepted a stop token `<|endoftext|>`, we have terminated
+# Since we accepted a stop token `<|end_of_text|>`, we have terminated
 assert matcher.is_terminated()
 # Reset to be ready for the next auto-regressive generation
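A small sanity check (illustrative, not part of the commit) that the simulated response used in json_generation.rst is valid JSON followed by the Llama-3.2 stop token, and that `<|end_of_text|>` encodes to a single id at the end of the sequence. Assumes access to the gated tokenizer.

import json
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")

sim_sampled_response = '{ "library": "xgrammar" }<|end_of_text|>'
stop_token = "<|end_of_text|>"

# The text before the stop token must parse as the expected JSON object.
body, sep, rest = sim_sampled_response.partition(stop_token)
assert sep == stop_token and rest == ""
assert json.loads(body) == {"library": "xgrammar"}

# The stop token is a special token in the Llama-3.2 vocabulary, so it is
# encoded as exactly one id at the end of the sequence.
ids = tokenizer.encode(sim_sampled_response, add_special_tokens=False)
assert tokenizer.convert_ids_to_tokens(ids[-1:]) == [stop_token]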
