Skip to content

Commit

Permalink
Merge pull request #191 from antonioramos1/fix_end_of_header_token_names
Browse files Browse the repository at this point in the history
Fixes end_header_id token name in comments
  • Loading branch information
subramen authored May 14, 2024
2 parents f37d7d3 + b20cad1 commit 14aab04
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions llama/test_tokenizer.py
Original file line number Diff line number Diff line change
@@ -46,7 +46,7 @@ def test_encode_message(self):
[
128006, # <|start_header_id|>
882, # "user"
- 128007, # <|end_of_header|>
+ 128007, # <|end_header_id|>
271, # "\n\n"
2028, 374, 264, 1296, 11914, 13, # This is a test sentence.
128009, # <|eot_id|>
@@ -70,19 +70,19 @@ def test_encode_dialog(self):
128000, # <|begin_of_text|>
128006, # <|start_header_id|>
9125, # "system"
- 128007, # <|end_of_header|>
+ 128007, # <|end_header_id|>
271, # "\n\n"
2028, 374, 264, 1296, 11914, 13, # "This is a test sentence."
128009, # <|eot_id|>
128006, # <|start_header_id|>
882, # "user"
- 128007, # <|end_of_header|>
+ 128007, # <|end_header_id|>
271, # "\n\n"
2028, 374, 264, 2077, 13, # "This is a response.",
128009, # <|eot_id|>
128006, # <|start_header_id|>
78191, # "assistant"
- 128007, # <|end_of_header|>
+ 128007, # <|end_header_id|>
271, # "\n\n"
]
)

0 comments on commit 14aab04

Please sign in to comment.