From 55a28eb82724e5fc9b4f1fe6507c51e2389da026 Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Fri, 26 Apr 2024 13:50:16 +0100
Subject: [PATCH] Add `model_max_length` to AutoTokenizer (#3342)

---
 examples/quickstart-huggingface/client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/quickstart-huggingface/client.py b/examples/quickstart-huggingface/client.py
index db8bf51d36da..a9d48bfa8f13 100644
--- a/examples/quickstart-huggingface/client.py
+++ b/examples/quickstart-huggingface/client.py
@@ -24,7 +24,7 @@ def load_data(partition_id):
     # Divide data: 80% train, 20% test
     partition_train_test = partition.train_test_split(test_size=0.2, seed=42)
 
-    tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)
+    tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT, model_max_length=512)
 
     def tokenize_function(examples):
         return tokenizer(examples["text"], truncation=True)
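
Note: a minimal standalone sketch of what the updated call does, assuming the `transformers` package is installed; the checkpoint name below is illustrative and not taken from this patch:

    from transformers import AutoTokenizer

    CHECKPOINT = "distilbert-base-uncased"  # illustrative checkpoint, not from the patch

    # Passing model_max_length=512 caps the tokenizer's maximum sequence length,
    # so truncation=True bounds inputs at 512 tokens even for checkpoints whose
    # tokenizer config reports a very large default.
    tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT, model_max_length=512)

    def tokenize_function(examples):
        # Sequences longer than 512 tokens are truncated to 512.
        return tokenizer(examples["text"], truncation=True)

    print(tokenizer.model_max_length)  # 512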