add max_pad length as an arg
HamidShojanazeri committed Aug 2, 2023
1 parent 6beadcd commit 9b0eae4
Showing 1 changed file with 2 additions and 1 deletion.
inference/inference.py: 3 changes (2 additions, 1 deletion)
@@ -32,6 +32,7 @@ def main(
     enable_azure_content_safety: bool=False, # Enable safety check with Azure content safety api
     enable_sensitive_topics: bool=False, # Enable check for sensitive topics using AuditNLG APIs
     enable_saleforce_content_safety: bool=True, # Enable safety check woth Saleforce safety flan t5
+    max_padding_length: int=None, # the max padding length to be used with tokenizer padding the prompts.
     **kwargs
 ):
     if prompt_file is not None:
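
The new argument threads through to the tokenizer call in the second hunk below. A hypothetical direct call showing how it might be supplied (the import path is assumed from the file shown in this diff; the checkpoint and prompt file are illustrative, not part of this commit):

    from inference.inference import main

    main(
        "meta-llama/Llama-2-7b-hf",  # illustrative model checkpoint
        prompt_file="prompt.txt",    # illustrative prompt file
        max_padding_length=512,      # pad/truncate prompts to 512 tokens
    )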
@@ -83,7 +84,7 @@ def main(
         model = load_peft_model(model, peft_model)

     model.eval()
-    batch = tokenizer(user_prompt, padding='max_length', truncation=True,max_length=420,return_tensors="pt")
+    batch = tokenizer(user_prompt, padding='max_length', truncation=True,max_length=max_padding_length,return_tensors="pt")
     model.resize_token_embeddings(model.config.vocab_size + 1)
     batch = {k: v.to("cuda") for k, v in batch.items()}
     start = time.perf_counter()
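One behavioral note: with max_padding_length defaulting to None, HuggingFace tokenizers fall back to their predefined model_max_length (when the checkpoint defines one) instead of the previously hard-coded 420. A minimal sketch of the two behaviors, assuming a HuggingFace tokenizer; the checkpoint name and <PAD> token are illustrative:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
    # Llama tokenizers ship without a pad token; adding one mirrors why the
    # diff resizes the embedding table by one row (vocab_size + 1).
    tokenizer.add_special_tokens({"pad_token": "<PAD>"})

    # max_length=None: pad/truncate to the tokenizer's predefined model_max_length.
    batch = tokenizer("Hello world", padding='max_length', truncation=True,
                      max_length=None, return_tensors="pt")

    # An explicit value reproduces the old hard-coded behavior.
    batch = tokenizer("Hello world", padding='max_length', truncation=True,
                      max_length=420, return_tensors="pt")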
