[Bugfix][V1] Fix allowed_token_ids for v1 Sampler (#14169)
Signed-off-by: Lu Fang <[email protected]>
houseroad authored Mar 5, 2025
Parent: ec79b67 · Commit: 8d6cd32
Showing 2 changed files with 12 additions and 4 deletions.
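
For context, allowed_token_ids is the SamplingParams field that restricts decoding to a fixed whitelist of token ids. Judging from the diff below, the v1 input batch built its mask with inverted polarity (allowed positions were True in an otherwise False mask), so the sampler's masked_fill_ erased exactly the allowed tokens; the fix flips the semantics to True = disallowed and tightens request validation. A hedged usage sketch of the feature being fixed, where the model name, prompt, and token ids are illustrative placeholders:

```python
# Hypothetical usage sketch of allowed_token_ids; the model, prompt, and
# token ids are placeholders, not taken from this commit.
from vllm import LLM, SamplingParams

llm = LLM(model="facebook/opt-125m")
params = SamplingParams(
    max_tokens=8,
    # Only these token ids may be sampled. Before this fix, the v1 sampler
    # masked out exactly these ids instead of everything else.
    allowed_token_ids=[11, 42, 101],
)
outputs = llm.generate(["Hello"], params)
print(outputs[0].outputs[0].text)
```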
vllm/v1/engine/processor.py — 5 additions, 3 deletions

@@ -92,10 +92,12 @@ def _validate_allowed_token_ids(
             return
         if params.allowed_token_ids is None:
             return
-        if not all(0 <= tid < self.model_config.vocab_size
-                   for tid in params.allowed_token_ids):
+        if not params.allowed_token_ids:
+            raise ValueError("allowed_token_ids is not None and empty!")
+        vocab_size = self.model_config.get_vocab_size()
+        if not all(0 <= tid < vocab_size for tid in params.allowed_token_ids):
             raise ValueError(
-                "allowed_token_ids contains out-of-vocab token id")
+                "allowed_token_ids contains out-of-vocab token id!")

     def process_inputs(
         self,
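
The new checks reject an empty allowed_token_ids list and any out-of-vocab id at request-processing time, before the batch ever reaches the sampler. A minimal standalone sketch of the same validation logic (not vLLM's actual class, just the checks in isolation; the vocab_size default is an assumed placeholder):

```python
# Standalone sketch of the validation rules added above. The default
# vocab_size is a placeholder; vLLM reads it from the model config.
from typing import Optional

def validate_allowed_token_ids(allowed_token_ids: Optional[list[int]],
                               vocab_size: int = 32000) -> None:
    if allowed_token_ids is None:
        return  # Feature unused; nothing to validate.
    if not allowed_token_ids:
        raise ValueError("allowed_token_ids is not None and empty!")
    if not all(0 <= tid < vocab_size for tid in allowed_token_ids):
        raise ValueError("allowed_token_ids contains out-of-vocab token id!")

validate_allowed_token_ids([1, 2, 3])   # passes
# validate_allowed_token_ids([])        # raises: non-None but empty
# validate_allowed_token_ids([-1])      # raises: out-of-vocab id
```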
vllm/v1/worker/gpu_input_batch.py — 7 additions, 1 deletion

@@ -199,6 +199,8 @@ def __init__(
         self.logit_bias: list[Optional[dict[int,
                                             float]]] = [None] * max_num_reqs
         self.has_allowed_token_ids: set[str] = set()
+        # NOTE(lufang): In the mask tensor, if the corresponding token is
+        # allowed, the value is False, since we use masked_fill_ to set -inf.
         self.allowed_token_ids_mask: Optional[torch.Tensor] = None
         self.allowed_token_ids_mask_cpu_tensor: Optional[torch.Tensor] = None

@@ -300,6 +302,7 @@ def add_request(
             self.has_allowed_token_ids.add(req_id)
             if self.allowed_token_ids_mask_cpu_tensor is None:
                 # Lazy allocation for this tensor, which can be large.
+                # False means we don't fill with -inf.
                 self.allowed_token_ids_mask = torch.zeros(self.max_num_reqs,
                                                           self.vocab_size,
                                                           dtype=torch.bool,
@@ -309,8 +312,10 @@ def add_request(
                     self.vocab_size,
                     dtype=torch.bool,
                     device="cpu")
+            self.allowed_token_ids_mask_cpu_tensor[req_index] = True
+            # False means we don't fill with -inf.
             self.allowed_token_ids_mask_cpu_tensor[req_index][
-                sampling_params.allowed_token_ids] = True
+                sampling_params.allowed_token_ids] = False

         # Add request lora ID
         if request.lora_request:

@@ -359,6 +364,7 @@ def remove_request(self, req_id: str) -> Optional[int]:
         self.logit_bias[req_index] = None
         self.has_allowed_token_ids.discard(req_id)
         if self.allowed_token_ids_mask_cpu_tensor is not None:
+            # False means we don't fill with -inf.
             self.allowed_token_ids_mask_cpu_tensor[req_index].fill_(False)
         return req_index
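
The polarity flip matters because the sampler applies this mask with masked_fill_, which writes -inf wherever the mask is True (per the NOTE above). A small self-contained demonstration of the corrected semantics, with illustrative sizes and token ids:

```python
# Demonstration of the corrected mask polarity: True = disallowed = -inf.
# vocab_size and the token ids are illustrative, not from the commit.
import torch

vocab_size = 8
allowed_token_ids = [2, 5]

# Build one mask row the way add_request now does: start all-True
# (everything masked), then clear the allowed positions to False.
mask = torch.ones(vocab_size, dtype=torch.bool)
mask[allowed_token_ids] = False

logits = torch.randn(vocab_size)
logits = logits.masked_fill(mask, float("-inf"))

# Only the allowed ids keep finite logits, so sampling stays inside the
# whitelist. With the old inverted row, these two ids would have been the
# ones erased instead.
assert logits.argmax().item() in allowed_token_ids
print(logits)
```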
