diff --git a/vllm/block.py b/vllm/block.py index 70479a995e65b..dc8b94c8ca4d4 100644 --- a/vllm/block.py +++ b/vllm/block.py @@ -60,6 +60,7 @@ def __getitem__(self, key): return self._blocks[key] if "TYPE_CHECKING": + def __iter__(self) -> Iterator[PhysicalTokenBlock]: raise RuntimeError("Method should be automatically generated") diff --git a/vllm/core/block_manager_v1.py b/vllm/core/block_manager_v1.py index a507641f93df4..b7fe8d3fb33f7 100644 --- a/vllm/core/block_manager_v1.py +++ b/vllm/core/block_manager_v1.py @@ -321,7 +321,8 @@ def _allocate_sequence(self, \ for logical_idx in range(num_prompt_blocks): if (self.block_sliding_window is not None and logical_idx >= self.block_sliding_window): - block = block_table[logical_idx % self.block_sliding_window] + block = block_table[logical_idx % + self.block_sliding_window] # Set the reference counts of the token blocks. block.ref_count = ref_count elif not is_encoder_decoder and self.enable_caching: diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py index 44728c69f461b..1a3d04859701c 100644 --- a/vllm/core/scheduler.py +++ b/vllm/core/scheduler.py @@ -288,7 +288,8 @@ def scheduler_running_outputs_builder(): def scheduled_seq_group_builder(): - return ScheduledSequenceGroup(SequenceGroup("", [], -1), token_chunk_size=0) + return ScheduledSequenceGroup(SequenceGroup("", [], -1), + token_chunk_size=0) # return ScheduledSequenceGroup(seq_group=None, token_chunk_size=0)