From 836ae08d31cc4e86977aba5cc9eee1310ed155fc Mon Sep 17 00:00:00 2001
From: Thomas Parnell
Date: Wed, 10 Jul 2024 15:04:07 +0200
Subject: [PATCH] [Bugfix] MLPSpeculator: Use ParallelLMHead in tie_weights=False case. (#6303)

Signed-off-by: Thomas Parnell
---
 vllm/model_executor/models/mlp_speculator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/models/mlp_speculator.py b/vllm/model_executor/models/mlp_speculator.py
index 97f7ec74292bb..d3aec06a92fdb 100644
--- a/vllm/model_executor/models/mlp_speculator.py
+++ b/vllm/model_executor/models/mlp_speculator.py
@@ -110,7 +110,7 @@ def __init__(self, config: MLPSpeculatorConfig, **kwargs) -> None:
             ])
 
             self.head = nn.ModuleList([
-                nn.Linear(self.inner_dim, self.vocab_size, bias=False)
+                ParallelLMHead(self.vocab_size, self.inner_dim, bias=False)
                 for _ in range(self.max_speculative_tokens)
            ])
             self.ln = nn.ModuleList([
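
For context, not part of the patch itself: ParallelLMHead is vLLM's vocab-parallel output projection. Replacing nn.Linear with it means the untied speculator heads are sharded over the vocabulary dimension under tensor parallelism instead of being replicated on every rank, and they pick up ParallelLMHead's sharded weight loading. The sketch below shows the affected construction in isolation, assuming the import path used elsewhere in vLLM's model code; the wrapper class and parameter names are illustrative, and actually constructing ParallelLMHead requires vLLM's distributed/tensor-parallel state to be initialized first.

    # Minimal sketch, assuming vLLM's usual import location for ParallelLMHead.
    from torch import nn

    from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead


    class SpeculatorHeadsSketch(nn.Module):
        """Illustrative stand-in for the heads built in MLPSpeculator.__init__."""

        def __init__(self, vocab_size: int, inner_dim: int,
                     max_speculative_tokens: int) -> None:
            super().__init__()
            # Before the fix: nn.Linear(inner_dim, vocab_size, bias=False),
            # which keeps a full [vocab_size, inner_dim] weight on every rank.
            # After the fix: ParallelLMHead(vocab_size, inner_dim, bias=False),
            # which shards the vocabulary dimension across tensor-parallel
            # ranks and provides its own weight_loader for sharded loading.
            # Note the swapped argument order: nn.Linear takes
            # (in_features, out_features) while ParallelLMHead takes
            # (num_embeddings, embedding_dim); both map inner_dim -> vocab_size.
            self.head = nn.ModuleList([
                ParallelLMHead(vocab_size, inner_dim, bias=False)
                for _ in range(max_speculative_tokens)
            ])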