diff --git a/vllm/model_executor/models/stablelm.py b/vllm/model_executor/models/stablelm.py index cf842d087669f..95e5ad8ede63e 100644 --- a/vllm/model_executor/models/stablelm.py +++ b/vllm/model_executor/models/stablelm.py @@ -98,7 +98,7 @@ def __init__(self, self.scaling = self.head_dim**-0.5 self.q_size = self.num_heads * self.head_dim self.kv_size = self.num_key_value_heads * self.head_dim - + self.qkv_bias = getattr(config, "use_qkv_bias", False) if (self.head_dim * self.num_heads * tp_size) != self.hidden_size: raise ValueError( f"hidden_size must be divisible by num_heads (got `hidden_size`: {self.hidden_size}" @@ -108,7 +108,7 @@ def __init__(self, self.head_dim, self.total_num_heads, self.total_num_key_value_heads, - bias=False, + self.qkv_bias, linear_method=linear_method) self.o_proj = RowParallelLinear(self.total_num_heads * self.head_dim, self.hidden_size,