From a6c4977690736cb69e4817b9a78bdbf41be8382e Mon Sep 17 00:00:00 2001
From: irexyc
Date: Fri, 8 Dec 2023 10:29:40 +0800
Subject: [PATCH] support convert embeddings to bf16

---
 lmdeploy/turbomind/turbomind.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lmdeploy/turbomind/turbomind.py b/lmdeploy/turbomind/turbomind.py
index 84b3caa8ec..144a7978b0 100644
--- a/lmdeploy/turbomind/turbomind.py
+++ b/lmdeploy/turbomind/turbomind.py
@@ -558,10 +558,14 @@ def _broadcast_np(data, dtype, shape=(batch_size, )):
                 embedding_ends = [embedding_ends]
                 embeddings = [embeddings]
             # convert to lookup table type
-            # TODO bf16
             if self.tm_model.config.weight_type == 'fp32':
                 embeddings = [[x.astype(np.float32) for x in y]
                               for y in embeddings]
+            elif self.tm_model.config.weight_type == 'bf16':
+                embeddings = [[
+                    torch.from_numpy(x).bfloat16().view(torch.half).numpy()
+                    for x in y
+                ] for y in embeddings]
             else:
                 embeddings = [[x.astype(np.float16) for x in y]
                               for y in embeddings]
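
The .view(torch.half) reinterpretation is there because NumPy has no native bfloat16
dtype: the embedding is converted to bf16 in torch, and the same 16-bit payload is then
carried back through a NumPy array whose float16 dtype is only a container; TurboMind
reads the buffer as bf16 on its side. A minimal sketch of that round trip, assuming a
standalone NumPy/torch environment (the array x and the tolerance are illustrative, not
taken from the patch):

import numpy as np
import torch

# Hypothetical embedding chunk (illustrative values only).
x = np.random.rand(4, 8).astype(np.float32)

# Convert to bf16, then view the same bits as fp16 so NumPy can hold them;
# the float16 dtype of `carried` is just a container, the payload is bf16.
carried = torch.from_numpy(x).bfloat16().view(torch.half).numpy()
assert carried.dtype == np.float16

# Reinterpreting the bits as bf16 recovers the values (to bf16 precision).
restored = torch.from_numpy(carried).view(torch.bfloat16).float()
assert torch.allclose(restored, torch.from_numpy(x), atol=1e-2)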