From a6c4977690736cb69e4817b9a78bdbf41be8382e Mon Sep 17 00:00:00 2001
From: irexyc
Date: Fri, 8 Dec 2023 10:29:40 +0800
Subject: [PATCH] support convert embeddings to bf16

---
 lmdeploy/turbomind/turbomind.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lmdeploy/turbomind/turbomind.py b/lmdeploy/turbomind/turbomind.py
index 84b3caa8ec..144a7978b0 100644
--- a/lmdeploy/turbomind/turbomind.py
+++ b/lmdeploy/turbomind/turbomind.py
@@ -558,10 +558,14 @@ def _broadcast_np(data, dtype, shape=(batch_size, )):
                 embedding_ends = [embedding_ends]
                 embeddings = [embeddings]
             # convert to lookup table type
-            # TODO bf16
             if self.tm_model.config.weight_type == 'fp32':
                 embeddings = [[x.astype(np.float32) for x in y]
                               for y in embeddings]
+            elif self.tm_model.config.weight_type == 'bf16':
+                embeddings = [[
+                    torch.from_numpy(x).bfloat16().view(torch.half).numpy()
+                    for x in y
+                ] for y in embeddings]
             else:
                 embeddings = [[x.astype(np.float16) for x in y]
                               for y in embeddings]
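
The .view(torch.half) reinterpretation is there because NumPy has no native bfloat16
dtype: the embedding is converted to bf16 in torch, and the same 16-bit payload is then
carried back through a NumPy array whose float16 dtype is only a container; TurboMind
reads the buffer as bf16 on its side. A minimal sketch of that round trip, assuming a
standalone NumPy/torch environment (the array x and the tolerance are illustrative, not
taken from the patch):

import numpy as np
import torch

# Hypothetical embedding chunk (illustrative values only).
x = np.random.rand(4, 8).astype(np.float32)

# Convert to bf16, then view the same bits as fp16 so NumPy can hold them;
# the float16 dtype of `carried` is just a container, the payload is bf16.
carried = torch.from_numpy(x).bfloat16().view(torch.half).numpy()
assert carried.dtype == np.float16

# Reinterpreting the bits as bf16 recovers the values (to bf16 precision).
restored = torch.from_numpy(carried).view(torch.bfloat16).float()
assert torch.allclose(restored, torch.from_numpy(x), atol=1e-2)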