checkout main files
ShawnXuan committed Sep 10, 2024
1 parent 12843f1 commit c1d315e
Showing 7 changed files with 73 additions and 68 deletions.
libai/models/utils/model_loader/base_loader.py (5 changes: 0 additions & 5 deletions)
@@ -384,11 +384,6 @@ def _convert_tensor(self, tensor):
Returns:
flow.Tensor: The target tensor.
"""
import torch

if tensor.dtype == torch.bfloat16:
data = tensor.detach().half().cpu().numpy()
return flow.Tensor(data)
return flow.Tensor(tensor.detach().cpu().numpy())

def _convert_tensors(self, torch_state_dict):
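Note on the removed branch: numpy has no bfloat16 dtype, so the deleted code cast bf16 torch tensors to float16 before they crossed the torch -> numpy -> oneflow boundary. A standalone sketch of that conversion path, for reference only (assumes torch and oneflow are installed; this helper is not part of the commit):

import torch
import oneflow as flow

def convert_torch_tensor(tensor):
    # numpy cannot represent bfloat16, so cast to float16 first
    if tensor.dtype == torch.bfloat16:
        return flow.Tensor(tensor.detach().half().cpu().numpy())
    # every other dtype round-trips through numpy directly
    return flow.Tensor(tensor.detach().cpu().numpy())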
libai/tokenizer/tokenization_base.py (8 changes: 2 additions & 6 deletions)
@@ -805,18 +805,14 @@ def _convert_token_to_id_with_added_voc(self, token):
def _convert_token_to_id(self, token):
raise NotImplementedError

def encode(self, text, return_tensors=None, is_global=False, device="cuda", **kwargs):
def encode(self, text, return_tensors=None, is_global=False, **kwargs):
if isinstance(text, str):
tokens = self.tokenize(text)
token_ids = self.convert_tokens_to_ids(tokens)
if hasattr(self, "build_inputs_with_special_tokens"):
token_ids = self.build_inputs_with_special_tokens(token_ids)
token_ids = self.convert_to_tensors(
token_ids,
return_tensors=return_tensors,
is_global=is_global,
device=device,
**kwargs,
token_ids, return_tensors=return_tensors, is_global=is_global, **kwargs
)
return token_ids
elif isinstance(text, (list, tuple)) and len(text) > 0 and isinstance(text[0], str):
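The device keyword is gone from encode() and from the convert_to_tensors call it forwards to; tensor placement is presumably handled by is_global and the distributed configuration instead of being passed per call. A hypothetical call with the new signature (the tokenizer instance is assumed to be built from the project config):

input_ids = tokenizer.encode(
    "what is beam search?",
    return_tensors="of",  # return a oneflow tensor
    is_global=True,       # build it as a global (distributed) tensor
)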
projects/ChatGLM/configs/chatglm_config.py (7 changes: 4 additions & 3 deletions)
@@ -61,7 +61,7 @@
output_scores=False,
output_hidden_states=False,
# train
pretrained_model_path="chatglm/chatglm2-6b",
pretrained_model_path=os.environ["CHATGLM_HF_DIR"],
# lora_cfg
lora_enable=False,
lora_cfg=dict(
@@ -86,5 +86,6 @@
model = LazyCall(ChatGLMForConditionalGeneration)(cfg=cfg)
tokenization = OmegaConf.create()
tokenization.make_vocab_size_divisible_by = 1
tokenization.tokenizer = LazyCall(ChatGLMTokenizer)()

tokenization.tokenizer = LazyCall(ChatGLMTokenizer)(
vocab_file=f"{os.environ['CHATGLM_HF_DIR']}/tokenizer.model"
)
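pretrained_model_path and the tokenizer's vocab_file are now both resolved from the CHATGLM_HF_DIR environment variable, and os.environ["..."] (unlike os.environ.get) raises KeyError if it is unset, so the variable has to be exported before the config is imported. A minimal sketch with a placeholder path:

import os

# point CHATGLM_HF_DIR at a local HuggingFace-format chatglm2-6b directory
os.environ["CHATGLM_HF_DIR"] = "/data/models/chatglm2-6b"  # placeholder path

# with this set, the config resolves to:
#   pretrained_model_path: /data/models/chatglm2-6b
#   vocab_file:            /data/models/chatglm2-6b/tokenizer.model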
projects/ChatGLM/lora/layers.py (4 changes: 2 additions & 2 deletions)
@@ -49,7 +49,7 @@ class BaseTunerLayer(ABC):
_disable_adapters: bool = False

# the currently active adapter(s)
_active_adapter: Union[str, List[str]] = "default"
_active_adapter: str | list[str] = "default"

# List all merged adapters
merged_adapters: list[str] = []
@@ -119,7 +119,7 @@ def enable_adapters(self, enabled: bool) -> None:
layer.requires_grad_(False)
self._disable_adapters = True

def set_adapter(self, adapter_names: Union[str, List[str]]) -> None:
def set_adapter(self, adapter_names: str | list[str]) -> None:
"""Set the active adapter(s).
Args:
projects/ChatGLM/lora/lora_model.py (4 changes: 2 additions & 2 deletions)
@@ -22,7 +22,7 @@
from dataclasses import asdict
from enum import Enum
from itertools import chain
from typing import Any, List, Union, Optional
from typing import Any, List, Optional

from oneflow import nn
from tqdm import tqdm
@@ -404,7 +404,7 @@ def disable_adapter_layers(self) -> None:
warnings.warn(msg)
self._set_adapter_layers(enabled=False)

def set_adapter(self, adapter_name: Union[str, List[str]]) -> None:
def set_adapter(self, adapter_name: str | list[str]) -> None:
"""Set the active adapter(s).
Args:
projects/ChatGLM/lora/utils.py (4 changes: 2 additions & 2 deletions)
@@ -15,14 +15,14 @@
# limitations under the License.

import re
from typing import List, Union, Optional
from typing import List

import oneflow as flow

COMMON_LAYERS_PATTERN = ["layers", "h", "block", "blocks", "layer"]


def check_target_module_exists(config, key: str) -> Union[bool, Optional[re.Match[str]]]:
def check_target_module_exists(config, key: str) -> bool | re.Match[str] | None:
"""A helper method to check if the passed module's key name matches
any of the target modules in the adapter_config.
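The annotation changes in layers.py, lora_model.py and utils.py above replace typing.Union-style annotations with PEP 604/585 syntax (str | list[str], bool | re.Match[str] | None). These expressions are evaluated when the class body or function definition executes, so the change presumably assumes Python 3.10 or newer; on older interpreters the usual escape hatch (not applied in this commit) is postponed annotation evaluation:

from __future__ import annotations  # PEP 563: annotations stored as strings, not evaluated

class BaseTunerLayer:
    # with postponed evaluation the | syntax below no longer raises on Python 3.8/3.9
    _active_adapter: str | list[str] = "default"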
projects/ChatGLM/pipeline.py (109 changes: 61 additions & 48 deletions)
@@ -13,14 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import click
from typing import Union
from pathlib import Path

from libai.inference.basic import BasePipeline
from libai.utils import distributed as dist
from libai.config import try_get_key
from libai.engine import DefaultTrainer


class TextGenerationPipeline(BasePipeline):
@@ -86,7 +81,7 @@ def load_pretrain_weight(self, libai_cfg_model, model_path, mode="huggingface"):
return model

elif mode == "random":
#from libai.engine import DefaultTrainer
from libai.engine import DefaultTrainer

return DefaultTrainer.build_model(self.cfg)
else:
@@ -99,34 +94,19 @@ def _parse_parameters(self, **pipeline_parameters):

return preprocess_params, forward_params, postprocess_params

def preprocess(self, sentence: Union[str, list], **kwargs) -> dict:
def preprocess(self, sentence: str | list, **kwargs) -> dict:
#
if type(sentence) is str:
inputs = {
"inputs": sentence,
}
else:
inputs = self.tokenizer.encode(
sentence, return_tensors="of", is_global=True, device=self.device
)
inputs = self.tokenizer.encode(sentence, return_tensors="of", is_global=True)
inputs = {
"input_ids": inputs,
}
return inputs

def build_tokenizer(self, cfg):
tokenizer = None
if try_get_key(cfg, "tokenization") is not None:
tokenizer_cfg = cfg.tokenization.tokenizer
if "vocab_file" not in tokenizer_cfg:
# If "vocab_file" does not exist in the tokenizer's config,
# set it to default as f"{model_path}/tokenizer.model"
tokenizer_cfg.vocab_file = str(
Path(self.model_path).joinpath("tokenizer.model")
)
tokenizer = DefaultTrainer.build_tokenizer(cfg)
return tokenizer

def forward(self, inputs, **kwargs) -> dict:
if "input_ids" not in inputs:
if "history" in kwargs:
@@ -163,52 +143,85 @@ def reset_conversation(self):
self.history = []


@click.command()
@click.option(
"--config_file",
default="projects/ChatGLM/configs/chatglm_config.py",
help="Path to the configuration file.",
)
@click.option("--model_path", default=None, help="Path to the model checkpoint.")
@click.option(
"--mode",
default="libai",
help="Mode for the dataloader pipeline, e.g., 'libai' or 'huggingface'.",
)
@click.option(
"--device", default="cuda", help="Device to run the model on, e.g., 'cuda', 'xpu', 'npu'."
)
def main(config_file, model_path, mode, device):
if __name__ == "__main__":
# ----- load huggingface checkpoint -----
text = "浏览器输入www.baidu.com 并且显示网页,从计算机网络的角度说明实现的全过程"
text2 = (
"5600分为A、B、C三部分,如果A比C的比例是1/7:1/7:1/14,那么A比C多多少?\n"
"选项:\n(A) 300\n(B) 992 \n(C) 1120\n(D) 552\n(E) 312 让我们先想想。一些随机推理:"
)
texts = [
text, text2,
"a dog is flying on the sky",
"Wikipedia is a free online",
"what is beam search?",
"what is beam search?",
]
pipeline = TextGenerationPipeline(
config_file,
"projects/ChatGLM/configs/chatglm_config.py",
data_parallel=1,
tensor_parallel=1,
pipeline_parallel=1,
pipeline_num_layers=28,
model_path=model_path,
mode=mode,
device=device,
model_path=os.environ["CHATGLM_HF_DIR"],
mode="huggingface",
)
pipeline.model = pipeline.model.half()

if isinstance(texts, list):
output = pipeline(inputs=texts, do_sample=False, max_length=400)
output = pipeline(inputs=texts, do_sample=False, max_length=50)
if dist.is_main_process():
for text, record in zip(texts, output):
print(f"Q:{text}||A:{record}")


if __name__ == "__main__":
main()
# if isinstance(text, str):
# output = pipeline(inputs=text, do_sample=False, max_length=400)
# if dist.is_main_process():
# for record in output:
# print(record["generated_text"])
# pipeline.reset_conversation()
# output = pipeline(inputs=text2, do_sample=False, max_length=400)
# if dist.is_main_process():
# for record in output:
# print(record["generated_text"])

# # ----- load libai checkpoint -----
# pipeline = TextGenerationPipeline(
# "projects/ChatGLM/configs/chatglm_config.py",
# data_parallel=1,
# tensor_parallel=1,
# pipeline_parallel=1,
# pipeline_num_layers=28,
# model_path="/home/lixin/codes/libai/lora_sft_result/model_final/model",
# mode="libai",
# )
# pipeline.model = pipeline.model.half()

# if isinstance(texts, list):
# output = pipeline(inputs=texts, do_sample=False, max_length=50)
# if dist.is_main_process():
# for text, record in zip(texts, output):
# print(f"Q:{text}||A:{record}")

# if isinstance(text, str):
# output = pipeline(inputs=text, do_sample=False, max_length=400)
# if dist.is_main_process():
# for record in output:
# print(record['generated_text'])
# pipeline.reset_conversation()
# output = pipeline(inputs=text2, do_sample=False, max_length=400)
# if dist.is_main_process():
# for record in output:
# print(record['generated_text'])

# ----- pure huggingface predict -----
# from transformers import AutoModel, AutoTokenizer

# tokenizer = AutoTokenizer.from_pretrained(glm_model_path, trust_remote_code=True)
# model = AutoModel.from_pretrained(glm_model_path, trust_remote_code=True).half().cuda()
# model = model.eval()
# history = []
# for _ in range(1):
# response, history = model.chat(
# tokenizer, text, history=history, do_sample=False, max_length=400
# )
# print(response)
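With the click entry point removed, pipeline.py no longer accepts --config_file / --model_path / --mode / --device; the config path, checkpoint location (CHATGLM_HF_DIR) and huggingface mode are fixed in the __main__ block. A hypothetical single-process invocation (placeholder path; multi-device runs would go through LiBai's usual distributed launch tooling rather than this):

import os
import subprocess

# placeholder path to a local HuggingFace-format chatglm2-6b checkout
env = dict(os.environ, CHATGLM_HF_DIR="/data/models/chatglm2-6b")
subprocess.run(["python3", "projects/ChatGLM/pipeline.py"], env=env, check=True)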
