From 874490b9e1f4e3b1ed945a8cbd8f3b39d536379e Mon Sep 17 00:00:00 2001
From: ftgreat
Date: Thu, 28 Sep 2023 09:51:07 +0000
Subject: [PATCH] enabled aquila2 finetuning

Signed-off-by: ftgreat
---
 flagai/auto_model/auto_loader.py | 36 +++++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/flagai/auto_model/auto_loader.py b/flagai/auto_model/auto_loader.py
index f67e8219..a7fa83ad 100755
--- a/flagai/auto_model/auto_loader.py
+++ b/flagai/auto_model/auto_loader.py
@@ -5,7 +5,6 @@
 import os
 import copy
 from flagai.model.file_utils import _get_model_id, _get_checkpoint_path, _get_vocab_path, _get_model_files
-from flagai.model.aquila2.modeling_aquila import AquilaForCausalLM
 import torch
 
 class LazyImport(object):
@@ -169,7 +168,8 @@ def __init__(self,
                  low_cpu_mem_usage=True,
                  lora_dir=None,
                  qlora_dir=None,
-                 quantization_config=None,
+                 inference_mode=True,
+                 model_max_length=None,
                  **kwargs):
         """
         Args:
@@ -205,6 +205,7 @@ def __init__(self,
             print(f"All supported models are {list(MODEL_DICT.keys())}")
             return
         if task_name == "aquila2":
+            from flagai.model.aquila2.modeling_aquila import AquilaForCausalLM
             download_path = os.path.join(model_dir, model_name)
 
             if not os.path.exists(download_path):
@@ -261,16 +262,29 @@ def __init__(self,
                     bnb_4bit_quant_type="nf4",
                     bnb_4bit_compute_dtype=torch_dtype,
                 )
+            if inference_mode:
+                model = AquilaForCausalLM.from_pretrained(download_path,low_cpu_mem_usage=low_cpu_mem_usage, torch_dtype=torch_dtype,
+                                                          quantization_config=quantization_config)
+                model.eval()
+            else:
+                # Set RoPE scaling factor
+                import transformers
+                import math
+                config = transformers.AutoConfig.from_pretrained(
+                    download_path,
+                    cache_dir=kwargs['cache_dir'],
+                    trust_remote_code=True,
+                )
+                orig_ctx_len = getattr(config, "max_position_embeddings", None)
+                if orig_ctx_len and model_max_length > orig_ctx_len:
+                    scaling_factor = float(
+                        math.ceil(model_max_length / orig_ctx_len))
+                    config.rope_scaling = {"type": "linear", "factor": scaling_factor}
+                config.use_cache = False
+                model = AquilaForCausalLM.from_pretrained(download_path,
+                                                          **kwargs)
 
-            model = AquilaForCausalLM.from_pretrained(download_path,
-                                                      low_cpu_mem_usage=low_cpu_mem_usage, torch_dtype=torch_dtype,
-                                                      quantization_config=quantization_config)
-
-            model.eval()
-            # from accelerate import load_checkpoint_and_dispatch
-            # model = load_checkpoint_and_dispatch(
-            #     model, model_dir+model_name, device_map="balanced", no_split_module_classes=["LlamaDecoderLayer"])
 
             if not qlora_dir:
                 model.to(device)
             if lora_dir:
@@ -282,7 +296,7 @@ def __init__(self,
                 model = PeftModel.from_pretrained(model, qlora_dir)
                 print("Qlora modules loaded")
             from transformers import AutoTokenizer
-            tokenizer = AutoTokenizer.from_pretrained(model_dir+model_name)
+            tokenizer = AutoTokenizer.from_pretrained(download_path)
             self.model = model
             self.tokenizer = tokenizer
         else:
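
For reference, a minimal usage sketch of the finetuning path this patch enables. It is not part of the patch itself: the checkpoint name "aquilachat2-7b", the model/cache directories, and the 4096 context length are placeholder values. Because the finetuning branch reads kwargs['cache_dir'] and compares model_max_length against the config's max_position_embeddings, both must be supplied when inference_mode=False.

    # Sketch only: exercising the new finetuning branch of AutoLoader.
    # "aquilachat2-7b", "./checkpoints/", "./cache", and 4096 are placeholders.
    from flagai.auto_model.auto_loader import AutoLoader

    loader = AutoLoader(
        "aquila2",
        model_name="aquilachat2-7b",
        model_dir="./checkpoints/",
        inference_mode=False,      # take the finetuning path instead of eval()
        model_max_length=4096,     # enables linear RoPE scaling when > max_position_embeddings
        cache_dir="./cache",       # read via kwargs['cache_dir'] in the finetuning branch
    )
    model = loader.get_model()
    tokenizer = loader.get_tokenizer()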
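
The RoPE scaling factor set in the finetuning branch is just the ceiling of the ratio between the requested sequence length and the model's original max_position_embeddings. A standalone illustration of that arithmetic, with hypothetical values:

    import math

    # Hypothetical values: original context window vs. requested finetuning length.
    orig_ctx_len = 2048        # config.max_position_embeddings
    model_max_length = 4096    # requested finetuning sequence length

    if orig_ctx_len and model_max_length > orig_ctx_len:
        scaling_factor = float(math.ceil(model_max_length / orig_ctx_len))  # -> 2.0
        rope_scaling = {"type": "linear", "factor": scaling_factor}
        print(rope_scaling)    # {'type': 'linear', 'factor': 2.0}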