From 874490b9e1f4e3b1ed945a8cbd8f3b39d536379e Mon Sep 17 00:00:00 2001
From: ftgreat
Date: Thu, 28 Sep 2023 09:51:07 +0000
Subject: [PATCH] enabled aquila2 finetuning

Signed-off-by: ftgreat
---
 flagai/auto_model/auto_loader.py | 36 +++++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/flagai/auto_model/auto_loader.py b/flagai/auto_model/auto_loader.py
index f67e8219..a7fa83ad 100755
--- a/flagai/auto_model/auto_loader.py
+++ b/flagai/auto_model/auto_loader.py
@@ -5,7 +5,6 @@
 import os
 import copy
 from flagai.model.file_utils import _get_model_id, _get_checkpoint_path, _get_vocab_path, _get_model_files
-from flagai.model.aquila2.modeling_aquila import AquilaForCausalLM
 import torch
 
 class LazyImport(object):
@@ -169,7 +168,8 @@ def __init__(self,
                  low_cpu_mem_usage=True,
                  lora_dir=None,
                  qlora_dir=None,
-                 quantization_config=None,
+                 inference_mode=True,
+                 model_max_length=None,
                  **kwargs):
         """
         Args:
@@ -205,6 +205,7 @@ def __init__(self,
             print(f"All supported models are {list(MODEL_DICT.keys())}")
             return
         if task_name == "aquila2":
+            from flagai.model.aquila2.modeling_aquila import AquilaForCausalLM
             download_path = os.path.join(model_dir, model_name)
 
             if not os.path.exists(download_path):
@@ -261,16 +262,29 @@ def __init__(self,
                     bnb_4bit_quant_type="nf4",
                     bnb_4bit_compute_dtype=torch_dtype,
                 )
+            if inference_mode:
+                model = AquilaForCausalLM.from_pretrained(download_path,low_cpu_mem_usage=low_cpu_mem_usage, torch_dtype=torch_dtype,
+                                                          quantization_config=quantization_config)
+                model.eval()
+            else:
+                # Set RoPE scaling factor
+                import transformers
+                import math
+                config = transformers.AutoConfig.from_pretrained(
+                    download_path,
+                    cache_dir=kwargs['cache_dir'],
+                    trust_remote_code=True,
+                )
+                orig_ctx_len = getattr(config, "max_position_embeddings", None)
+                if orig_ctx_len and model_max_length > orig_ctx_len:
+                    scaling_factor = float(
+                        math.ceil(model_max_length / orig_ctx_len))
+                    config.rope_scaling = {"type": "linear", "factor": scaling_factor}
+                config.use_cache = False
+                model = AquilaForCausalLM.from_pretrained(download_path,
+                                                          **kwargs)
 
-            model = AquilaForCausalLM.from_pretrained(download_path,
-                                                      low_cpu_mem_usage=low_cpu_mem_usage, torch_dtype=torch_dtype,
-                                                      quantization_config=quantization_config)
-
-            model.eval()
-            # from accelerate import load_checkpoint_and_dispatch
-            # model = load_checkpoint_and_dispatch(
-            #     model, model_dir+model_name, device_map="balanced", no_split_module_classes=["LlamaDecoderLayer"])
 
             if not qlora_dir:
                 model.to(device)
             if lora_dir:
@@ -282,7 +296,7 @@ def __init__(self,
                 model = PeftModel.from_pretrained(model, qlora_dir)
                 print("Qlora modules loaded")
             from transformers import AutoTokenizer
-            tokenizer = AutoTokenizer.from_pretrained(model_dir+model_name)
+            tokenizer = AutoTokenizer.from_pretrained(download_path)
             self.model = model
             self.tokenizer = tokenizer
         else:
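
For reference, a minimal usage sketch of the finetuning path this patch enables. It is not part of the patch itself: the checkpoint name "aquilachat2-7b", the model/cache directories, and the 4096 context length are placeholder values. Because the finetuning branch reads kwargs['cache_dir'] and compares model_max_length against the config's max_position_embeddings, both must be supplied when inference_mode=False.

    # Sketch only: exercising the new finetuning branch of AutoLoader.
    # "aquilachat2-7b", "./checkpoints/", "./cache", and 4096 are placeholders.
    from flagai.auto_model.auto_loader import AutoLoader

    loader = AutoLoader(
        "aquila2",
        model_name="aquilachat2-7b",
        model_dir="./checkpoints/",
        inference_mode=False,      # take the finetuning path instead of eval()
        model_max_length=4096,     # enables linear RoPE scaling when > max_position_embeddings
        cache_dir="./cache",       # read via kwargs['cache_dir'] in the finetuning branch
    )
    model = loader.get_model()
    tokenizer = loader.get_tokenizer()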
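
The RoPE scaling factor set in the finetuning branch is just the ceiling of the ratio between the requested sequence length and the model's original max_position_embeddings. A standalone illustration of that arithmetic, with hypothetical values:

    import math

    # Hypothetical values: original context window vs. requested finetuning length.
    orig_ctx_len = 2048        # config.max_position_embeddings
    model_max_length = 4096    # requested finetuning sequence length

    if orig_ctx_len and model_max_length > orig_ctx_len:
        scaling_factor = float(math.ceil(model_max_length / orig_ctx_len))  # -> 2.0
        rope_scaling = {"type": "linear", "factor": scaling_factor}
        print(rope_scaling)    # {'type': 'linear', 'factor': 2.0}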