
Commit

enabled aquila2 finetuning
Signed-off-by: ftgreat <[email protected]>
ftgreat committed Sep 28, 2023
1 parent fa3ca34 commit 874490b
Showing 1 changed file with 25 additions and 11 deletions.
36 changes: 25 additions & 11 deletions flagai/auto_model/auto_loader.py
@@ -5,7 +5,6 @@
import os
import copy
from flagai.model.file_utils import _get_model_id, _get_checkpoint_path, _get_vocab_path, _get_model_files
from flagai.model.aquila2.modeling_aquila import AquilaForCausalLM
import torch

class LazyImport(object):
@@ -169,7 +168,8 @@ def __init__(self,
                 low_cpu_mem_usage=True,
                 lora_dir=None,
                 qlora_dir=None,
                 quantization_config=None,
                 inference_mode=True,
                 model_max_length=None,
                 **kwargs):
        """
        Args:
@@ -205,6 +205,7 @@ def __init__(self,
print(f"All supported models are {list(MODEL_DICT.keys())}")
return
if task_name == "aquila2":
from flagai.model.aquila2.modeling_aquila import AquilaForCausalLM
download_path = os.path.join(model_dir, model_name)

if not os.path.exists(download_path):
Expand Down Expand Up @@ -261,16 +262,29 @@ def __init__(self,
                    bnb_4bit_quant_type="nf4",
                    bnb_4bit_compute_dtype=torch_dtype,
                )
            if inference_mode:
                model = AquilaForCausalLM.from_pretrained(download_path, low_cpu_mem_usage=low_cpu_mem_usage, torch_dtype=torch_dtype,
                                                          quantization_config=quantization_config)
                model.eval()
            else:
                # Set RoPE scaling factor
                import transformers
                import math
                config = transformers.AutoConfig.from_pretrained(
                    download_path,
                    cache_dir=kwargs['cache_dir'],
                    trust_remote_code=True,
                )
                orig_ctx_len = getattr(config, "max_position_embeddings", None)
                if orig_ctx_len and model_max_length > orig_ctx_len:
                    scaling_factor = float(
                        math.ceil(model_max_length / orig_ctx_len))
                    config.rope_scaling = {"type": "linear", "factor": scaling_factor}
                config.use_cache = False
                model = AquilaForCausalLM.from_pretrained(download_path,
                                                          **kwargs)


            model = AquilaForCausalLM.from_pretrained(download_path,
                                                      low_cpu_mem_usage=low_cpu_mem_usage, torch_dtype=torch_dtype,
                                                      quantization_config=quantization_config)

            model.eval()
            # from accelerate import load_checkpoint_and_dispatch
            # model = load_checkpoint_and_dispatch(
            #     model, model_dir+model_name, device_map="balanced", no_split_module_classes=["LlamaDecoderLayer"])
            if not qlora_dir:
                model.to(device)
            if lora_dir:
@@ -282,7 +296,7 @@ def __init__(self,
                model = PeftModel.from_pretrained(model, qlora_dir)
                print("Qlora modules loaded")
            from transformers import AutoTokenizer
            tokenizer = AutoTokenizer.from_pretrained(model_dir+model_name)
            tokenizer = AutoTokenizer.from_pretrained(download_path)
            self.model = model
            self.tokenizer = tokenizer
        else:
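
The new else branch is what enables finetuning: when model_max_length exceeds the checkpoint's max_position_embeddings, it applies linear RoPE scaling by rounding the ratio up and writing it into config.rope_scaling, and it disables the KV cache for training. A minimal standalone sketch of that scaling rule (the context lengths below are made-up example values, not taken from the commit):

```python
import math

def linear_rope_scaling(model_max_length, orig_ctx_len):
    """Mirror of the commit's rule: scale RoPE linearly when the requested
    sequence length exceeds the pretrained context window."""
    if orig_ctx_len and model_max_length > orig_ctx_len:
        factor = float(math.ceil(model_max_length / orig_ctx_len))
        return {"type": "linear", "factor": factor}
    return None  # within the pretrained window, no scaling needed

# Hypothetical example: a 2048-token pretrained window stretched to 4096 tokens.
print(linear_rope_scaling(4096, 2048))  # {'type': 'linear', 'factor': 2.0}
print(linear_rope_scaling(1024, 2048))  # None
```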

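A hedged usage sketch of the changed loader, assuming FlagAI's usual AutoLoader entry point; the model name, directories, and cache path below are placeholders, not values from the commit. Note that the finetuning branch reads kwargs['cache_dir'], so a cache_dir keyword should be supplied when inference_mode=False:

```python
from flagai.auto_model.auto_loader import AutoLoader

# Inference path: loads with low_cpu_mem_usage/torch_dtype and calls model.eval().
loader = AutoLoader(
    task_name="aquila2",
    model_name="Aquila2-7B",      # placeholder model name
    model_dir="./checkpoints/",   # placeholder checkpoint directory
    inference_mode=True,
)

# Finetuning path: inference_mode=False takes the new RoPE-scaling branch;
# cache_dir is passed through **kwargs because the code reads kwargs['cache_dir'].
loader_ft = AutoLoader(
    task_name="aquila2",
    model_name="Aquila2-7B",      # placeholder
    model_dir="./checkpoints/",   # placeholder
    inference_mode=False,
    model_max_length=4096,        # example target sequence length
    cache_dir="./cache",          # placeholder
)

# The aquila2 branch sets these attributes in __init__, as shown in the diff.
model, tokenizer = loader_ft.model, loader_ft.tokenizer
```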