updated model usage method
Signed-off-by: 严照东 <[email protected]>
严照东 committed Sep 26, 2023
1 parent 4986a94 commit 2fb5318
Showing 5 changed files with 261 additions and 262 deletions.
29 changes: 0 additions & 29 deletions examples/Aquila2/generate_chat.py

This file was deleted.

63 changes: 54 additions & 9 deletions flagai/auto_model/auto_loader.py
@@ -4,10 +4,10 @@
import importlib
import os
import copy
from flagai.model.file_utils import _get_model_id
from flagai.model.file_utils import _get_model_id, _get_checkpoint_path, _get_vocab_path, _get_model_files
from flagai.model.aquila2.modeling_aquila import AquilaForCausalLM
import torch


class LazyImport(object):

def __init__(self, name):
@@ -207,8 +207,54 @@ def __init__(self,
print(f"All supported models are {list(MODEL_DICT.keys())}")
return
if task_name == "aquila2":
from flagai.model.aquila2_model import Aquila2Model
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
download_path = os.path.join(model_dir, model_name)

if not os.path.exists(download_path):
# Try to download from ModelHub
try:
model_id = _get_model_id(model_name)
except:
raise FileNotFoundError("Model name not found in local path and ModelHub")
if model_id and model_id != "null":
model_files = eval(_get_model_files(model_name))
print("model files:" + str(model_files))
for file_name in model_files:
if not file_name.endswith("bin"):
_get_vocab_path(download_path, file_name, model_id)

if os.path.exists(
os.path.join(download_path, 'config.json')):
if os.getenv('ENV_TYPE') == 'deepspeed+mpu':
model_parallel_size = int(os.getenv("MODEL_PARALLEL_SIZE"))
if model_parallel_size > 1:
# If the number of GPUs matches the number of ModelHub slices, they can be loaded directly;
# otherwise pytorch_model.bin must be downloaded and re-split.
model_hub_parallel_size = 0
for f in model_files:
if "pytorch_model_" in f:
model_hub_parallel_size += 1
else:
model_parallel_size = 1

if "pytorch_model_01.bin" in model_files and model_parallel_size > 1 and model_hub_parallel_size == model_parallel_size:
# Only download the model slices (Megatron-LM).
for file_to_load in model_files:
if "pytorch_model_" in file_to_load:
_get_checkpoint_path(download_path, file_to_load,
model_id)

elif 'pytorch_model.bin' in model_files:
checkpoint_path = _get_checkpoint_path(
download_path, 'pytorch_model.bin', model_id)
else:
checkpoint_merge = {}
# The weights may be split across multiple files.
for file_to_load in model_files:
if "pytorch_model-0" in file_to_load:
_get_checkpoint_path(download_path, file_to_load,
model_id)

if qlora_dir:
from transformers import BitsAndBytesConfig
quantization_config=BitsAndBytesConfig(
@@ -217,11 +263,14 @@ def __init__(self,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch_dtype,
)
model = Aquila2Model.from_pretrain(model_dir, model_name,


model = AquilaForCausalLM.from_pretrained(download_path,
low_cpu_mem_usage=low_cpu_mem_usage, torch_dtype=torch_dtype,
quantization_config=quantization_config)

model.eval()
# from accelerate import load_checkpoint_and_dispatch
# model = load_checkpoint_and_dispatch(
# model, model_dir+model_name, device_map="balanced", no_split_module_classes=["LlamaDecoderLayer"])
if not qlora_dir:
Expand All @@ -236,13 +285,9 @@ def __init__(self,
print("Qlora modules loaded")
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(model_dir+model_name)
#args.cuda_index = 0
# device = f"cuda"
self.model = model
self.tokenizer = tokenizer

else:

brief_model_name = MODEL_DICT[model_name][2]
model_type = MODEL_DICT[model_name][3]
# The dir to save config, vocab and model.
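For reference, a minimal usage sketch of the updated loading path above (not part of the diff; the checkpoint directory and model name are placeholder assumptions, while get_model and get_tokenizer are the existing AutoLoader accessors):

from flagai.auto_model.auto_loader import AutoLoader

# Hypothetical example: task_name "aquila2" routes into the new branch above, which
# downloads any missing files and builds the model with AquilaForCausalLM.from_pretrained.
loader = AutoLoader("aquila2",
                    model_dir="./checkpoints",    # placeholder directory
                    model_name="AquilaChat2-7B")  # placeholder model name
model = loader.get_model()          # returned in eval mode by the branch above
tokenizer = loader.get_tokenizer()  # AutoTokenizer loaded from model_dir + model_name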
169 changes: 169 additions & 0 deletions flagai/model/aquila2/modeling_aquila.py
File mode changed: 100644 → 100755
@@ -31,6 +31,17 @@
from transformers.modeling_utils import PreTrainedModel
from transformers.utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings
from .configuration_aquila import AquilaConfig
from transformers import (
LogitsProcessorList,
MinLengthLogitsProcessor,
TopKLogitsWarper,
TemperatureLogitsWarper,
TopPLogitsWarper,
StoppingCriteriaList,
MaxLengthCriteria,
BitsAndBytesConfig,
)
from .utils import *


logger = logging.get_logger(__name__)
@@ -754,6 +765,57 @@ def __init__(self, config):
# Initialize weights and apply final processing
self.post_init()

@classmethod
def from_pretrain(self, model_dir, model_name, **kwargs):
download_path = os.path.join(model_dir, model_name)
if os.path.exists(download_path):
return self.from_pretrained(download_path, **kwargs)


config_path = os.path.join(download_path, "config.json")
checkpoint_path = os.path.join(download_path, "pytorch_model.bin")
from flagai.model.file_utils import _get_model_id
model_id = _get_model_id(model_name)
if model_id and model_id != "null":
model_files = eval(_get_model_files(model_name))
print("model files:" + str(model_files))
for file_name in model_files:
if not file_name.endswith("bin"):
_get_vocab_path(download_path, file_name, model_id)

if os.path.exists(
os.path.join(download_path, 'config.json')):
if os.getenv('ENV_TYPE') == 'deepspeed+mpu':
model_parallel_size = int(os.getenv("MODEL_PARALLEL_SIZE"))
if model_parallel_size > 1:
# If the number of GPUs matches the number of ModelHub slices, they can be loaded directly;
# otherwise pytorch_model.bin must be downloaded and re-split.
model_hub_parallel_size = 0
for f in model_files:
if "pytorch_model_" in f:
model_hub_parallel_size += 1
else:
model_parallel_size = 1

if "pytorch_model_01.bin" in model_files and model_parallel_size > 1 and model_hub_parallel_size == model_parallel_size:
# Only download the model slices (Megatron-LM).
for file_to_load in model_files:
if "pytorch_model_" in file_to_load:
_get_checkpoint_path(download_path, file_to_load,
model_id)

elif 'pytorch_model.bin' in model_files:
checkpoint_path = _get_checkpoint_path(
download_path, 'pytorch_model.bin', model_id)
else:
checkpoint_merge = {}
# The weights may be split across multiple files.
for file_to_load in model_files:
if "pytorch_model-0" in file_to_load:
_get_checkpoint_path(download_path, file_to_load,
model_id)

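A hypothetical direct call to the new from_pretrain helper (not part of the diff; the path and model name are placeholders, and torch_dtype is simply forwarded through **kwargs to from_pretrained):

import torch

# Hypothetical example: as written above, a model instance is only returned when the
# local directory model_dir/model_name already exists; the download-only path returns nothing.
model = AquilaForCausalLM.from_pretrain("./checkpoints", "AquilaChat2-7B",
                                        torch_dtype=torch.float16)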
def get_input_embeddings(self):
return self.model.embed_tokens

@@ -905,6 +967,113 @@ def _reorder_cache(past_key_values, beam_idx):
)
return reordered_past

def predict(self, text, tokenizer=None,
max_gen_len=200, top_p=0.95,
seed=1234, topk=100,
temperature=0.9,
sft=True, convo_template = "aquila-chat",
device = "cuda"):

vocab = tokenizer.get_vocab()
#device = device
id2word = {v:k for k, v in vocab.items()}


set_random_seed(seed)
if temperature == 0:
topk = 1
temperature = 1.0
if sft:
tokens = covert_prompt_to_input_ids_with_history(text, history=[], tokenizer=tokenizer, max_token=2048, convo_template=convo_template)
tokens = torch.tensor(tokens)[None,].to(device)
else :
tokens = tokenizer.encode_plus(text)["input_ids"]
print(tokenizer.decode(tokens))
tokens = torch.tensor(tokens)[None,].to(device)
input_length = len(tokens[0])
with torch.no_grad():

# instantiate logits processors
logits_processor = LogitsProcessorList(
[
MinLengthLogitsProcessor(1, eos_token_id=100007),
]
)
# instantiate logits processors
logits_warper = LogitsProcessorList(
[
TopPLogitsWarper(top_p),
TopKLogitsWarper(topk),
TemperatureLogitsWarper(temperature),

]
)

stopping_criteria = StoppingCriteriaList([MaxLengthCriteria(max_length=input_length + max_gen_len)])
out = self.sample(
tokens,
logits_processor=logits_processor,
logits_warper=logits_warper,
stopping_criteria=stopping_criteria,
return_dict_in_generate=True,
output_scores=True,
)


# print(out)
out_ids = out["sequences"][0][input_length:].cpu().numpy()

out_scores = out["scores"]

out_scores = torch.cat(out_scores, dim=0)
out_scores = torch.nn.functional.softmax(out_scores, dim=-1).cpu().numpy()

probs = []
for i in range(len(out_ids)):
probs.append(float(out_scores[i][out_ids[i]]))

# print(f"probs is {probs}")

convert_tokens = []
for t in out_ids:
if t == 100006:
convert_tokens.append("[CLS]")
else :
convert_tokens.append(id2word.get(t, "[unknown_token]"))

out_text = tokenizer.decode(out_ids.tolist())


out = out_text

if "###" in out:
special_index = out.index("###")
out = out[: special_index]
token_length = len(tokenizer.encode_plus(out)["input_ids"])
convert_tokens = convert_tokens[:token_length]
probs = probs[:token_length]

if "[UNK]" in out:
special_index = out.index("[UNK]")
out = out[:special_index]
token_length = len(tokenizer.encode_plus(out)["input_ids"])
convert_tokens = convert_tokens[:token_length]
probs = probs[:token_length]

if "</s>" in out:
special_index = out.index("</s>")
out = out[: special_index]
token_length = len(tokenizer.encode_plus(out)["input_ids"])
convert_tokens = convert_tokens[:token_length]
probs = probs[:token_length]

if len(out) > 0 and out[0] == " ":
out = out[1:]

convert_tokens = convert_tokens[1:]
probs = probs[1:]
return out

@add_start_docstrings(
"""
The LLaMa Model transformer with a sequence classification head on top (linear layer).
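A rough call sketch for the predict method added above (not part of the diff; the prompt is a placeholder, and model/tokenizer are assumed to come from the AutoLoader sketch earlier):

# Hypothetical example: chat-style generation with the new predict() helper.
response = model.predict(
    "What is new in Aquila2?",       # placeholder prompt
    tokenizer=tokenizer,
    max_gen_len=200,
    top_p=0.95,
    temperature=0.9,
    sft=True,                        # wrap the prompt in the "aquila-chat" template
    convo_template="aquila-chat",
    device="cuda",
)
print(response)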
38 changes: 38 additions & 0 deletions flagai/model/aquila2/utils.py
@@ -0,0 +1,38 @@
import random
import numpy as np
import torch
from fastchat.conversation import get_conv_template

def set_random_seed(seed):
"""Set random seed for reproducability."""
if seed is not None and seed > 0:
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)



def covert_prompt_to_input_ids_with_history(text, history, tokenizer, max_token, convo_template="aquila-chat"):
# aquila-chat as default
conv = get_conv_template(convo_template)

conv.append_message(conv.roles[1], None)
conv.append_message(conv.roles[0], text)

example = tokenizer.encode_plus(f"{conv.get_prompt()} ", None, max_length=None)['input_ids']

while(len(history) > 0 and (len(example) < max_token)):
tmp = history.pop()
if tmp[0] == 'ASSISTANT':
conv.append_message(conv.roles[1], tmp[1])
else:
conv.append_message(conv.roles[0], tmp[1])
example = tokenizer.encode_plus(f"{conv.get_prompt()} ", None, max_length=None)['input_ids']

if len(example) >= max_token:
conv.messages.pop()
conv.messages = conv.messages[::-1]
print('model in:', conv.get_prompt())
example = tokenizer.encode_plus(f"{conv.get_prompt()} ", None, max_length=None)['input_ids']

return example
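A brief sketch of how these helpers fit together (not part of the diff; the history entries and prompt are placeholders, and tokenizer is assumed to be a Hugging Face tokenizer):

# Hypothetical example: build input ids for a follow-up turn with prior chat history.
set_random_seed(1234)
history = [("USER", "Hello"), ("ASSISTANT", "Hi, how can I help you?")]
input_ids = covert_prompt_to_input_ids_with_history(
    "Tell me about Aquila2.",        # current user message
    history=history,
    tokenizer=tokenizer,
    max_token=2048,
    convo_template="aquila-chat",
)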
