refine inference (#402)
* refine inference

* replace bincount

* delete num_tokentypes

* add build model time to logger

* fix oom

* refine

* import from t5_dataset

* refine

* reformat

* reformat

* refine mt5 pretrain config
xiezipeng-ML authored Oct 27, 2022
1 parent e9ca408 commit 9a4af26
Showing 8 changed files with 50 additions and 36 deletions.
1 change: 0 additions & 1 deletion configs/common/models/t5.py
@@ -13,7 +13,6 @@
     attention_probs_dropout_prob=0.1,
     max_position_embeddings=512,
     embedding_dropout_prob=0.1,
-    num_tokentypes=0,
     initializer_range=0.02,
     layernorm_eps=1e-5,
     bias_gelu_fusion=True,
10 changes: 10 additions & 0 deletions libai/engine/default.py
@@ -303,7 +303,17 @@ def __init__(self, cfg):
         self.auto_scale_hyperparams(cfg, self.train_loader)
 
         # Assume these objects must be constructed in this order.
+        dist.synchronize()
+        start_time = time.time()
+        logger.info("> Start building model...")
         self.model = self.build_model(cfg)
+
+        dist.synchronize()
+        logger.info(
+            ">>> done with building model. "
+            "Building time: {:.3f} seconds".format(time.time() - start_time)
+        )
+
         self.optimizer = self.build_optimizer(cfg, self.model)
         self.lr_scheduler = self.build_lr_scheduler(cfg, self.optimizer)
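Review note: the two `dist.synchronize()` calls are what make the logged build time meaningful in a multi-rank job — the first stops any rank from starting the clock while others are still busy, the second makes the elapsed time cover the slowest rank. A minimal sketch of the pattern, assuming `dist.synchronize()` is a blocking barrier across all ranks and that `time` and the module-level `logger` already exist in `default.py`:

import logging
import time

from libai.utils import distributed as dist

logger = logging.getLogger(__name__)


def timed_build(build_fn, cfg):
    # Barrier so no rank starts timing before the others are ready.
    dist.synchronize()
    start_time = time.time()
    logger.info("> Start building model...")
    model = build_fn(cfg)
    # Barrier again so the measurement includes the slowest rank.
    dist.synchronize()
    logger.info(
        ">>> done with building model. "
        "Building time: {:.3f} seconds".format(time.time() - start_time)
    )
    return model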
18 changes: 1 addition & 17 deletions libai/inference/generator/generation_logits_processor.py
@@ -19,11 +19,8 @@
 import math
 from typing import Callable, List, Tuple
 
-import numpy as np
 import oneflow as flow
 
-from libai.utils import distributed as dist
-
 
 class LogitsProcessorList(list):
     def __call__(self, input_ids: flow.Tensor, scores: flow.Tensor, **kwargs) -> flow.Tensor:
@@ -125,20 +122,7 @@ def __call__(self, input_ids, scores, current_tokens, beam_group_idx) -> flow.Tensor:
             previous_group_tokens = current_tokens[
                 batch_idx * self._num_beams : batch_idx * self._num_beams + group_start_idx
             ]
-            # TODO: bincount
-            previous_group_tokens = (
-                previous_group_tokens.to_global(
-                    sbp=dist.get_nd_sbp([flow.sbp.broadcast, flow.sbp.broadcast]),
-                    placement=flow.placement("cuda", list(range(dist.get_world_size()))),
-                )
-                .to_local()
-                .numpy()
-            )
-            token_frequency = np.bincount(previous_group_tokens, minlength=vocab_size)
-            token_frequency = token_frequency.to_global(
-                sbp=dist.get_nd_sbp([flow.sbp.broadcast, flow.sbp.broadcast]),
-                placement=flow.placement("cuda", list(range(dist.get_world_size()))),
-            )
+            token_frequency = flow.bincount(previous_group_tokens, minlength=vocab_size)
             scores[batch_idx * group_size : (batch_idx + 1) * group_size] = (
                 scores[batch_idx * group_size : (batch_idx + 1) * group_size]
                 - self._diversity_penalty * token_frequency
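Review note: the old path pulled the token ids back to host memory, ran `np.bincount`, and then called `.to_global` on the resulting NumPy array (which has no such method) — hence the `TODO`. The new code does the count on-device in one call. A toy illustration of what `flow.bincount` computes for the Hamming diversity penalty, with made-up values:

import oneflow as flow

# Hypothetical ids already emitted by earlier beam groups, vocab of size 6.
previous_group_tokens = flow.tensor([2, 3, 3, 5])
vocab_size = 6

# Count occurrences of each id, zero-padded up to vocab_size so the result
# broadcasts against the [group_size, vocab_size] score matrix.
token_frequency = flow.bincount(previous_group_tokens, minlength=vocab_size)
# token_frequency is now [0, 0, 1, 2, 0, 1]

# Tokens picked by earlier groups are penalized, steering this group away.
diversity_penalty = 0.5
scores = flow.zeros(2, vocab_size)  # 2 beams in this group
scores = scores - diversity_penalty * token_frequency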
16 changes: 14 additions & 2 deletions libai/inference/generator/generation_utils.py
@@ -468,7 +468,7 @@ def greedy_search(
         stopping_criteria = validate_stopping_criteria(stopping_criteria, max_length)
 
         # keep track of which sequences are already finished
-        unfinished_sequences = flow.zeros(input_ids.shape[0]).fill_(1)
+        unfinished_sequences = flow.ones(input_ids.shape[0])
         cur_len = input_ids.shape[-1]
         while True:
             # prepare model inputs
@@ -517,6 +517,10 @@
             if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
                 break
 
+        # Release records
+        self.past_key_values = [None] * len(self.decoder.layers)
+        self.encoder_states = None
+
         return input_ids
 
     def multinomial_sample(
@@ -553,7 +557,7 @@ def multinomial_sample(
         stopping_criteria = validate_stopping_criteria(stopping_criteria, max_length)
         logits_warper = logits_warper if logits_warper is not None else LogitsProcessorList()
 
-        unfinished_sequences = flow.zeros(input_ids.shape[0]).fill_(1)
+        unfinished_sequences = flow.ones(input_ids.shape[0])
         cur_len = input_ids.shape[-1]
 
         while True:
@@ -612,6 +616,10 @@
             if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
                 break
 
+        # Release records
+        self.past_key_values = [None] * len(self.decoder.layers)
+        self.encoder_states = None
+
         return input_ids
 
     def beam_search(
@@ -747,6 +755,10 @@
             beam_indices=beam_indices,
         )
 
+        # Release records
+        self.past_key_values = [None] * len(self.decoder.layers)
+        self.encoder_states = None
+
         return sequence_outputs["sequences"]
 
     @flow.no_grad()
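Review note: the repeated `Release records` blocks are the `fix oom` item from the commit message — once a search loop returns, the per-layer key/value cache and the cached encoder states are dropped so their tensors can be freed before the next generate call. (The `flow.zeros(...).fill_(1)` to `flow.ones(...)` change is a pure simplification; both produce a tensor of ones.) A sketch of the release pattern, with everything except the two attribute names being illustrative:

class GenerationCacheSketch:
    """Illustrative stand-in for the mixin that owns the decoding caches."""

    def __init__(self, num_decoder_layers):
        # One cached (key, value) entry per decoder layer while decoding.
        self.past_key_values = [None] * num_decoder_layers
        self.encoder_states = None

    def release_records(self):
        # Drop references so cached activations do not outlive this call;
        # holding on to them is what caused memory to pile up across
        # repeated generation calls.
        self.past_key_values = [None] * len(self.past_key_values)
        self.encoder_states = None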
30 changes: 17 additions & 13 deletions libai/inference/text_generation.py
@@ -14,18 +14,10 @@
 # limitations under the License.
 
 from libai.inference.basic import BasePipeline
-from libai.tokenizer import T5Tokenizer
 from libai.utils import distributed as dist
 
 
 class TextGenerationPipeline(BasePipeline):
-    def build_tokenizer(self, cfg):
-        tokenizer = T5Tokenizer(
-            "data_test/t5_inference_model/spiece.model",
-            add_bos_token=True,
-        )
-        return tokenizer
-
     def load_pretrain_weight(self, libai_cfg_model, model_path, mode="huggingface"):
         """load pretrained model.
@@ -48,12 +40,21 @@ def load_pretrain_weight(self, libai_cfg_model, model_path, mode="huggingface"):
                 model_type="t5",
             )
             return model_loader.load()
-        else:
-            return super().load_pretrain_weight(
+        elif mode == "libai":
+            from projects.MT5.utils.mt5_loader import T5LoaderLibai
+
+            model_loader = T5LoaderLibai(
                 libai_cfg_model,
+                libai_cfg_model.cfg,
                 model_path,
-                mode=mode,
             )
+            return model_loader.load()
+        elif mode == "random":
+            from libai.engine import DefaultTrainer
+
+            return DefaultTrainer.build_model(self.cfg)
+        else:
+            raise NotImplementedError
 
     def _parse_parameters(self, **pipeline_parameters):
         preprocess_params = {}
@@ -82,8 +83,11 @@ def forward(self, encoder_input_dict, **kwargs) -> dict:
         return {"return_ids": outputs}
 
     def postprocess(self, model_output_dict, **kwargs) -> dict:
-        text = self.tokenizer.decode(model_output_dict["return_ids"][0], skip_special_tokens=True)
-        records = {"generated_text": text}
+        return_ids = model_output_dict["return_ids"]
+        records = [
+            {"generated_text": self.tokenizer.decode(return_ids[i], skip_special_tokens=True)}
+            for i in range(return_ids.size(0))
+        ]
         return records
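Review note: with `build_tokenizer` deleted here, the tokenizer is supplied by the lazy config (see `projects/MT5/configs/t5_inference.py` below), and `postprocess` now returns one record per sequence in the batch rather than decoding only index 0. A hypothetical invocation — the constructor arguments are assumptions about `BasePipeline`, and the paths are placeholders:

from libai.inference.text_generation import TextGenerationPipeline

pipeline = TextGenerationPipeline(
    "projects/MT5/configs/t5_inference.py",  # config wired up below
    model_path="/path/to/checkpoint",
    mode="libai",  # "huggingface", "libai", or "random", per the new dispatch
)

records = pipeline("summarize: LiBai refines its inference pipeline.")
# postprocess yields a list with one dict per generated sequence, e.g.
# [{"generated_text": "..."}]
print(records)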
4 changes: 2 additions & 2 deletions projects/MAE/modeling/mae.py
@@ -362,8 +362,8 @@ def forward_loss(self, imgs, pred, mask):
         target = (target - mean) / (var + 1.0e-6) ** 0.5
 
         loss = (pred - target) ** 2
-        # We want the prev loss to be calculated with float32,
-        # and mean/sum below to be calculated with float16.
+        # We want the prev loss to be calculated with float16,
+        # and mean/sum below to be calculated with float32.
         # this amp_white_identity will affect preceding ops to be float16
         loss = flow._C.amp_white_identity(loss)
         # this amp_black_identity will affect succeeding ops to be float32
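Review note: this edit fixes a swapped comment — the code is unchanged, and the precisions now read the way the identity hints actually behave. A minimal sketch of the white/black identity pattern, assuming (as the comments state) that `flow._C.amp_white_identity` lets AMP run the ops feeding into it in float16 while `flow._C.amp_black_identity` keeps the ops after it in float32:

import oneflow as flow


def normalized_mse(pred, target):
    # Elementwise squared error; the white identity below lets AMP compute
    # this part in float16.
    loss = (pred - target) ** 2
    loss = flow._C.amp_white_identity(loss)
    # The black identity keeps the reduction in float32, which is safer
    # numerically when summing many small values.
    loss = flow._C.amp_black_identity(loss)
    return loss.mean()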
6 changes: 6 additions & 0 deletions projects/MT5/configs/t5_inference.py
@@ -1,7 +1,9 @@
 from .mt5_base import cfg
 from libai.config import LazyCall
+from libai.tokenizer import T5Tokenizer
 from projects.MT5.mt5_model import MT5Model, MT5ForPreTraining
 from configs.common.train import train
+from configs.common.data.t5_dataset import tokenization
 
 cfg.update(
     model_type="t5",
@@ -38,3 +40,7 @@
 )
 
 model = LazyCall(MT5Model)(cfg=cfg)
+tokenization.tokenizer = LazyCall(T5Tokenizer)(
+    vocab_file="/path/to/spiece.model",
+    add_bos_token=True,
+)
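Review note: the tokenizer that used to be hard-coded in `TextGenerationPipeline.build_tokenizer` now lives in the config as a `LazyCall`, so nothing is constructed until the config is instantiated. A sketch of how such a lazily declared tokenizer would be materialized, assuming `instantiate` is exported from `libai.config` alongside `LazyCall`, and with the vocab path as a placeholder:

from libai.config import LazyCall, instantiate
from libai.tokenizer import T5Tokenizer

tokenizer_cfg = LazyCall(T5Tokenizer)(
    vocab_file="/path/to/spiece.model",  # placeholder; point at a real file
    add_bos_token=True,
)

# The T5Tokenizer is only built here, when the lazy config is instantiated.
tokenizer = instantiate(tokenizer_cfg)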
1 change: 0 additions & 1 deletion projects/T5/configs/t5_model_config.py
@@ -12,7 +12,6 @@
     attention_probs_dropout_prob=0.1,
     relative_attention_num_buckets=32,
     embedding_dropout_prob=0.1,
-    num_tokentypes=0,
     initializer_range=0.02,
     layernorm_eps=1e-5,
     amp_enabled=False,
