diff --git a/libai/onnx_export/onnx_inference/t5_onnx_infer.py b/libai/onnx_export/onnx_inference/t5_onnx_infer.py
new file mode 100644
index 000000000..b07d44cd3
--- /dev/null
+++ b/libai/onnx_export/onnx_inference/t5_onnx_infer.py
@@ -0,0 +1,75 @@
+# coding=utf-8
+# Copyright 2021 The OneFlow Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from collections import OrderedDict
+from typing import List, Optional
+
+import numpy as np
+import onnxruntime as ort
+
+
+class OnnxModel:
+    def __init__(
+        self,
+        onnx_filename,
+        providers: Optional[List[str]] = None,
+        ort_optimize: bool = True,
+    ):
+        ort_sess_opt = ort.SessionOptions()
+        ort_sess_opt.graph_optimization_level = (
+            ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
+            if ort_optimize
+            else ort.GraphOptimizationLevel.ORT_DISABLE_ALL
+        )
+        if providers is None:
+            # Prefer TensorRT/CUDA when the installed onnxruntime build
+            # exposes them, and fall back to CPU otherwise. Checking
+            # availability is more robust than comparing version strings,
+            # which misorders e.g. "1.10.0" < "1.9.0" lexicographically.
+            preferred = [
+                "TensorrtExecutionProvider",
+                "CUDAExecutionProvider",
+                "CPUExecutionProvider",
+            ]
+            available = ort.get_available_providers()
+            providers = [p for p in preferred if p in available]
+        self.sess = ort.InferenceSession(
+            onnx_filename, sess_options=ort_sess_opt, providers=providers
+        )
+
+    def forward(self, input_list):
+        # Inputs are fed positionally: input_list must follow the input
+        # order of the exported graph.
+        ipt_dict = OrderedDict()
+        for idx, ipt in enumerate(self.sess.get_inputs()):
+            ipt_dict[ipt.name] = input_list[idx]
+        # Passing None as output names fetches every model output.
+        onnx_res = self.sess.run(None, ipt_dict)
+        return onnx_res
+
+
+if __name__ == "__main__":
+    onnx_model = OnnxModel("model.onnx")
+    # Dummy inputs matching the shapes and dtypes used at export time.
+    input_list = [
+        np.ones((1, 5), dtype=np.int64),  # encoder_input_ids
+        np.ones((1, 3), dtype=np.int64),  # decoder_input_ids
+        np.ones((1, 5, 5), dtype=bool),  # encoder_attn_mask
+        np.ones((1, 3, 3), dtype=bool),  # decoder_attn_mask
+        np.ones((1, 3, 5), dtype=bool),  # encoder_decoder_attn_mask
+    ]
+
+    print(onnx_model.forward(input_list))
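Note on input ordering: `OnnxModel.forward` feeds the session positionally, so `input_list` must follow the exported graph's input order exactly. Below is a minimal name-keyed sketch, assuming the `model.onnx` produced by the export script that follows; the input names themselves are generated by oneflow_onnx, so they are printed here rather than hard-coded:

```python
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])

# oneflow_onnx generates its own input names; inspect them before
# building a feed dict by name.
for inp in sess.get_inputs():
    print(inp.name, inp.shape, inp.type)

# Zip the generated names with the dummy tensors in graph-input order.
dummies = [
    np.ones((1, 5), dtype=np.int64),  # encoder_input_ids
    np.ones((1, 3), dtype=np.int64),  # decoder_input_ids
    np.ones((1, 5, 5), dtype=bool),  # encoder_attn_mask
    np.ones((1, 3, 3), dtype=bool),  # decoder_attn_mask
    np.ones((1, 3, 5), dtype=bool),  # encoder_decoder_attn_mask
]
feed = {inp.name: arr for inp, arr in zip(sess.get_inputs(), dummies)}
print(sess.run(None, feed)[0].shape)
```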
diff --git a/libai/onnx_export/t5_to_onnx.py b/libai/onnx_export/t5_to_onnx.py
new file mode 100644
index 000000000..9fe7f5b67
--- /dev/null
+++ b/libai/onnx_export/t5_to_onnx.py
@@ -0,0 +1,118 @@
+# coding=utf-8
+# Copyright 2021 The OneFlow Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import oneflow as flow
+from oneflow import nn
+from oneflow_onnx.oneflow2onnx.util import convert_to_onnx_and_check
+
+from libai.config import LazyConfig
+from libai.engine import DefaultTrainer
+
+
+def get_model(config_file):
+    cfg = LazyConfig.load(config_file)
+
+    cfg.model.cfg.mlp_type = "t5"
+    cfg.model.cfg.pretrained_model_path = None
+    cfg.dataloader = None
+    cfg.tokenization = None
+
+    print("Building model...")
+    model = DefaultTrainer.build_model(cfg)
+    print("Build model finished.")
+
+    return model
+
+
+class T5Graph(nn.Graph):
+    def __init__(self, eager_model):
+        super().__init__()
+        self.model = eager_model
+
+    def build(
+        self,
+        encoder_input_ids,
+        decoder_input_ids,
+        encoder_attn_mask,
+        decoder_attn_mask,
+        encoder_decoder_attn_mask,
+    ):
+        # Argument order follows libai's T5 forward signature: the two
+        # token-id tensors first, then the three attention masks.
+        out = self.model(
+            encoder_input_ids,
+            decoder_input_ids,
+            encoder_attn_mask,
+            decoder_attn_mask,
+            encoder_decoder_attn_mask,
+        )
+        return out["prediction_scores"]
+
+
+if __name__ == "__main__":
+    model = get_model("projects/MT5/configs/mt5_pretrain.py")
+    model.eval()
+
+    t5_graph = T5Graph(model)
+
+    # Dummy global tensors used to compile the static graph: token ids are
+    # (batch, seq_len) int64, attention masks are (batch, query_len, key_len) bool.
+    placement = flow.placement("cuda", ranks=[0])
+    encoder_input_ids = flow.ones(
+        1, 5, dtype=flow.int64, sbp=flow.sbp.broadcast, placement=placement
+    )
+    decoder_input_ids = flow.ones(
+        1, 3, dtype=flow.int64, sbp=flow.sbp.broadcast, placement=placement
+    )
+    encoder_attn_mask = flow.ones(
+        1, 5, 5, dtype=flow.bool, sbp=flow.sbp.broadcast, placement=placement
+    )
+    decoder_attn_mask = flow.ones(
+        1, 3, 3, dtype=flow.bool, sbp=flow.sbp.broadcast, placement=placement
+    )
+    encoder_decoder_attn_mask = flow.ones(
+        1, 3, 5, dtype=flow.bool, sbp=flow.sbp.broadcast, placement=placement
+    )
+
+    # Optionally sanity-check the graph before export:
+    # output = t5_graph(
+    #     encoder_input_ids,
+    #     decoder_input_ids,
+    #     encoder_attn_mask,
+    #     decoder_attn_mask,
+    #     encoder_decoder_attn_mask,
+    # )
+    # print(output)
+
+    print("Compiling the graph, which may take some time. Please wait...")
+    t5_graph._compile(
+        encoder_input_ids,
+        decoder_input_ids,
+        encoder_attn_mask,
+        decoder_attn_mask,
+        encoder_decoder_attn_mask,
+    )
+
+    convert_to_onnx_and_check(
+        t5_graph,
+        external_data=False,
+        opset=11,
+        flow_weight_dir=None,
+        onnx_model_path="./",
+        dynamic_batch_size=False,
+        device="gpu_global",
+        input_tensor_range=[0, 10],
+    )
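`convert_to_onnx_and_check` already compares OneFlow and ONNX outputs internally; the sketch below just makes that comparison explicit, and could be appended to the end of `t5_to_onnx.py`'s `__main__`, reusing the tensors defined there. The ONNX file name, the use of the first output, and the tolerances are assumptions, not part of the patch:

```python
import numpy as np
import onnxruntime as ort

# Run the compiled OneFlow graph on the dummy inputs (reuses t5_graph
# and the global tensors defined in __main__ above).
flow_out = t5_graph(
    encoder_input_ids,
    decoder_input_ids,
    encoder_attn_mask,
    decoder_attn_mask,
    encoder_decoder_attn_mask,
)

# Feed the same values to the exported model; global tensors are pulled
# back to local numpy arrays first.
sess = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
inputs = [
    encoder_input_ids,
    decoder_input_ids,
    encoder_attn_mask,
    decoder_attn_mask,
    encoder_decoder_attn_mask,
]
feed = {inp.name: t.to_local().numpy() for inp, t in zip(sess.get_inputs(), inputs)}
onnx_out = sess.run(None, feed)[0]

# Loose tolerances: fp32 kernels differ slightly across backends.
np.testing.assert_allclose(flow_out.to_local().numpy(), onnx_out, rtol=1e-2, atol=1e-3)
```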