forked from SAITPublic/MLPerf_Training_v2.0
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconvert_tf_checkpoint.py
82 lines (65 loc) · 2.89 KB
/
convert_tf_checkpoint.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
import argparse
from modeling import BertForPreTraining, BertConfig
def parse_arguments(argv=None):
    """Parse the command-line options of the TF -> PyTorch checkpoint converter.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process command line, so existing
            ``parse_arguments()`` calls behave exactly as before.

    Returns:
        argparse.Namespace with the attributes ``bert_model``,
        ``tf_checkpoint``, ``bert_config_path`` and ``output_checkpoint``.
    """
    parser = argparse.ArgumentParser()

    # NOTE(review): --bert_model is parsed but not read anywhere else in this
    # script as shown; kept so existing command lines keep working.
    parser.add_argument("--bert_model", default="bert-large-uncased", type=str,
                        help="Bert pre-trained model selected in the list: bert-base-uncased, "
                             "bert-large-uncased, bert-base-cased, bert-base-multilingual, bert-base-chinese.")
    parser.add_argument('--tf_checkpoint',
                        type=str,
                        default="/google_bert_data",
                        help="Path to directory containing TF checkpoint")
    # NOTE(review): prepare_model() hands this value to
    # BertConfig.from_json_file(), so it presumably has to name the JSON file
    # itself, while the default looks like a directory -- confirm.
    parser.add_argument('--bert_config_path',
                        type=str,
                        default="/workspace/phase1",
                        help="Path bert_config.json is located in")
    parser.add_argument('--output_checkpoint', type=str,
                        default='./checkpoint.pt',
                        help="Path to output PyT checkpoint")

    # argv=None makes argparse fall back to sys.argv[1:].
    return parser.parse_args(argv)
def prepare_model(args, device):
    """Build a BertForPreTraining model initialised from a TensorFlow checkpoint.

    Args:
        args: Parsed options. Reads ``args.bert_config_path`` (passed to
            ``BertConfig.from_json_file``) and ``args.tf_checkpoint`` (passed
            to ``BertForPreTraining.from_pretrained``).
        device: Accepted for interface compatibility; not used by this
            function.

    Returns:
        The model object returned by ``BertForPreTraining.from_pretrained``.
    """
    config = BertConfig.from_json_file(args.bert_config_path)

    # Padding for divisibility by 8: round the vocabulary size up to the next
    # multiple of 8 and report the new size.
    remainder = config.vocab_size % 8
    if remainder:
        config.vocab_size += 8 - remainder
        print('padded vocab size to: {}'.format(config.vocab_size))

    # Set some options that the config file is expected to have (but don't
    # need to be set properly at this point).
    for option in (
        "pad",
        "unpad",
        "dense_seq_output",
        "fused_mha",
        "fused_gelu_bias",
        "fuse_qkv",
        "fuse_scale",
        "fuse_mask",
        "fuse_dropout",
        "apex_softmax",
        "enable_stream",
    ):
        setattr(config, option, False)

    # Coupling rules between the options. With every flag False as set above,
    # only the `pad` rule fires, so enable_stream ends up True.
    if config.fuse_mask:
        config.apex_softmax = True
    if not config.pad:
        config.enable_stream = True
    if config.unpad:
        config.fused_mha = False

    # Load from TF checkpoint
    return BertForPreTraining.from_pretrained(args.tf_checkpoint, from_tf=True, config=config)
def main():
    """Convert the TF checkpoint selected on the command line to a PyTorch file.

    Parses the options, builds the model via prepare_model() and saves its
    state dict under the 'model' key to ``args.output_checkpoint``.
    """
    cli_args = parse_arguments()
    converted = prepare_model(cli_args, torch.device("cuda"))
    checkpoint = {'model': converted.state_dict()}
    torch.save(checkpoint, cli_args.output_checkpoint)


# Run the conversion only when executed as a script, not on import.
if __name__ == "__main__":
    main()