# Copyright 2023 Huy Le Nguyen (@nglehuy)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os

from tensorflow_asr import datasets, keras, tf, tokenizers  # import to aid logging messages
from tensorflow_asr.callbacks import PredictLogger
from tensorflow_asr.configs import Config
from tensorflow_asr.models.base_model import BaseModel
from tensorflow_asr.utils import app_util, cli_util, env_util, file_util

env_util.setup_logging()
logger = tf.get_logger()


def main(
    config_path: str,
    dataset_type: str,
    datadir: str,
    outputdir: str,
    h5: str = None,
    mxp: str = "none",
    bs: int = 1,
    device: int = 0,
    cpu: bool = False,
    jit_compile: bool = False,
    repodir: str = os.path.realpath(os.path.join(os.path.dirname(__file__), "..")),
):
    outputdir = file_util.preprocess_paths(outputdir, isdir=True)
    checkpoint_name = os.path.splitext(os.path.basename(h5))[0]

    # Environment setup: seed, target device (GPU index or CPU) and mixed-precision policy
    env_util.setup_seed()
    env_util.setup_devices([device], cpu=cpu)
    env_util.setup_mxp(mxp=mxp)

    config = Config(config_path, training=False, repodir=repodir, datadir=datadir)
    batch_size = bs

    tokenizer = tokenizers.get(config)

    # Build the model from config, attach the tokenizer and restore the checkpoint weights
    model: BaseModel = keras.models.model_from_config(config.model_config)
    model.tokenizer = tokenizer
    model.make(batch_size=batch_size)
    model.load_weights(h5, by_name=file_util.is_hdf5_filepath(h5), skip_mismatch=False)
    model.jit_compile = jit_compile
    model.summary()

    for test_data_config in config.data_config.test_dataset_configs:
        if not test_data_config.name:
            raise ValueError("Test dataset name must be provided")
        logger.info(f"Testing dataset: {test_data_config.name}")

        # One TSV of hypotheses per test dataset, named after the dataset and the checkpoint
        output = os.path.join(outputdir, f"{test_data_config.name}-{checkpoint_name}.tsv")

        test_dataset = datasets.get(tokenizer=tokenizer, dataset_config=test_data_config, dataset_type=dataset_type)
        test_data_loader = test_dataset.create(batch_size)

        # Ask before overwriting an existing output file
        overwrite = True
        if tf.io.gfile.exists(output):
            while overwrite not in ["yes", "no"]:
                overwrite = input(f"File {output} exists, overwrite? (yes/no): ").lower()
            overwrite = overwrite == "yes"

        if overwrite:
            with file_util.save_file(output) as output_file_path:
                model.predict(
                    test_data_loader,
                    verbose=1,
                    callbacks=[
                        PredictLogger(test_dataset=test_dataset, output_file_path=output_file_path),
                    ],
                )

        # Evaluate the written hypotheses (e.g. error-rate metrics) and log the results
        evaluation_outputs = app_util.evaluate_hypotheses(output)
        logger.info(json.dumps(evaluation_outputs, indent=2))


if __name__ == "__main__":
    cli_util.run(main)
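
# Example invocation (illustrative sketch, not part of the repo): assuming cli_util.run
# exposes the keyword arguments of main() as command-line flags, a test run could look
# like the following. The paths, the flag spelling, and the "slice" dataset type are
# placeholders, not values confirmed by this script.
#
#   python test.py \
#       --config_path=/path/to/config.yml \
#       --dataset_type=slice \
#       --datadir=/path/to/data \
#       --outputdir=/path/to/results \
#       --h5=/path/to/checkpoint.h5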