# infer_large.sh
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ---- Model checkpoint ----
MODEL="checkpoint.pt"
MODEL_DIR="../../checkpoints/QA-PubMedQA-BioGPT-Large"

# ---- Dataset layout ----
# Every other data path below is derived from DATA_DIR.
DATA_DIR="${PWD}/../../data/PubMedQA/biogpt-large-pqal_qcl_ansis-bin"
BIN_DATA_DIR="${DATA_DIR##*/}"     # last path component of DATA_DIR
BASE_DATA_DIR="${DATA_DIR%/*}"     # parent directory of DATA_DIR
RAW_DATA_DIR="${BASE_DATA_DIR}/raw"
DATA_PREFIX="${BIN_DATA_DIR%-*}"   # directory name without its final "-<suffix>"

# ---- Files consumed / produced by the steps below ----
INPUT_FILE="${RAW_DATA_DIR}/${DATA_PREFIX}_test.tok.bpe.x"
GOLD_FILE="${RAW_DATA_DIR}/test_ground_truth.json"
# The generated output is stored next to the checkpoint, named after it.
OUTPUT_FILE="${MODEL_DIR}/generate_${MODEL}"
# inference
# Generation runs only when the output file does not exist yet, so a re-run
# of this script reuses the earlier result instead of regenerating it.
if [ ! -f "$OUTPUT_FILE" ]; then
  echo "Begin inferencing ${INPUT_FILE} using ${MODEL_DIR}/${MODEL}"
  # Every argument is quoted: DATA_DIR and INPUT_FILE are built from ${PWD},
  # which may contain whitespace and would otherwise be split into several
  # arguments. On failure, stop with the failing status so the later steps
  # do not post-process a missing or incomplete output file.
  python ../../inference.py \
    --data_dir="${DATA_DIR}" \
    --model_dir="${MODEL_DIR}" \
    --model_file="${MODEL}" \
    --src_file="${INPUT_FILE}" \
    --output_file="${OUTPUT_FILE}" \
    || exit
fi
# The detokenizer is located through ${MOSES}; abort with a clear message
# when it is unset or empty instead of silently trying to run
# /scripts/tokenizer/detokenizer.perl and evaluating an empty result.
: "${MOSES:?MOSES must be set to the directory containing scripts/tokenizer/detokenizer.perl}"
# debpe
# Removes every "@@ " sequence from the output file, in place.
sed -i "s/@@ //g" "${OUTPUT_FILE}"
# detok
perl "${MOSES}/scripts/tokenizer/detokenizer.perl" -l en -a < "${OUTPUT_FILE}" > "${OUTPUT_FILE}.detok"
# postprocess
python postprocess.py "${OUTPUT_FILE}.detok"
# eval
# NOTE(review): the .detok.extracted.txt file is presumably written by
# postprocess.py above -- confirm against that script.
python hard_match_evaluation.py "${OUTPUT_FILE}.detok.extracted.txt" "${GOLD_FILE}"