-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_pretrain.sh
executable file
·41 lines (39 loc) · 1.24 KB
/
run_pretrain.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/env bash
# Launch DNABERT masked-LM (MLM) pre-training with a k-mer tokenizer.
#
# Expects the DNABERT repository to be checked out next to this script
# at ./DNABERT, with sample pre-training data under examples/sample_data/pre/.
# All configuration is passed to examples/run_pretrain.py via exported
# environment variables and CLI flags below.
set -euo pipefail

# Absolute directory containing this script, regardless of the caller's cwd.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
PATH_TO_DNABERT_REPO="$SCRIPT_DIR/DNABERT"

# Fail loudly if the repo is not where we expect it, instead of letting
# python run in the wrong directory.
cd "$PATH_TO_DNABERT_REPO/examples" \
  || { printf 'error: %s not found\n' "$PATH_TO_DNABERT_REPO/examples" >&2; exit 1; }

export KMER=6                                   # k-mer size; selects the matching bert-config/vocab
export TRAIN_FILE=sample_data/pre/6_3k.txt      # NOTE: same file used for train and eval (sample data)
export TEST_FILE=sample_data/pre/6_3k.txt
export SOURCE="$PATH_TO_DNABERT_REPO"
export OUTPUT_PATH="output$KMER"
export TOKENIZER_NAME="$PATH_TO_DNABERT_REPO/src/transformers/dnabert-config/bert-config-$KMER/vocab.txt"

# Effective batch size = per_gpu_train_batch_size * gradient_accumulation_steps
# (10 * 25 = 250 per GPU).
python run_pretrain.py \
    --output_dir "$OUTPUT_PATH" \
    --model_type=dna \
    --tokenizer_name="$TOKENIZER_NAME" \
    --config_name="$SOURCE/src/transformers/dnabert-config/bert-config-$KMER/config.json" \
    --do_train \
    --train_data_file="$TRAIN_FILE" \
    --do_eval \
    --eval_data_file="$TEST_FILE" \
    --mlm \
    --gradient_accumulation_steps 25 \
    --per_gpu_train_batch_size 10 \
    --per_gpu_eval_batch_size 6 \
    --save_steps 500 \
    --save_total_limit 20 \
    --max_steps 200000 \
    --evaluate_during_training \
    --logging_steps 500 \
    --line_by_line \
    --learning_rate 4e-4 \
    --block_size 512 \
    --adam_epsilon 1e-6 \
    --weight_decay 0.01 \
    --beta1 0.9 \
    --beta2 0.98 \
    --mlm_probability 0.025 \
    --warmup_steps 10000 \
    --overwrite_output_dir \
    --n_process 12 \
    --fp16