train_all_models.sh
#!/bin/bash
#SBATCH --job-name=bioasq-large-all-models
#SBATCH --cpus-per-task=8 --mem=8000M
#SBATCH -p gpu --gres=gpu:a100:1
#SBATCH --output=/home/rwg642/RetrievalAugmentedClassification/bioasq-large-all-models.txt
#SBATCH --time=12:00:00
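
# Pipeline (per training-sample budget): (1) train a standard classifier,
# (2) build a retrieval datastore with the fine-tuned checkpoint and train a
# retrieval-augmented (RA) classifier, (3) rebuild the datastore with
# constrained search and train a second RA classifier.
# Submit with: sbatch train_all_models.sh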
module load miniconda/4.12.0
conda activate kiddothe2b
echo $SLURMD_NODENAME
echo $CUDA_VISIBLE_DEVICES
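
# EXPERIMENT CONFIGURATION: PubMedBERT (large) on the BioASQ-L2 multilabel dataset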
MODEL_PATH='microsoft/BiomedNLP-PubMedBERT-large-uncased-abstract'
DATASET_NAME='bioasq-l2'
BATCH_SIZE=16
MAX_SEQ_LENGTH=512
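
# Make the repo-local classifier/ and retriever/ packages importable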
export PYTHONPATH=.
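# Avoid the HuggingFace tokenizers fork-parallelism warning in dataloader workers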
export TOKENIZERS_PARALLELISM=false
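
# Sweep over training-set sizes; add more budgets here (e.g. 1000 5000) to extend the sweep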
for NO_SAMPLES in 10000
do
  # DELETE CACHED DATASET
  rm -rf ../.cache/huggingface/datasets/kiddothe2b___multilabel_bench/${DATASET_NAME}

  # TRAIN STANDARD CLASSIFIER
  python classifier/train_classifier.py \
      --model_name_or_path ${MODEL_PATH} \
      --retrieval_augmentation false \
      --dataset_name ${DATASET_NAME} \
      --output_dir data/${DATASET_NAME}/${MODEL_PATH}-${NO_SAMPLES} \
      --do_train \
      --do_eval \
      --do_pred \
      --overwrite_output_dir \
      --load_best_model_at_end \
      --metric_for_best_model micro-f1 \
      --greater_is_better True \
      --max_seq_length ${MAX_SEQ_LENGTH} \
      --evaluation_strategy epoch \
      --save_strategy epoch \
      --save_total_limit 5 \
      --learning_rate 3e-5 \
      --per_device_train_batch_size ${BATCH_SIZE} \
      --per_device_eval_batch_size ${BATCH_SIZE} \
      --seed 42 \
      --num_train_epochs 20 \
      --max_train_samples ${NO_SAMPLES} \
      --warmup_ratio 0.05 \
      --weight_decay 0.01 \
      --fp16 \
      --fp16_full_eval \
      --lr_scheduler_type cosine \
      --gradient_accumulation_steps 2 \
      --eval_accumulation_steps 2
  # DELETE CACHED DATASET
  rm -rf ../.cache/huggingface/datasets/kiddothe2b___multilabel_bench/${DATASET_NAME}

  # CREATE DATASTORE
  python retriever/apply_retriever.py \
      --dataset_name ${DATASET_NAME} \
      --output_dir ${DATASET_NAME}-${NO_SAMPLES}-embeddings \
      --model_name data/${DATASET_NAME}/${MODEL_PATH}-${NO_SAMPLES} \
      --n_samples ${NO_SAMPLES}

  # TRAIN RA CLASSIFIER
  python classifier/train_classifier.py \
      --model_name_or_path ${MODEL_PATH} \
      --embeddings_path ${DATASET_NAME}-${NO_SAMPLES}-embeddings \
      --retrieval_augmentation true \
      --no_neighbors 32 \
      --dec_layers 1 \
      --dec_attention_heads 1 \
      --dataset_name ${DATASET_NAME} \
      --output_dir data/${DATASET_NAME}/${MODEL_PATH}-ra-${NO_SAMPLES} \
      --do_train \
      --do_eval \
      --do_pred \
      --overwrite_output_dir \
      --load_best_model_at_end \
      --metric_for_best_model micro-f1 \
      --greater_is_better True \
      --max_seq_length ${MAX_SEQ_LENGTH} \
      --evaluation_strategy epoch \
      --save_strategy epoch \
      --save_total_limit 5 \
      --learning_rate 3e-5 \
      --per_device_train_batch_size ${BATCH_SIZE} \
      --per_device_eval_batch_size ${BATCH_SIZE} \
      --seed 42 \
      --num_train_epochs 20 \
      --max_train_samples ${NO_SAMPLES} \
      --warmup_ratio 0.05 \
      --weight_decay 0.01 \
      --fp16 \
      --fp16_full_eval \
      --lr_scheduler_type cosine \
      --gradient_accumulation_steps 2 \
      --eval_accumulation_steps 2
  # DELETE CACHED DATASET
  rm -rf ../.cache/huggingface/datasets/kiddothe2b___multilabel_bench/${DATASET_NAME}

  # CREATE DATASTORE (CONSTRAINED SEARCH)
  python retriever/apply_retriever.py \
      --dataset_name ${DATASET_NAME} \
      --output_dir ${DATASET_NAME}-${NO_SAMPLES}-constrained-embeddings \
      --model_name data/${DATASET_NAME}/${MODEL_PATH}-${NO_SAMPLES} \
      --n_samples ${NO_SAMPLES} \
      --constrained_search

  # TRAIN RA CLASSIFIER (CONSTRAINED DATASTORE)
  python classifier/train_classifier.py \
      --model_name_or_path ${MODEL_PATH} \
      --embeddings_path ${DATASET_NAME}-${NO_SAMPLES}-constrained-embeddings \
      --retrieval_augmentation true \
      --no_neighbors 32 \
      --dec_layers 1 \
      --dec_attention_heads 1 \
      --dataset_name ${DATASET_NAME} \
      --output_dir data/${DATASET_NAME}/${MODEL_PATH}-ra-constrained-${NO_SAMPLES} \
      --do_train \
      --do_eval \
      --do_pred \
      --overwrite_output_dir \
      --load_best_model_at_end \
      --metric_for_best_model micro-f1 \
      --greater_is_better True \
      --max_seq_length ${MAX_SEQ_LENGTH} \
      --evaluation_strategy epoch \
      --save_strategy epoch \
      --save_total_limit 5 \
      --learning_rate 3e-5 \
      --per_device_train_batch_size ${BATCH_SIZE} \
      --per_device_eval_batch_size ${BATCH_SIZE} \
      --seed 42 \
      --num_train_epochs 20 \
      --max_train_samples ${NO_SAMPLES} \
      --warmup_ratio 0.05 \
      --weight_decay 0.01 \
      --fp16 \
      --fp16_full_eval \
      --lr_scheduler_type cosine \
      --gradient_accumulation_steps 2 \
      --eval_accumulation_steps 2
done