-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_glue.sh
49 lines (43 loc) · 1.94 KB
/
run_glue.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/bin/bash
# Launch run_glue.py for one GLUE task with the hyper-parameters defined
# below, and mirror the training command's stdout into a log file.
#
# Usage: bash run_glue.sh
#   run_glue.py is referenced by relative path, so run this from the
#   directory that contains it.
#
# Outputs:
#   exp_results_glue/<exp_name>/     passed to run_glue.py as --output-dir
#   exp_results_glue/<exp_name>.log  copy of the training command's stdout
#
# Exit status: non-zero if any step fails, including run_glue.py itself.

# -e: abort on the first failing command.
# -u: treat use of an unset variable as an error (catches typos in names).
# pipefail: the `python | tee` pipeline fails when python fails; without it
#           the pipeline's status is tee's, and a crashed run looks successful.
set -euo pipefail
# Trace every command (to stderr) so the exact invocation is visible.
set -x

# Model parameters
model_name=roberta-base          # Pre-trained model name
task_name=rte                    # Task name for GLUE benchmark (e.g., RTE for Recognizing Textual Entailment)

# Training parameters
seed=42                          # Random seed for reproducibility
lr=3e-4                          # Learning rate
max_length=128                   # Maximum sequence length for input data
per_device_train_batch_size=32   # Training batch size per device
per_device_eval_batch_size=128   # Evaluation batch size per device
num_train_epochs=10              # Number of training epochs

# HyCLoRA core parameters
use_hyclora=True                 # Whether to use HyCLoRA (passed verbatim as the flag's value)
layer_type=intra_inter           # Type of HyCLoRA layer (e.g., intra_inter)
iteration_threshold=5            # Calibration steps for HyCLoRA
softmax_outlier_ratio=0.05       # Outlier ratio for softmax
layernorm_outlier_ratio=0.005    # Outlier ratio for LayerNorm
q_bit=2                          # Quantization bit width

# Experiment tag and output directory. The tag encodes the settings that
# distinguish one run from another so results do not overwrite each other.
tag="${model_name}-${use_hyclora}-${layer_type}-${q_bit}-${layernorm_outlier_ratio}-${softmax_outlier_ratio}"
exp_name="glue-${task_name}-${tag}"
results_root=exp_results_glue
output_dir="${results_root}/${exp_name}"
log_file="${output_dir}.log"

# The log file lives directly under the results root, so that directory must
# exist before tee opens it.
mkdir -p -- "${results_root}"

# Arguments are collected in an array so each value is passed as exactly one
# word, regardless of its content.
train_args=(
  --model-name-or-path "${model_name}"
  --task-name "${task_name}"
  --max-length "${max_length}"
  --per-device-train-batch-size "${per_device_train_batch_size}"
  --per-device-eval-batch-size "${per_device_eval_batch_size}"
  --learning-rate "${lr}"
  --num-train-epochs "${num_train_epochs}"
  --seed "${seed}"
  --output-dir "${output_dir}"
  --pad-to-max-length
  --use-hyclora "${use_hyclora}"
  --layer-type "${layer_type}"
  --iteration-threshold "${iteration_threshold}"
  --layernorm-outlier-ratio "${layernorm_outlier_ratio}"
  --softmax-outlier-ratio "${softmax_outlier_ratio}"
  --q-bit "${q_bit}"
)

# Training command. `python -u` keeps output unbuffered so the log follows the
# run in real time. Only stdout is piped to tee; stderr still goes to the
# terminal and is not written to the log file.
python -u run_glue.py "${train_args[@]}" | tee -- "${log_file}"