evaluate.sh (forked from lm-sys/FastChat)
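# Evaluation entry point for ProLex. The commented-out block below appears to be
# an earlier torchrun/FSDP evaluation command for a fine-tuned Vicuna-13B checkpoint
# on the ProLex dev split, kept here for reference.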
# CUDA_VISIBLE_DEVICES=6,7 torchrun --nproc_per_node=2 --master_port=20003 evaluate.py \
# --model_name_or_path "/local/data/xuanming/models/output_vicuna_13b_train_combined_lr_1e_5/checkpoint-1149" \
# --data_path data/dev/ProLex_v1.0_dev.csv \
# --fsdp "full_shard auto_wrap" \
# --fsdp_transformer_layer_cls_to_wrap 'LlamaDecoderLayer' \
# --bf16 True \
# --output_dir /local/data/xuanming/models/output_vicuna_13b_trial_lr_1e_5 \
# --num_train_epochs 10 \
# --per_device_train_batch_size 1 \
# --per_device_eval_batch_size 2 \
# --gradient_accumulation_steps 1 \
# --evaluation_strategy "no" \
# --save_strategy "epoch" \
# --save_steps 1200 \
# --save_total_limit 10 \
# --learning_rate 5e-5 \
# --weight_decay 0. \
# --warmup_steps 100 \
# --lr_scheduler_type "linear" \
# --logging_steps 1 \
# --tf32 True \
# --model_max_length 2048 \
# --gradient_checkpointing True \
# --lazy_preprocess False
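# Active command: score the example predictions against the ProLex v1.0 test split.
# Note that evaluate.py appears to take a predictions CSV via --model_name_or_path here.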
python3 evaluate.py \
--model_name_or_path "outputs/example_predictions.csv" \
--data_path data/test/ProLex_v1.0_test.csv
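# Alternative (commented out): what appears to be scoring of GPT-4 predictions on the CEFR test split.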
# python3 evaluate.py \
# --model_name_or_path "gpt-4-32" \
# --data_path data/test/test_final_cefr.csv \
# --bf16 True \
# --output_dir outputs/vicuna-7b-v1.5_trial \
# --num_train_epochs 3 \
# --per_device_train_batch_size 1 \
# --per_device_eval_batch_size 2 \
# --gradient_accumulation_steps 16 \
# --evaluation_strategy "no" \
# --save_strategy "steps" \
# --save_steps 1200 \
# --save_total_limit 10 \
# --learning_rate 2e-5 \
# --weight_decay 0. \
# --warmup_ratio 0.03 \
# --lr_scheduler_type "cosine" \
# --logging_steps 1 \
# --tf32 True \
# --model_max_length 2048 \
# --gradient_checkpointing True \
# --lazy_preprocess False