-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcommands
139 lines (123 loc) · 5.39 KB
/
commands
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# Fine-tune on DocVQA: 75 epochs, train batch size 16, learning rate 2e-5.
# Starts model_runner.py through `accelerate launch`, configured by
# launch_config.yaml. No --load_state is given for this run.
accelerate launch \
  --config_file launch_config.yaml \
  model_runner.py \
  --dataset_file cached_datasets/docvqa_cached_extractive_all_lowercase_True_msr_ocr_True_extraction_v1_enumeration \
  --model_folder docvqa_msr_ocr_75epoch_v1_ignore_unmatched_span_True_batchsize_16_lr_2eneg5 \
  --num_epochs 75 \
  --ignore_unmatched_span 1 \
  --train_batch_size 16 \
  --learning_rate 2e-5 \
  --save_model
# Continue fine-tuning on DocVQA: resume from the "50epoch" state given to
# --load_state and run 50 more epochs into the "100epoch" model folder.
# NOTE(review): the folder names say batchsize_16 / lr_2eneg5, but this run
# passes --train_batch_size 8 and --learning_rate 1e-5 -- confirm that the
# naming is intentional.
accelerate launch \
  --config_file launch_config.yaml \
  model_runner.py \
  --dataset_file cached_datasets/docvqa_cached_extractive_all_lowercase_True_msr_ocr_True_extraction_v1_enumeration \
  --model_folder docvqa_msr_ocr_100epoch_v1_ignore_unmatched_span_True_batchsize_16_lr_2eneg5 \
  --load_state docvqa_msr_ocr_50epoch_v1_ignore_unmatched_span_True_batchsize_16_lr_2eneg5 \
  --num_epochs 50 \
  --ignore_unmatched_span 1 \
  --train_batch_size 8 \
  --learning_rate 1e-5 \
  --save_model \
  --warmup_step 500
# Fine-tune on InfographicVQA: 50 epochs, train batch size 4,
# learning rate 5e-6. No --load_state is given for this run.
accelerate launch \
  --config_file launch_config.yaml \
  model_runner.py \
  --dataset_file cached_datasets/infographicvqa_all_lowercase_True_msr_ocr_True_extraction_v1_enumeration \
  --num_epochs 50 \
  --ignore_unmatched_span 1 \
  --train_batch_size 4 \
  --learning_rate 5e-6 \
  --save_model \
  --model_folder infographicvqa_msr_ocr_50epoch_v1_ignore_unmatched_span_True_batchsize_4
# Validation run (--mode val) on the InfographicVQA dataset, loading the
# DocVQA fine-tuned state "docvqa_msr_ocr_finetune_base_50epoch_smaller_lr".
accelerate launch \
  --config_file launch_config.yaml \
  model_runner.py \
  --dataset_file cached_datasets/infographicvqa_all_lowercase_True_msr_ocr_True_extraction_v1_enumeration \
  --load_state docvqa_msr_ocr_finetune_base_50epoch_smaller_lr \
  --mode val \
  --val_batch_size 4
# InfographicVQA ratio experiment: two validation runs (--mode val) that both
# load the DocVQA fine-tuned state and differ only in the dataset file.

# Run 1: "document_ratio_experiment" dataset.
accelerate launch \
  --config_file launch_config.yaml \
  model_runner.py \
  --dataset_file cached_datasets/infographicvqa_document_ratio_experiment_msr_ocr_True_extraction_v1_enumeration \
  --load_state docvqa_msr_ocr_finetune_base_50epoch_smaller_lr \
  --mode val \
  --val_batch_size 4

# Run 2: "only_non_extractive_experiment" dataset.
accelerate launch \
  --config_file launch_config.yaml \
  model_runner.py \
  --dataset_file cached_datasets/infographicvqa_only_non_extractive_experiment_msr_ocr_True_extraction_v1_enumeration \
  --load_state docvqa_msr_ocr_finetune_base_50epoch_smaller_lr \
  --mode val \
  --val_batch_size 4
# Test: train with the generative approach (--use_generation 1) on
# InfographicVQA: 15 epochs, train batch size 4, learning rate 5e-6.
# The epoch flag is spelled --num_epochs, matching every other training
# command in this file; the previous "--num_epoch" spelling only works if
# model_runner.py's parser accepts unambiguous option prefixes.
accelerate launch \
  --config_file launch_config.yaml \
  model_runner.py \
  --dataset_file cached_datasets/infographicvqa_all_lowercase_True_msr_ocr_True_extraction_v1_enumeration \
  --save_model \
  --model_folder test_generative_15epoch_batchsize_4 \
  --train_batch_size 4 \
  --num_epochs 15 \
  --learning_rate 5e-6 \
  --mode train \
  --use_generation 1
## Test: New embeddings
# Train on DocVQA, from layoutlmv3-base, with --use_embeddings:
# 50 epochs, train batch size 4, learning rate 1e-7.
# The save flag is spelled --save_model, matching every other training
# command in this file; the previous "--save_mode" was a typo that only
# works if model_runner.py's parser accepts unambiguous option prefixes.
accelerate launch \
  --config_file launch_config.yaml \
  model_runner.py \
  --dataset_file cached_datasets/docvqa_cached_extractive_all_lowercase_True_msr_ocr_True_extraction_v1_enumeration \
  --model_folder docvqa_embeddings_90x30_from_zero_50epoch_batchsize_4_lr_1eneg7 \
  --save_model \
  --num_epochs 50 \
  --ignore_unmatched_span 1 \
  --train_batch_size 4 \
  --learning_rate 1e-7 \
  --mode train \
  --use_embeddings
# Fine-tune with --use_embeddings on DocVQA, starting from the state
# "docvqa_msr_ocr_finetune_base_50epoch_smaller_lr":
# 50 epochs, train batch size 4, learning rate 1e-7.
accelerate launch \
  --config_file launch_config.yaml \
  model_runner.py \
  --dataset_file cached_datasets/docvqa_cached_extractive_all_lowercase_True_msr_ocr_True_extraction_v1_enumeration \
  --model_folder docvqa_embeddings_90x30_finetune_baseline_50epoch_batchsize_4_lr_1eneg7 \
  --load_state docvqa_msr_ocr_finetune_base_50epoch_smaller_lr \
  --save_model \
  --num_epochs 50 \
  --ignore_unmatched_span 1 \
  --train_batch_size 4 \
  --learning_rate 1e-7 \
  --mode train \
  --use_embeddings
# Eval: three validation runs (--mode val, --use_embeddings) that load the
# same state and differ only in the dataset file.
# NOTE(review): --load_state names a "30x90" folder, while the training
# commands in this file write "90x30" folders
# (docvqa_embeddings_90x30_finetune_baseline_...). Confirm which name exists
# on disk before running.

# DocVQA.
accelerate launch \
  --config_file launch_config.yaml \
  model_runner.py \
  --dataset_file cached_datasets/docvqa_cached_extractive_all_lowercase_True_msr_ocr_True_extraction_v1_enumeration \
  --load_state docvqa_embeddings_30x90_finetune_baseline_50epoch_batchsize_4_lr_1eneg7 \
  --mode val \
  --use_embeddings

# InfographicVQA, "all_lowercase" dataset.
accelerate launch \
  --config_file launch_config.yaml \
  model_runner.py \
  --dataset_file cached_datasets/infographicvqa_all_lowercase_True_msr_ocr_True_extraction_v1_enumeration \
  --load_state docvqa_embeddings_30x90_finetune_baseline_50epoch_batchsize_4_lr_1eneg7 \
  --mode val \
  --use_embeddings

# InfographicVQA, "document" dataset.
accelerate launch \
  --config_file launch_config.yaml \
  model_runner.py \
  --dataset_file cached_datasets/infographicvqa_document_msr_ocr_True_extraction_v1_enumeration \
  --load_state docvqa_embeddings_30x90_finetune_baseline_50epoch_batchsize_4_lr_1eneg7 \
  --mode val \
  --use_embeddings
#### Test on infographicvqa
# Two validation runs (--mode val) with --resize 1.
# NOTE(review): --use_embeddings is given an explicit value here (1 / 0),
# whereas earlier commands in this file pass it with no value. Confirm which
# form model_runner.py's parser currently expects.

# Embeddings-v2 state on the "all_lowercase" dataset, embeddings on.
accelerate launch \
  --config_file launch_config.yaml \
  model_runner.py \
  --dataset_file cached_datasets/infographicvqa_all_lowercase_True_msr_ocr_True_extraction_v1_enumeration \
  --load_state docvqa_embeddings_v2_msr_ocr_20epoch_batchsize_4_lr_5eneg6 \
  --mode val \
  --use_embeddings 1 \
  --resize 1

# DocVQA fine-tuned state on the "very_horizontal" dataset, embeddings off.
accelerate launch \
  --config_file launch_config.yaml \
  model_runner.py \
  --dataset_file cached_datasets/infographicvqa_very_horizontal_msr_ocr_True_extraction_v1_enumeration \
  --load_state docvqa_msr_ocr_finetune_base_50epoch_smaller_lr \
  --mode val \
  --use_embeddings 0 \
  --resize 1