-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathconfig.json
64 lines (57 loc) · 1.74 KB
/
config.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
{
"max_cache_size_gb": 320,
"cache_folder": "D:\\randoBS",
"dataset_path": "F:\\desctop\\Converted_tiny_completion.jsonl",
"validation_dataset_path": "F:\\desctop\\Converted_tiny_completion.jsonl",
"teacher_models_folder": "E:\\fucking_virus\\desctop\\TinyLlama-1.1B-intermediate-step-1431k-3T_safetensors",
"student_path": "E:\\fucking_virus\\desctop\\TinyLlama-1.1B-intermediate-step-1431k-3T_safetensors",
"ignore_model_type": true,
"rebase_dataset": false,
"use_teachers": true,
"context_len": 2048,
"save_sys_range": true,
"save_user_range": true,
"save_assistant_range": true,
"crop_distr_to_size": 32000,
"enable_topK": true,
"save_topK": 400,
"device": "cuda:0",
"num_inference_workers": 1,
"reserve_vram": [0.5, 0.5],
"num_epochs": 2,
"num_warmup_steps": 200,
"batch_size": 4,
"grad_accum_batches": 4,
"grad_checkpointing": false,
"temperature": 1,
"lr": 1e-6,
"decay_start": 0.9,
"alpha": 2,
"lr_scheduler": "wsd",
"optimizer": "adamw8bit",
"data_order": "sorted",
"training_precision": "4bit",
"validate_every_n_epochs": 0.5,
"save_student_every_n_epochs": 4,
"num_gpu0_layers": 9,
"device_map": "custom",
"max_memory": {
"0": "24GB",
"1": "24GB",
"cpu": "200GB"
},
"multi_gpu": false,
"save_final_state": false,
"use_flash_attn_2": true,
"wandb_comment": "FFT",
"freeze_layers": [".block_sparse_moe.gate"],
"add_bos": false,
"prompt_format": {
"SYS_START": "#System:\\n",
"USER_START": "#User:\\n",
"ASSISTANT_START": "#AI:\\n",
"SYS_END": "\\n\\n",
"USER_END": "\\n\\n",
"ASSISTANT_END": "\\n\\n"
}
}