Merge pull request #40 from HuangLK/new-dev
New dev
HuangLK authored Sep 6, 2023
2 parents faedea5 + 8c6f663 commit 18e2412
Showing 24 changed files with 1,168 additions and 718 deletions.
133 changes: 0 additions & 133 deletions .gitignore

This file was deleted.

48 changes: 9 additions & 39 deletions README.md
@@ -1,9 +1,9 @@
-# llama-deepspeed
+# transpeeder
This project is under development. It aims to fine-tune llama models (7B-65B) on top of 🤗transformers and 🚀deepspeed, and to provide simple and convenient training scripts.

-## requirement
+## installation
```
-pip install -r requirements.txt
+pip install -e .
```

## data
@@ -18,56 +18,26 @@ Each line is a **JSON string**, and the JSON object must have `prompt` and `output`
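To make that layout concrete, here is a minimal sketch that writes two made-up examples in the one-JSON-object-per-line format; the file name and field values are placeholders, not taken from the repository's sample data:

```python
# Sketch: write hypothetical training examples, one JSON object per line,
# each carrying the required "prompt" and "output" fields.
import json

samples = [
    {"prompt": "Translate to French: Hello, world.", "output": "Bonjour, le monde."},
    {"prompt": "What is 2 + 2?", "output": "4"},
]

with open("my_data_oneline_format.json", "w", encoding="utf-8") as f:
    for sample in samples:
        f.write(json.dumps(sample, ensure_ascii=False) + "\n")
```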
## convert hf model to ckpt
```bash
# llama-7B
-python convert2ckpt.py --mp_world_size 4 \
+python -m scripts.convert2ckpt --mp_world_size 4 \
--model_name_or_path /path/to/llama-7b-hf \
--output_dir /path/to/llama-7b-init-ckpt

# llama-30B
-python convert2ckpt.py --mp_world_size 8 \
+python -m scripts.convert2ckpt --mp_world_size 8 \
--model_name_or_path /path/to/llama-30b-hf \
--output_dir /path/to/llama-30b-init-ckpt
```

## finetune
-llama-7B
-```bash
-deepspeed --include localhost:0,1,2,3 --master_port 22384 train.py \
-    --output_dir /path/to/output \
-    --init_ckpt /path/to/llama-7b-init-ckpt/ \
-    --data_path ./data/alpaca_data_sample_oneline_format.json \
-    --max_seq_len 1024 \
-    --train_steps 1000 \
-    --eval_steps 10 \
-    --save_steps 200 \
-    --log_steps 1 \
-    --pipe_parallel_size 4 \
-    --model_parallel_size 1 \
-    --use_flash_attn true \
-    --deepspeed_config ./configs/ds_config.json
-```
+See `examples/train_llama_deepspeed.sh`.

-llama-30B
-```bash
-deepspeed --master_port 22384 train.py \
-    --output_dir /path/to/output \
-    --init_ckpt /path/to/llama-30b-init-ckpt/ \
-    --data_path ./data/alpaca_data_sample_oneline_format.json \
-    --max_seq_len 1024 \
-    --train_steps 1000 \
-    --eval_steps 10 \
-    --save_steps 200 \
-    --log_steps 1 \
-    --pipe_parallel_size 8 \
-    --model_parallel_size 1 \
-    --use_flash_attn true \
-    --deepspeed_config ./configs/ds_config_zero1.json
-```

## convert ckpt to hf model
```bash
-python convert2hf.py --model_size 7B \
+python -m scripts.convert2hf --model_size 7B \
--input_dir ./output/llama-7B-ckpt/global_step1000/ \
-  --output_dir ./output/llama_hf_7B
+  --output_dir ./output/llama_hf_7B \
+  --tokenizer_size 32001
cp /path/to/llama-7b-hf/*.json ./output/llama_hf_7B
cp /path/to/llama-7b-hf/tokenizer.model ./output/llama_hf_7B
```
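As a quick sanity check after the conversion, the converted directory can be loaded back with 🤗transformers. This is only a sketch; it assumes `transformers` is installed and the JSON and tokenizer files above have already been copied into the output directory:

```python
# Sketch: load the converted checkpoint with Hugging Face transformers
# and generate a few tokens to confirm the weights and tokenizer line up.
from transformers import AutoTokenizer, LlamaForCausalLM

model_dir = "./output/llama_hf_7B"  # directory produced by scripts.convert2hf above
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = LlamaForCausalLM.from_pretrained(model_dir)

inputs = tokenizer("Hello, llama!", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```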
42 changes: 0 additions & 42 deletions common/log.py

This file was deleted.

15 changes: 0 additions & 15 deletions common/mp_wraps.py

This file was deleted.

14 changes: 10 additions & 4 deletions configs/ds_config.json
@@ -3,8 +3,6 @@
"train_batch_size": 128,
"steps_per_print": 100,
"gradient_clipping": 1.0,
"lr-decay-style": "cosine",
"warmup": 0.1,
"bf16": {
"enabled": false
},
@@ -19,12 +17,20 @@
"optimizer": {
"type": "Adam",
"params": {
"lr": 5e-6,
"lr": 1e-5,
"betas": [0.9, 0.95],
"eps": 1.0e-8
}
},
"min_lr": 5e-7,
"scheduler": {
"type": "WarmupDecayLR",
"params": {
"warmup_min_lr": 1e-6,
"warmup_max_lr": 1e-5,
"warmup_num_steps": 100,
"total_num_steps": 1000
}
},
"activation_checkpointing": {
"partition_activations": false,
"cpu_checkpointing": false,
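The numbers in the new `scheduler` block are easier to read with the shape written out. The sketch below only approximates the schedule these parameters describe (linear warmup, then linear decay); it is not DeepSpeed's actual `WarmupDecayLR` implementation:

```python
# Approximate shape of the configured schedule (NOT DeepSpeed's exact code):
# ramp linearly from warmup_min_lr to warmup_max_lr over warmup_num_steps,
# then decay linearly toward zero by total_num_steps.
def approx_lr(step, warmup_min_lr=1e-6, warmup_max_lr=1e-5,
              warmup_num_steps=100, total_num_steps=1000):
    if step < warmup_num_steps:
        frac = step / max(1, warmup_num_steps)
        return warmup_min_lr + (warmup_max_lr - warmup_min_lr) * frac
    remaining = (total_num_steps - step) / max(1, total_num_steps - warmup_num_steps)
    return warmup_max_lr * max(0.0, remaining)

for s in (0, 50, 100, 500, 1000):
    print(s, approx_lr(s))
```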
20 changes: 16 additions & 4 deletions configs/ds_config_zero1.json
@@ -3,8 +3,6 @@
"train_batch_size": 128,
"steps_per_print": 100,
"gradient_clipping": 1.0,
"lr-decay-style": "cosine",
"warmup": 0.1,
"bf16": {
"enabled": false
},
@@ -19,12 +17,20 @@
"optimizer": {
"type": "Adam",
"params": {
"lr": 5e-6,
"lr": 1e-5,
"betas": [0.9, 0.95],
"eps": 1.0e-8
}
},
"min_lr": 5e-7,
"scheduler": {
"type": "WarmupDecayLR",
"params": {
"warmup_min_lr": 1e-6,
"warmup_max_lr": 1e-5,
"warmup_num_steps": 100,
"total_num_steps": 1000
}
},
"zero_optimization": {
"stage": 1,
"offload_optimizer": {
@@ -46,5 +52,11 @@
"synchronize_checkpoint_boundary": false,
"profile": false
},
"wandb": {
"enabled": true,
"team": null,
"group": null,
"project": "llama-65B-test-ckpt"
},
"wall_clock_breakdown": true
}
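Both JSON files are consumed directly by DeepSpeed, which builds the Adam optimizer and `WarmupDecayLR` scheduler from them. Below is a generic sketch of that wiring on a toy model; it uses plain `deepspeed.initialize` rather than the repository's pipeline-parallel `train.py` setup, and it would need to be run under the `deepspeed` launcher on a GPU machine:

```python
# Sketch: let DeepSpeed build the optimizer and LR scheduler from the JSON config.
# Toy model only; the real training script sets up pipeline/model parallelism.
import torch
import deepspeed

model = torch.nn.Linear(16, 16)  # stand-in for the llama model
engine, optimizer, _, lr_scheduler = deepspeed.initialize(
    model=model,
    model_parameters=model.parameters(),
    config="configs/ds_config_zero1.json",  # optimizer + scheduler come from here
)
print(type(optimizer).__name__, type(lr_scheduler).__name__)
```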