Merge pull request #40 from HuangLK/new-dev
New dev
HuangLK authored Sep 6, 2023
2 parents faedea5 + 8c6f663 commit 18e2412
Showing 24 changed files with 1,168 additions and 718 deletions.
133 changes: 0 additions & 133 deletions .gitignore

This file was deleted.

48 changes: 9 additions & 39 deletions README.md
@@ -1,9 +1,9 @@
-# llama-deepspeed
+# transpeeder
This project is under development. It aims to fine-tune llama models (7B-65B) on top of 🤗transformers and 🚀deepspeed, and to provide simple and convenient training scripts.

-## requirement
+## installation
```
-pip install -r requirements.txt
+pip install -e .
```

## data
@@ -18,56 +18,26 @@ Each line is a **JSON string**, and the JSON object must have `prompt` and `output`
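To make that layout concrete, here is a minimal sketch that writes two made-up examples in the one-JSON-object-per-line format; the file name and field values are placeholders, not taken from the repository's sample data:

```python
# Sketch: write hypothetical training examples, one JSON object per line,
# each carrying the required "prompt" and "output" fields.
import json

samples = [
    {"prompt": "Translate to French: Hello, world.", "output": "Bonjour, le monde."},
    {"prompt": "What is 2 + 2?", "output": "4"},
]

with open("my_data_oneline_format.json", "w", encoding="utf-8") as f:
    for sample in samples:
        f.write(json.dumps(sample, ensure_ascii=False) + "\n")
```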
## convert hf model to ckpt
```bash
# llama-7B
-python convert2ckpt.py --mp_world_size 4 \
+python -m scripts.convert2ckpt --mp_world_size 4 \
--model_name_or_path /path/to/llama-7b-hf \
--output_dir /path/to/llama-7b-init-ckpt

# llama-30B
-python convert2ckpt.py --mp_world_size 8 \
+python -m scripts.convert2ckpt --mp_world_size 8 \
--model_name_or_path /path/to/llama-30b-hf \
--output_dir /path/to/llama-30b-init-ckpt
```

## finetune
-llama-7B
-```bash
-deepspeed --include localhost:0,1,2,3 --master_port 22384 train.py \
-    --output_dir /path/to/output \
-    --init_ckpt /path/to/llama-7b-init-ckpt/ \
-    --data_path ./data/alpaca_data_sample_oneline_format.json \
-    --max_seq_len 1024 \
-    --train_steps 1000 \
-    --eval_steps 10 \
-    --save_steps 200 \
-    --log_steps 1 \
-    --pipe_parallel_size 4 \
-    --model_parallel_size 1 \
-    --use_flash_attn true \
-    --deepspeed_config ./configs/ds_config.json
-```
+See `examples/train_llama_deepspeed.sh`.

-llama-30B
-```bash
-deepspeed --master_port 22384 train.py \
-    --output_dir /path/to/output \
-    --init_ckpt /path/to/llama-30b-init-ckpt/ \
-    --data_path ./data/alpaca_data_sample_oneline_format.json \
-    --max_seq_len 1024 \
-    --train_steps 1000 \
-    --eval_steps 10 \
-    --save_steps 200 \
-    --log_steps 1 \
-    --pipe_parallel_size 8 \
-    --model_parallel_size 1 \
-    --use_flash_attn true \
-    --deepspeed_config ./configs/ds_config_zero1.json
-```

## convert ckpt to hf model
```bash
-python convert2hf.py --model_size 7B \
+python -m scripts.convert2hf --model_size 7B \
--input_dir ./output/llama-7B-ckpt/global_step1000/ \
-  --output_dir ./output/llama_hf_7B
+  --output_dir ./output/llama_hf_7B \
+  --tokenizer_size 32001
cp /path/to/llama-7b-hf/*.json ./output/llama_hf_7B
cp /path/to/llama-7b-hf/tokenizer.model ./output/llama_hf_7B
```
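As a quick sanity check after the conversion, the converted directory can be loaded back with 🤗transformers. This is only a sketch; it assumes `transformers` is installed and the JSON and tokenizer files above have already been copied into the output directory:

```python
# Sketch: load the converted checkpoint with Hugging Face transformers
# and generate a few tokens to confirm the weights and tokenizer line up.
from transformers import AutoTokenizer, LlamaForCausalLM

model_dir = "./output/llama_hf_7B"  # directory produced by scripts.convert2hf above
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = LlamaForCausalLM.from_pretrained(model_dir)

inputs = tokenizer("Hello, llama!", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```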
42 changes: 0 additions & 42 deletions common/log.py

This file was deleted.

15 changes: 0 additions & 15 deletions common/mp_wraps.py

This file was deleted.

14 changes: 10 additions & 4 deletions configs/ds_config.json
@@ -3,8 +3,6 @@
"train_batch_size": 128,
"steps_per_print": 100,
"gradient_clipping": 1.0,
"lr-decay-style": "cosine",
"warmup": 0.1,
"bf16": {
"enabled": false
},
@@ -19,12 +17,20 @@
"optimizer": {
"type": "Adam",
"params": {
"lr": 5e-6,
"lr": 1e-5,
"betas": [0.9, 0.95],
"eps": 1.0e-8
}
},
"min_lr": 5e-7,
"scheduler": {
"type": "WarmupDecayLR",
"params": {
"warmup_min_lr": 1e-6,
"warmup_max_lr": 1e-5,
"warmup_num_steps": 100,
"total_num_steps": 1000
}
},
"activation_checkpointing": {
"partition_activations": false,
"cpu_checkpointing": false,
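The numbers in the new `scheduler` block are easier to read with the shape written out. The sketch below only approximates the schedule these parameters describe (linear warmup, then linear decay); it is not DeepSpeed's actual `WarmupDecayLR` implementation:

```python
# Approximate shape of the configured schedule (NOT DeepSpeed's exact code):
# ramp linearly from warmup_min_lr to warmup_max_lr over warmup_num_steps,
# then decay linearly toward zero by total_num_steps.
def approx_lr(step, warmup_min_lr=1e-6, warmup_max_lr=1e-5,
              warmup_num_steps=100, total_num_steps=1000):
    if step < warmup_num_steps:
        frac = step / max(1, warmup_num_steps)
        return warmup_min_lr + (warmup_max_lr - warmup_min_lr) * frac
    remaining = (total_num_steps - step) / max(1, total_num_steps - warmup_num_steps)
    return warmup_max_lr * max(0.0, remaining)

for s in (0, 50, 100, 500, 1000):
    print(s, approx_lr(s))
```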
20 changes: 16 additions & 4 deletions configs/ds_config_zero1.json
@@ -3,8 +3,6 @@
"train_batch_size": 128,
"steps_per_print": 100,
"gradient_clipping": 1.0,
"lr-decay-style": "cosine",
"warmup": 0.1,
"bf16": {
"enabled": false
},
@@ -19,12 +17,20 @@
"optimizer": {
"type": "Adam",
"params": {
"lr": 5e-6,
"lr": 1e-5,
"betas": [0.9, 0.95],
"eps": 1.0e-8
}
},
"min_lr": 5e-7,
"scheduler": {
"type": "WarmupDecayLR",
"params": {
"warmup_min_lr": 1e-6,
"warmup_max_lr": 1e-5,
"warmup_num_steps": 100,
"total_num_steps": 1000
}
},
"zero_optimization": {
"stage": 1,
"offload_optimizer": {
@@ -46,5 +52,11 @@
"synchronize_checkpoint_boundary": false,
"profile": false
},
"wandb": {
"enabled": true,
"team": null,
"group": null,
"project": "llama-65B-test-ckpt"
},
"wall_clock_breakdown": true
}
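Both JSON files are consumed directly by DeepSpeed, which builds the Adam optimizer and `WarmupDecayLR` scheduler from them. Below is a generic sketch of that wiring on a toy model; it uses plain `deepspeed.initialize` rather than the repository's pipeline-parallel `train.py` setup, and it would need to be run under the `deepspeed` launcher on a GPU machine:

```python
# Sketch: let DeepSpeed build the optimizer and LR scheduler from the JSON config.
# Toy model only; the real training script sets up pipeline/model parallelism.
import torch
import deepspeed

model = torch.nn.Linear(16, 16)  # stand-in for the llama model
engine, optimizer, _, lr_scheduler = deepspeed.initialize(
    model=model,
    model_parameters=model.parameters(),
    config="configs/ds_config_zero1.json",  # optimizer + scheduler come from here
)
print(type(optimizer).__name__, type(lr_scheduler).__name__)
```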