update: llama3 #556

Open · wants to merge 2 commits into base: main
4 changes: 4 additions & 0 deletions .gitignore
@@ -1,3 +1,7 @@
# data file
alpaca_data/
libai/version.py
sft_result
Contributor: This file does not need to be changed.


# Byte-compiled / optimized / DLL files
__pycache__/
3 changes: 2 additions & 1 deletion projects/Llama/utils/prepare_alpaca.py
@@ -114,7 +114,8 @@ def prepare_sample(example: dict, tokenizer, max_length: int) -> dict:

prompt = tokenizer.tokenize(full_prompt, add_bos=True, add_eos=False, device="cpu")[0]
example = tokenizer.tokenize(
full_prompt_and_response, add_bos=True, add_eos=True, device="cpu"
full_prompt_and_response, add_bos=True, add_eos=True, device=None,
# device="cpu"
Contributor: The comment can be removed.

)[0]

padding = max_length - example.shape[0]
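For context on the hunk above, here is a minimal, dependency-free sketch of the padding step it belongs to. Only the `padding = max_length - example.shape[0]` idea is taken from the diff; the token lists, `pad_id`, and the loss-mask convention (`ignore_index=-100`) are assumptions about typical Alpaca-style SFT preprocessing, not code from this PR.

```python
# Hedged sketch: plain-Python analogue of the sample-preparation step above.
# prompt_ids/response_ids stand in for the tokenizer outputs; pad_id and the
# ignore_index convention are assumptions, not taken from this PR.
def prepare_sample_sketch(prompt_ids, response_ids, max_length, pad_id=0, ignore_index=-100):
    example = prompt_ids + response_ids               # full prompt + response
    padding = max_length - len(example)               # mirrors `max_length - example.shape[0]`
    example = (example + [pad_id] * padding)[:max_length]
    # Mask the prompt and padding so loss is computed on the response tokens only.
    labels = ([ignore_index] * len(prompt_ids) + response_ids + [ignore_index] * padding)[:max_length]
    return {"input_ids": example, "labels": labels}

sample = prepare_sample_sketch(list(range(8)), list(range(8, 12)), max_length=16)
assert len(sample["input_ids"]) == len(sample["labels"]) == 16
```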
60 changes: 60 additions & 0 deletions projects/Llama3/README.md
@@ -0,0 +1,60 @@
# Llama3

Reproduce Llama3 with OneFlow; the results are equivalent to HuggingFace's [Llama3](https://huggingface.co/docs/transformers/main/en/model_doc/llama3#overview).

## Introduction
The Llama3 supervised fine-tuning (SFT) project supports 3D parallelism.

## Fine-tuning Llama3
Fine-tune Llama3 on 8 GPUs with 3D parallelism.

### 1. Prepare the alpaca dataset

> Set the dataset-preparation parameters in `projects/Llama3/utils/prepare_alpaca.py`, such as `destination_path` and `checkpoint_dir`.

> Get the alpaca dataset files by running:
```bash
# path/to/libai
python projects/Llama3/utils/prepare_alpaca.py
```

### 2. Prepare your finetuning config file

> Set the fine-tuning parameters in `projects/Llama3/configs/llama_sft.py`, such as `dataset_path` and `pretrained_model_path`.
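The contents of `llama_sft.py` are not shown in this diff; the snippet below is only a rough, hypothetical illustration of the kind of overrides meant here. The field names come from this README and from `adapter_config.py` later in this PR, and the values are placeholders.

```python
from omegaconf import DictConfig

# Hypothetical override sketch -- adapt the field names to the real llama_sft.py.
cfg = DictConfig(dict(pretrained_model_path="", dataset_path=""))
cfg.pretrained_model_path = "meta-llama/Llama-3-8B/"  # pretrained checkpoint directory
cfg.dataset_path = "alpaca_data/"                     # output of prepare_alpaca.py (step 1)
```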

### 3. Run the following commands to start SFT
```bash
# full finetune
bash tools/train.sh projects/Llama3/train_net.py projects/Llama3/configs/llama_sft.py 8

# adapter finetune
bash tools/train.sh projects/Llama3/adapter/train_net.py projects/Llama3/adapter/adapter_sft.py 8
```

## Evaluate

> Set the evaluation parameters in `projects/Llama3/utils/eval_adapter.py`, then run:
```bash
python projects/Llama3/utils/eval_adapter.py
```

## Llama3 Inference

- Prepare the Llama3 checkpoint.
- Adjust the parameters in `projects/Llama3/pipeline.py`, then run:
```bash
bash tools/infer.sh projects/Llama3/pipeline.py 8
```

## NPU/XPU examples

- npu
```bash
python projects/Llama3/pipeline.py --device=npu --mode=huggingface --model_path /your/model/path
```

- xpu
```bash
python projects/Llama3/pipeline.py --device=xpu --mode=huggingface --model_path /your/model/path
```

63 changes: 63 additions & 0 deletions projects/Llama3/adapter/adapter_config.py
@@ -0,0 +1,63 @@
from omegaconf import DictConfig, OmegaConf

from configs.common.train import train # noqa
from libai.config import LazyCall
from projects.Llama3.adapter.adapter_model import LlamaForCausalLM
from projects.Llama3.tokenizer import LlamaTokenizer

cfg = dict(
    # Model
    hidden_act="silu",
    hidden_size=4096,
    initializer_range=0.02,
    intermediate_size=11008,
    max_position_embeddings=2048,
    num_attention_heads=32,
    hidden_layers=32,
    pretraining_tp=1,
    rms_norm_eps=1e-05,
    rope_scaling=None,
    tie_word_embeddings=False,
    vocab_size=32000,
    use_scaled_init_for_output_weights=False,
    scale_mask_softmax_fusion=False,
    amp_enabled=True,
    # Inference
    is_encoder_decoder=False,
    max_length=256,
    min_length=0,
    do_sample=False,
    early_stopping=False,
    num_beams=1,
    num_beam_groups=1,
    diversity_penalty=0.0,
    temperature=0.9,
    top_k=50,
    top_p=0.6,
    typical_p=1.0,
    repetition_penalty=1.0,
    length_penalty=1.0,
    no_repeat_ngram_size=0,
    encoder_no_repeat_ngram_size=0,
    num_return_sequences=1,
    chunk_size_feed_forward=0,
    output_scores=False,
    use_cache=True,
    bos_token_id=1,
    eos_token_id=2,
    pad_token_id=0,
    # adapter
    adapter_len=10,
    adapter_layer=30,
    # train
    pretrained_model_path="meta-llama/Llama-3-8B/",
)

cfg = DictConfig(cfg)

model = LazyCall(LlamaForCausalLM)(cfg=cfg)
tokenization = OmegaConf.create()
tokenization.make_vocab_size_divisible_by = 1
tokenization.tokenizer = LazyCall(LlamaTokenizer)(
    pretrained_model_path="Llama-3-8B/tokenizer.model"
)
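A hedged usage sketch for the module above: it exposes `cfg`, `model`, and `tokenization`, and the `LazyCall` objects are only materialized later by LibAI's trainer. The override pattern below is illustrative, not a documented API.

```python
# Run from the libai repository root with this PR checked out; requires the
# project's dependencies (oneflow, omegaconf) to be installed.
from omegaconf import OmegaConf

from projects.Llama3.adapter.adapter_config import cfg, model, tokenization  # noqa: F401

cfg.adapter_len = 16           # e.g. widen the adapter prompt length before training
cfg.max_length = 512           # allow longer generations at inference time
print(OmegaConf.to_yaml(cfg))  # inspect the resulting configuration
```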