From d9c8a7db57871c9b2c07f35761c26a2ae25af072 Mon Sep 17 00:00:00 2001
From: AgenP <100027170+AgenP@users.noreply.github.com>
Date: Tue, 6 Aug 2024 08:14:53 +0100
Subject: [PATCH] llama 3.1 compatibility

---
 config/llama-3.1.yml | 101 +++++++++++++++++++++++++++++++++++++++++++
 requirements.txt     |   4 ++
 src/inference.py     |  52 +++++++++++++++++++++-
 src/train.py         |  75 ++++++++++++++++++++++++++++----
 4 files changed, 221 insertions(+), 11 deletions(-)
 create mode 100644 config/llama-3.1.yml
 create mode 100644 requirements.txt

diff --git a/config/llama-3.1.yml b/config/llama-3.1.yml
new file mode 100644
index 00000000..e71f97dd
--- /dev/null
+++ b/config/llama-3.1.yml
@@ -0,0 +1,101 @@
+###
+# Model Configuration: LLaMA-3.1 8B
+###
+
+base_model: meta-llama/Meta-Llama-3.1-8B
+sequence_len: 4096
+
+# base model weight quantization
+load_in_8bit: true
+
+# attention implementation
+flash_attention: true
+
+# finetuned adapter config
+adapter: lora
+lora_model_dir:
+lora_r: 16
+lora_alpha: 32
+lora_dropout: 0.05
+lora_target_linear: true
+lora_fan_in_fan_out:
+lora_modules_to_save:  # required when adding new tokens to LLaMA/Mistral
+  - embed_tokens
+  - lm_head
+# for details, see https://github.com/huggingface/peft/issues/334#issuecomment-1561727994
+
+###
+# Dataset Configuration: sqlqa
+###
+
+datasets:
+  # This will be the path used for the data when it is saved to the Volume in the cloud.
+  - path: data.jsonl
+    ds_type: json
+    type:
+      # JSONL file contains question, context, answer fields per line.
+      # This gets mapped to instruction, input, output axolotl tags.
+      field_instruction: question
+      field_input: context
+      field_output: answer
+      # Format is used by axolotl to generate the prompt.
+      format: |-
+        [INST] Using the schema context below, generate a SQL query that answers the question.
+        {input}
+        {instruction} [/INST]
+
+# dataset formatting config
+tokens:  # add new control tokens from the dataset to the model
+  - "[INST]"
+  - " [/INST]"
+  - "[SQL]"
+  - " [/SQL]"
+
+special_tokens:
+  pad_token: <|end_of_text|>
+
+val_set_size: 0.05
+
+###
+# Training Configuration
+###
+
+# random seed for better reproducibility
+seed: 117
+
+# optimizer config
+optimizer: adamw_bnb_8bit
+learning_rate: 0.0001
+lr_scheduler: cosine
+num_epochs: 4
+micro_batch_size: 32
+gradient_accumulation_steps: 1
+warmup_steps: 10
+
+# axolotl saving config
+dataset_prepared_path: last_run_prepared
+output_dir: ./lora-out
+
+# logging and eval config
+logging_steps: 1
+eval_steps: 0.05
+
+# training performance optimization config
+bf16: auto
+tf32: false
+gradient_checkpointing: true
+
+# Optional wandb logging (uncomment to use)
+# wandb_project: llama-3.1-fine-tuning
+# wandb_watch: all
+
+###
+# Miscellaneous Configuration
+###
+
+# when true, prevents over-writing the config from the CLI
+strict: false
+
+# "Don't mess with this, it's here for accelerate and torchrun" -- axolotl docs
+local_rank:
+
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 00000000..d1c10f6a
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+modal
+pyyaml
+# For ci (check_loss.py)
+# pandas
\ No newline at end of file
diff --git a/src/inference.py b/src/inference.py
index 700a59ac..d491b3d9 100644
--- a/src/inference.py
+++ b/src/inference.py
@@ -1,12 +1,60 @@
 import os
 import time
 import yaml
-from pathlib import Path
+from pathlib import Path, PurePosixPath

 import modal
 from fastapi.responses import StreamingResponse

-from .common import app, vllm_image, Colors, MINUTES, VOLUME_CONFIG
+# from .common import app, vllm_image, Colors, MINUTES, VOLUME_CONFIG
+
+from typing import Union
+
+import modal
+
+APP_NAME = "example-axolotl"
+
+MINUTES = 60  # seconds
+HOURS = 60 * MINUTES
+
+
+ALLOW_WANDB = os.environ.get("ALLOW_WANDB", "false").lower() == "true"
+
+vllm_image = (
+    modal.Image.from_registry("nvidia/cuda:12.1.0-base-ubuntu22.04", add_python="3.10")
+    .pip_install("vllm==v0.5.3.post1", "torch==2.3.1")
+    .entrypoint([])
+)
+
+app = modal.App(
+    APP_NAME,
+    secrets=[
+        modal.Secret.from_name("huggingface"),
+        modal.Secret.from_dict({"ALLOW_WANDB": os.environ.get("ALLOW_WANDB", "false")}),
+        *([modal.Secret.from_name("wandb")] if ALLOW_WANDB else []),
+    ],
+)
+
+# Volumes for pre-trained models and training runs.
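+# Modal Volumes are persistent storage shared across containers:
+# "/pretrained" caches base-model weights (train.py points
+# HUGGINGFACE_HUB_CACHE here), and "/runs" holds each run's config,
+# data, and LoRA output so inference can load the trained adapter.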
+pretrained_volume = modal.Volume.from_name(
+    "example-pretrained-vol", create_if_missing=True
+)
+runs_volume = modal.Volume.from_name("example-runs-vol", create_if_missing=True)
+VOLUME_CONFIG: dict[Union[str, PurePosixPath], modal.Volume] = {
+    "/pretrained": pretrained_volume,
+    "/runs": runs_volume,
+}
+
+
+class Colors:
+    """ANSI color codes"""
+
+    GREEN = "\033[0;32m"
+    BLUE = "\033[0;34m"
+    GRAY = "\033[0;90m"
+    BOLD = "\033[1m"
+    END = "\033[0m"
+

 INFERENCE_GPU_CONFIG = os.environ.get("INFERENCE_GPU_CONFIG", "a10g:2")
 if len(INFERENCE_GPU_CONFIG.split(":")) <= 1:
diff --git a/src/train.py b/src/train.py
index 3aa412b8..3dcc0a7a 100644
--- a/src/train.py
+++ b/src/train.py
@@ -1,16 +1,75 @@
 import os
 from datetime import datetime
-from pathlib import Path
+from pathlib import Path, PurePosixPath
 import secrets

-from .common import (
-    app,
-    axolotl_image,
-    HOURS,
-    MINUTES,
-    VOLUME_CONFIG,
-)
+# from .common import (
+#     app,
+#     axolotl_image,
+#     HOURS,
+#     MINUTES,
+#     VOLUME_CONFIG,
+# )
+
+from typing import Union
+
+import modal
+
+APP_NAME = "example-axolotl"
+
+MINUTES = 60  # seconds
+HOURS = 60 * MINUTES
+
+
+# Updated image main-20240805-py3.11-cu121-2.3.1 for Llama-3.1 compatibility
+AXOLOTL_REGISTRY_SHA = (
+    "30ecbf47963eb1a6b8f3808b2f11951d6aba61ea6d7065c009841e8d761775cf"
+)
+
+ALLOW_WANDB = os.environ.get("ALLOW_WANDB", "false").lower() == "true"
+
+axolotl_image = (
+    modal.Image.from_registry(f"winglian/axolotl@sha256:{AXOLOTL_REGISTRY_SHA}")
+    .pip_install(
+        "huggingface_hub==0.23.2",
+        "hf-transfer==0.1.5",
+        "wandb==0.16.3",
+        "fastapi==0.110.0",
+        "pydantic==2.6.3",
+    )
+    .env(
+        dict(
+            HUGGINGFACE_HUB_CACHE="/pretrained",
+            HF_HUB_ENABLE_HF_TRANSFER="1",
+            TQDM_DISABLE="true",
+            AXOLOTL_NCCL_TIMEOUT="60",
+        )
+    )
+    .entrypoint([])
+)
+
+
+app = modal.App(
+    APP_NAME,
+    secrets=[
+        modal.Secret.from_name("huggingface"),
+        modal.Secret.from_dict({"ALLOW_WANDB": os.environ.get("ALLOW_WANDB", "false")}),
+        *([modal.Secret.from_name("wandb")] if ALLOW_WANDB else []),
+    ],
+)
+
+# Volumes for pre-trained models and training runs.
+pretrained_volume = modal.Volume.from_name(
+    "example-pretrained-vol", create_if_missing=True
+)
+
+runs_volume = modal.Volume.from_name("example-runs-vol", create_if_missing=True)
+VOLUME_CONFIG: dict[Union[str, PurePosixPath], modal.Volume] = {
+    "/pretrained": pretrained_volume,
+    "/runs": runs_volume,
+}
+

 GPU_CONFIG = os.environ.get("GPU_CONFIG", "a100:2")
 if len(GPU_CONFIG.split(":")) <= 1:
     N_GPUS = int(os.environ.get("N_GPUS", 2))
@@ -23,7 +82,6 @@
     gpu=GPU_CONFIG,
     volumes=VOLUME_CONFIG,
     timeout=24 * HOURS,
-    _allow_background_volume_commits=True,
 )
 def train(run_folder: str, output_dir: str):
     import torch
@@ -48,7 +106,6 @@ def train(run_folder: str, output_dir: str):
     gpu=SINGLE_GPU_CONFIG,
     volumes=VOLUME_CONFIG,
     timeout=24 * HOURS,
-    _allow_background_volume_commits=True,
 )
 def preproc_data(run_folder: str):
     print("Preprocessing data.")
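
Usage sketch (assumes the repo's Modal entrypoints in src/train.py and
src/inference.py, which are not shown in this patch; the flags below are
illustrative and should be verified against those files):

    # launch a detached fine-tuning job with the new Llama 3.1 config
    modal run --detach src.train --config=config/llama-3.1.yml --data=<path/to/data.jsonl>

    # once training finishes, serve the LoRA adapter from a run folder on /runs
    modal run -q src.inference --run-name=<run folder name>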