# common.py
# Forked from modal-labs/llm-finetuning.
from modal import Stub, Image, Volume, Secret
# Hardware sizing constants exported for the scripts that import this module.
# NOTE(review): units and consumers are not visible in this file; presumably
# GPU_MEM is per-GPU memory in GB and N_GPUS is the GPU count requested per
# container. Confirm against the callers.
GPU_MEM = 80
N_GPUS = 4
# Short alias -> model identifier (written as `org/name` ids).
# Entries are grouped by model family; insertion order is kept stable because
# callers may iterate over this mapping.
BASE_MODELS = {
    # Llama 2 (7B / 13B)
    "base7": "meta-llama/Llama-2-7b-hf",
    "chat7": "meta-llama/Llama-2-7b-chat-hf",
    "chat13": "meta-llama/Llama-2-13b-chat-hf",
    # Code Llama (base)
    "code7": "codellama/CodeLlama-7b-hf",
    "code34": "codellama/CodeLlama-34b-hf",
    # Code Llama (instruct)
    "instruct7": "codellama/CodeLlama-7b-Instruct-hf",
    "instruct13": "codellama/CodeLlama-13b-Instruct-hf",
    "instruct34": "codellama/CodeLlama-34b-Instruct-hf",
    # Llama 2 (70B)
    # Training 70B requires experimental flag fsdp_peft_cpu_offload_for_save.
    "chat70": "meta-llama/Llama-2-70b-chat-hf",
    "base70": "meta-llama/Llama-2-70b-hf",
}
# Container image definition, built as a chain of layers on a micromamba base.
# Each builder call below is kept as its own step, in the original order.
image = (
    Image.micromamba()
    # CUDA 11.8 toolchain packages, resolved from the conda-forge and nvidia
    # channels.
    .micromamba_install(
        "cudatoolkit=11.8",
        "cudnn=8.1.0",
        "cuda-nvcc",
        channels=["conda-forge", "nvidia"],
    )
    # git is needed so pip can install the `git+https://` requirement below.
    .apt_install("git")
    # llama-recipes pinned to an exact commit of the Llama2D fork; pre-release
    # packages are allowed and the PyTorch nightly (cu118) index is added as
    # an extra package index.
    .pip_install(
        "llama-recipes @ git+https://github.com/Llama2D/llama-recipes.git@14c698001ee6fb840197e676f48f555d3b733b60",
        extra_index_url="https://download.pytorch.org/whl/nightly/cu118",
        pre=True,
    )
    .pip_install("huggingface_hub==0.17.1", "hf-transfer==0.1.3", "scipy")
    .pip_install("wandb")
    # Point the Hugging Face cache at /pretrained and turn on hf-transfer.
    .env(
        {
            "HUGGINGFACE_HUB_CACHE": "/pretrained",
            "HF_HUB_ENABLE_HF_TRANSFER": "1",
        }
    )
)
# Modal stub for the fine-tuning app. It is given the image defined above as
# its image, plus the named "huggingface" and "wandb" secrets.
stub = Stub(
    "llama-finetuning",
    image=image,
    secrets=[
        Secret.from_name("huggingface"),
        Secret.from_name("wandb"),
    ],
)

# Download pre-trained models into this volume.
stub.pretrained_volume = Volume.persisted("example-pretrained-vol")

# Save trained models into this volume.
stub.results_volume = Volume.persisted("example-results-vol")
# Mount path -> volume mapping for the two volumes attached to the stub.
# "/pretrained" is the same path the image uses for HUGGINGFACE_HUB_CACHE.
# NOTE(review): presumably passed as the `volumes=` argument of the stub's
# functions. Confirm against the callers.
VOLUME_CONFIG = {
    "/pretrained": stub.pretrained_volume,
    "/results": stub.results_volume,
}