
Commit

fix missing layers names
vince62s committed Jun 13, 2024
1 parent 3a9b137 commit 1b1a177
Showing 10 changed files with 16 additions and 16 deletions.
4 changes: 2 additions & 2 deletions docs/docusaurus_tsx/docs/FAQ/lora.md
@@ -18,8 +18,8 @@ Also you can read the blog post here: https://huggingface.co/blog/hf-bitsandbyte

You need to add the following option:

-* `quant_layers: ['w_1', 'w_2', 'linear_values', 'linear_query']`
+* `quant_layers: ['up_proj', 'down_proj', 'linear_values', 'linear_query']`
* `quant_type: ['bnb_NF4']`

You can for instance quantize the layers of the PositionWise Feed-Forward from the Encoder/Decoder and the key/query/values/final from the Multi-head attention.
-Choices for quantization are ["bnb_8bit", "bnb_FP4", "bnb_NF4"]
+Choices for quantization are ["bnb_8bit", "bnb_FP4", "bnb_NF4"]
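
For context, here is a rough sketch (assumed wiring, not eole's actual loader; `quantize_named_linears` is a hypothetical helper) of what `quant_layers` plus `quant_type: bnb_NF4` amount to: the named `nn.Linear` sub-modules are replaced with bitsandbytes 4-bit NF4 layers, which quantize their weights when the model is moved to the GPU.

```python
import torch.nn as nn
import bitsandbytes as bnb  # assumes a CUDA-enabled bitsandbytes install


def quantize_named_linears(model: nn.Module, quant_layers: list[str]) -> None:
    """Swap every nn.Linear whose attribute name is listed in quant_layers
    for a bitsandbytes 4-bit NF4 linear layer (illustrative only)."""
    for module in model.modules():
        for child_name, child in list(module.named_children()):
            if child_name in quant_layers and isinstance(child, nn.Linear):
                setattr(
                    module,
                    child_name,
                    bnb.nn.Linear4bit(
                        child.in_features,
                        child.out_features,
                        bias=child.bias is not None,
                        quant_type="nf4",  # "fp4" for bnb_FP4; 8-bit uses Linear8bitLt
                    ),
                )


# e.g. quantize_named_linears(model, ["up_proj", "down_proj", "linear_values", "linear_query"])
```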
6 changes: 3 additions & 3 deletions eole/models/model.py
@@ -553,16 +553,16 @@ def _load_param(self, name, module, param_name, param, buf_list, ckpt_t, offset)
"linear_keys",
"linear_values",
"linear_query",
-"w_1",
-"w_3",
+"gate_up_proj",
+"up_proj",
]:
col_slice_start = param.data.size(0) * offset
col_slice_end = param.data.size(0) * (offset + 1)
else:
col_slice_start = 0
col_slice_end = param.data.size(0)
if param.data.dim() == 2:
-if name.split(".")[-1] in ["final_linear", "w_2"]:
+if name.split(".")[-1] in ["final_linear", "up_proj"]:
row_slice_start = param.data.size(1) * offset
row_slice_end = param.data.size(1) * (offset + 1)
else:
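
In the loading path above, each tensor-parallel rank copies only its slice of the full checkpoint tensor: layers in the first list take a block along dim 0 (`col_slice_*`), while the `final_linear`-style layers take a block along dim 1 (`row_slice_*`). A minimal standalone sketch of that arithmetic (hypothetical helper, shapes chosen only for illustration):

```python
import torch


def slice_checkpoint_tensor(
    full: torch.Tensor, local: torch.Tensor, offset: int, column_parallel: bool
) -> torch.Tensor:
    """Return the block of `full` that belongs to rank `offset`,
    sized to match the local shard `local` (illustrative only)."""
    if column_parallel:
        # dim-0 split, as for linear_query / gate_up_proj above
        start = local.size(0) * offset
        return full[start : start + local.size(0), :]
    # dim-1 split, as for final_linear above
    start = local.size(1) * offset
    return full[:, start : start + local.size(1)]


# Example: a 4096x4096 checkpoint matrix split over 2 ranks, dim-0 style.
full = torch.randn(4096, 4096)
local = torch.empty(2048, 4096)
local.copy_(slice_checkpoint_tensor(full, local, offset=1, column_parallel=True))
```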
6 changes: 3 additions & 3 deletions eole/models/model_saver.py
@@ -162,10 +162,10 @@ def _tensor_parallel_state_dict(self, model_state_dict, world_size):
"linear_keys",
"linear_values",
"linear_query",
-"w_1",
-"w_3",
+"gate_up_proj",
+"up_proj",
}
-cat_params = {"final_linear", "w_2"}
+cat_params = {"final_linear", "down_proj"}
# we probably should try and improve this to rely on dimensions instead of names
match key_1, key_2:
case "lora_A", _ if key_2 in averaged_params:
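
Saving reverses that split: `_tensor_parallel_state_dict` gathers the per-rank shards and rebuilds full tensors. A rough sketch of the merge rules implied by the two sets above (hypothetical helper; the concat dimensions and the averaging of replicated `lora_A` weights are assumptions read off this hunk, not the saver's exact code):

```python
import torch


def merge_shards(shards: list[torch.Tensor], key: str) -> torch.Tensor:
    """Rebuild a full tensor from per-rank shards (illustrative only)."""
    cat_params = {"final_linear", "down_proj"}
    if key in cat_params:
        return torch.cat(shards, dim=1)  # dim-1 split layers: stitch columns back
    if key == "lora_A":
        return torch.stack(shards).mean(dim=0)  # identical copies: average them
    return torch.cat(shards, dim=0)  # dim-0 split layers: stitch rows back


# Example with world_size=2: two 4096x2048 down_proj shards -> one 4096x4096 matrix.
shards = [torch.randn(4096, 2048), torch.randn(4096, 2048)]
assert merge_shards(shards, "down_proj").shape == (4096, 4096)
```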
2 changes: 1 addition & 1 deletion recipes/llama2/llama-finetune.yaml
@@ -85,7 +85,7 @@ training:
valid_steps: 100

# 4/8bit
-quant_layers: ['w_1', 'w_2', 'w_3', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
+quant_layers: ['gate_up_proj', 'down_proj', 'up_proj', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
quant_type: "bnb_NF4"

# LoRa
2 changes: 1 addition & 1 deletion recipes/llama2/llama-inference-tp-2gpu.yaml
@@ -16,7 +16,7 @@ batch_size: 8
world_size: 2
gpu_ranks: [0, 1]
parallel_mode: "tensor_parallel"
-quant_layers: ['w_1', 'w_2', 'w_3', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
+quant_layers: ['gate_up_proj', 'down_proj', 'up_proj', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
quant_type: "bnb_NF4"
precision: fp16
random_sampling_topk: 5
2 changes: 1 addition & 1 deletion recipes/llama2/llama-inference.yaml
@@ -16,7 +16,7 @@ batch_size: 8
world_size: 1
gpu_ranks: [0]
#parallel_mode: "tensor_parallel"
-quant_layers: ['w_1', 'w_2', 'w_3', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
+quant_layers: ['gate_up_proj', 'down_proj', 'up_proj', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
quant_type: "bnb_NF4"
precision: fp16
#random_sampling_topk: 1
2 changes: 1 addition & 1 deletion recipes/llama3/llama-inference.yaml
@@ -23,7 +23,7 @@ gpu_ranks: [0]
# world_size: 2
# gpu_ranks: [0, 1]
# parallel_mode: "tensor_parallel"
-# quant_layers: ['w_1', 'w_2', 'w_3', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
+# quant_layers: ['gate_up_proj', 'down_proj', 'up_proj', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
# quant_type: "bnb_NF4"
precision: fp16
#random_sampling_topk: 1
2 changes: 1 addition & 1 deletion recipes/llama3/llama-mmlu.yaml
@@ -23,7 +23,7 @@ gpu_ranks: [0]
# world_size: 2
# gpu_ranks: [0, 1]
# parallel_mode: "tensor_parallel"
-# quant_layers: ['w_1', 'w_2', 'w_3', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
+# quant_layers: ['gate_up_proj', 'down_proj', 'up_proj', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
# quant_type: "bnb_NF4"
precision: fp16
#random_sampling_topk: 1
2 changes: 1 addition & 1 deletion recipes/mistral/mistral-7b-awq-gemm-inference.yaml
@@ -18,7 +18,7 @@ gpu_ranks: [0]
# world_size: 2
# gpu_ranks: [0, 1]
# parallel_mode: "tensor_parallel"
-#quant_layers: ['w_1', 'w_2', 'w_3', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
+#quant_layers: ['gate_up_proj', 'down_proj', 'up_proj', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
#quant_type: "bnb_NF4"
precision: fp16
#random_sampling_topk: 1
4 changes: 2 additions & 2 deletions recipes/mixtral/mixtral-inference-awq.yaml
@@ -16,8 +16,8 @@ batch_size: 1
world_size: 2
gpu_ranks: [0, 1]
parallel_mode: "tensor_parallel"
-#quant_layers: ['w_1', 'w_2', 'w_3', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
-#quant_layers: ['w_1', 'w_2', 'w_3']
+#quant_layers: ['gate_up_proj', 'down_proj', 'up_proj', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
+#quant_layers: ['gate_up_proj', 'down_proj', 'up_proj']
#quant_type: "bnb_sparse"
precision: fp16
#random_sampling_topk: 1
