Skip to content

Commit

Permalink
Add support for Llama 3.1 (huggingface#2359)
Browse files Browse the repository at this point in the history
* Add Llama 3.1 rope

* Clippy

* Format

* Clippy

* Add support for multiple EOS tokens

* Untagged either

* Remove either dep and fix settings.json

* Make the max positional embeddings configurable
  • Loading branch information
EricLBuehler authored Jul 26, 2024
1 parent ddafc61 commit 0f5cbb0
Show file tree
Hide file tree
Showing 24 changed files with 165 additions and 71 deletions.
2 changes: 1 addition & 1 deletion candle-core/benches/benchmarks/affine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ fn run_affine_benchmark(c: &mut Criterion, device: &Device, dtype: DType, name:
let m = 1024;
let k = 1024;

let tensor = Tensor::zeros((b, m, k), dtype, &device).unwrap();
let tensor = Tensor::zeros((b, m, k), dtype, device).unwrap();

let flops = b * m * k * dtype.size_in_bytes();

Expand Down
4 changes: 2 additions & 2 deletions candle-core/benches/benchmarks/qmatmul.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use criterion::{black_box, criterion_group, Criterion, Throughput};
use std::time::Instant;

fn run(matmul: &QMatMul, x: &Tensor) {
matmul.forward(&x).unwrap();
matmul.forward(x).unwrap();
}

fn run_bench(c: &mut Criterion, device: &Device, dtype: GgmlDType) {
Expand Down Expand Up @@ -50,7 +50,7 @@ fn run_bench(c: &mut Criterion, device: &Device, dtype: GgmlDType) {
fn criterion_benchmark(c: &mut Criterion) {
let handler = BenchDeviceHandler::new().unwrap();
for device in handler.devices {
for dtype in vec![
for dtype in [
GgmlDType::F32,
GgmlDType::F16,
GgmlDType::Q4_0,
Expand Down
2 changes: 1 addition & 1 deletion candle-core/benches/benchmarks/unary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ fn run_unary_benchmark(c: &mut Criterion, device: &Device, dtype: DType, name: &
let m = 1024;
let k = 1024;

let tensor = Tensor::arange(0.0f32, (b * m * k) as f32, &device)
let tensor = Tensor::arange(0.0f32, (b * m * k) as f32, device)
.unwrap()
.to_dtype(dtype)
.unwrap()
Expand Down
6 changes: 3 additions & 3 deletions candle-core/benches/benchmarks/where_cond.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ const SIZE: usize = B * M * K;
const DATA: [u8; SIZE] = create_cond_arr::<SIZE>();

fn run_where_cond_benchmark(c: &mut Criterion, device: &Device, dtype: DType, name: &str) {
let tensor = Tensor::from_slice(DATA.as_slice(), (B, M, K), &device).unwrap();
let on_true = Tensor::ones((B, M, K), dtype, &device).unwrap();
let on_false = Tensor::zeros((B, M, K), dtype, &device).unwrap();
let tensor = Tensor::from_slice(DATA.as_slice(), (B, M, K), device).unwrap();
let on_true = Tensor::ones((B, M, K), dtype, device).unwrap();
let on_false = Tensor::zeros((B, M, K), dtype, device).unwrap();

let elements = B * M * K;
// E.g. 2 f32 tensors + 1 u8 tensor
Expand Down
6 changes: 3 additions & 3 deletions candle-core/src/tensor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -590,9 +590,9 @@ impl Tensor {
///
/// * `args` - A slice of 1D tensors.
/// * `xy_indexing` - Whether to use xy indexing or ij indexing. If xy is selected, the
/// first dimension corresponds to the cardinality of the second input and the second
/// dimension corresponds to the cardinality of the first input. If ij is selected, the
/// dimensions are in the same order as the cardinality of the inputs.
/// first dimension corresponds to the cardinality of the second input and the second
/// dimension corresponds to the cardinality of the first input. If ij is selected, the
/// dimensions are in the same order as the cardinality of the inputs.
///
/// # Examples
///
Expand Down
2 changes: 1 addition & 1 deletion candle-examples/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ serde = { workspace = true }
serde_json = { workspace = true }
symphonia = { version = "0.5.3", features = ["all"], optional = true }
tokenizers = { workspace = true, features = ["onig"] }
cpal= { version = "0.15.2", optional = true }
cpal = { version = "0.15.2", optional = true }

[dev-dependencies]
anyhow = { workspace = true }
Expand Down
30 changes: 24 additions & 6 deletions candle-examples/examples/llama/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ enum Which {
V1,
V2,
V3,
V31,
V3Instruct,
V31Instruct,
#[value(name = "solar-10.7b")]
Solar10_7B,
#[value(name = "tiny-llama-1.1b-chat")]
Expand Down Expand Up @@ -133,6 +135,8 @@ fn main() -> Result<()> {
Which::V2 => "meta-llama/Llama-2-7b-hf".to_string(),
Which::V3 => "meta-llama/Meta-Llama-3-8B".to_string(),
Which::V3Instruct => "meta-llama/Meta-Llama-3-8B-Instruct".to_string(),
Which::V31 => "meta-llama/Meta-Llama-3.1-8B".to_string(),
Which::V31Instruct => "meta-llama/Meta-Llama-3.1-8B-Instruct".to_string(),
Which::Solar10_7B => "upstage/SOLAR-10.7B-v1.0".to_string(),
Which::TinyLlama1_1BChat => "TinyLlama/TinyLlama-1.1B-Chat-v1.0".to_string(),
});
Expand All @@ -146,7 +150,13 @@ fn main() -> Result<()> {
let config = config.into_config(args.use_flash_attn);

let filenames = match args.which {
Which::V1 | Which::V2 | Which::V3 | Which::V3Instruct | Which::Solar10_7B => {
Which::V1
| Which::V2
| Which::V3
| Which::V3Instruct
| Which::V31
| Which::V31Instruct
| Which::Solar10_7B => {
candle_examples::hub_load_safetensors(&api, "model.safetensors.index.json")?
}
Which::TinyLlama1_1BChat => vec![api.get("model.safetensors")?],
Expand All @@ -157,9 +167,11 @@ fn main() -> Result<()> {
(Llama::load(vb, &config)?, tokenizer_filename, cache, config)
};
let tokenizer = Tokenizer::from_file(tokenizer_filename).map_err(E::msg)?;
let eos_token_id = config
.eos_token_id
.or_else(|| tokenizer.token_to_id(EOS_TOKEN));
let eos_token_id = config.eos_token_id.or_else(|| {
tokenizer
.token_to_id(EOS_TOKEN)
.map(model::LlamaEosToks::Single)
});
let prompt = args.prompt.as_ref().map_or(DEFAULT_PROMPT, |p| p.as_str());
let mut tokens = tokenizer
.encode(prompt, true)
Expand Down Expand Up @@ -217,8 +229,14 @@ fn main() -> Result<()> {
token_generated += 1;
tokens.push(next_token);

if Some(next_token) == eos_token_id {
break;
match eos_token_id {
Some(model::LlamaEosToks::Single(eos_tok_id)) if next_token == eos_tok_id => {
break;
}
Some(model::LlamaEosToks::Multiple(ref eos_ids)) if eos_ids.contains(&next_token) => {
break;
}
_ => (),
}
if let Some(t) = tokenizer.next_token(next_token)? {
print!("{t}");
Expand Down
2 changes: 1 addition & 1 deletion candle-examples/examples/yolo-v3/darknet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ impl Darknet {
let mut prev_channels: usize = 3;
for (index, block) in self.blocks.iter().enumerate() {
let channels_and_bl = match block.block_type.as_str() {
"convolutional" => conv(vb.pp(&index.to_string()), index, prev_channels, block)?,
"convolutional" => conv(vb.pp(index.to_string()), index, prev_channels, block)?,
"upsample" => upsample(prev_channels)?,
"shortcut" => shortcut(index, prev_channels, block)?,
"route" => route(index, &blocks, block)?,
Expand Down
6 changes: 3 additions & 3 deletions candle-nn/src/activation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ impl candle::Module for PReLU {
/// # Arguments
///
/// * `num_channels` - The number of channels. Use `None` to have a single trainable value and
/// `Some` for a 1D vector with the appropriate number of channels. When applying the `forward`
/// function, the input tensor shape `s` should either be one dimension with this number of
/// channels or if `s.len() >= 2` it should have `s[1]` equal to this number.
/// `Some` for a 1D vector with the appropriate number of channels. When applying the `forward`
/// function, the input tensor shape `s` should either be one dimension with this number of
/// channels or if `s.len() >= 2` it should have `s[1]` equal to this number.
pub fn prelu(num_channels: Option<usize>, vs: crate::VarBuilder) -> Result<PReLU> {
let init_ws = crate::init::Init::Const(0.25);
// When using a scalar weight, the PyTorch encoding is to use a 1d vector of length 1.
Expand Down
1 change: 1 addition & 0 deletions candle-nn/src/var_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ impl SimpleBackend for VarMap {
}
}

#[allow(dead_code)]
pub struct SafeTensorWithRouting<'a> {
routing: HashMap<String, usize>,
safetensors: Vec<SafeTensors<'a>>,
Expand Down
2 changes: 1 addition & 1 deletion candle-transformers/src/models/beit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ impl BeitVisionTransformer {
let norm = layer_norm(embed_dim, 1e-6, vb.pp("norm"))?;
let vb_b = vb.pp("blocks");
let blocks = (0..depth)
.map(|i| Block::new(vb_b.pp(&i.to_string()), embed_dim, num_heads))
.map(|i| Block::new(vb_b.pp(i.to_string()), embed_dim, num_heads))
.collect::<Result<Vec<_>>>()?;
Ok(Self {
patch_embed,
Expand Down
2 changes: 1 addition & 1 deletion candle-transformers/src/models/clip/text_model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ impl ClipEncoder {
let vs = vs.pp("layers");
let mut layers: Vec<ClipEncoderLayer> = Vec::new();
for index in 0..c.num_hidden_layers() {
let layer = ClipEncoderLayer::new(vs.pp(&index.to_string()), c)?;
let layer = ClipEncoderLayer::new(vs.pp(index.to_string()), c)?;
layers.push(layer)
}
Ok(ClipEncoder { layers })
Expand Down
2 changes: 1 addition & 1 deletion candle-transformers/src/models/dinov2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ impl DinoVisionTransformer {
let norm = layer_norm(embed_dim, 1e-5, vb.pp("norm"))?;
let vb_b = vb.pp("blocks");
let blocks = (0..depth)
.map(|i| Block::new(vb_b.pp(&i.to_string()), embed_dim, num_heads))
.map(|i| Block::new(vb_b.pp(i.to_string()), embed_dim, num_heads))
.collect::<Result<Vec<_>>>()?;
Ok(Self {
patch_embed,
Expand Down
2 changes: 1 addition & 1 deletion candle-transformers/src/models/dinov2reg4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ impl DinoVisionTransformer {
let norm = layer_norm(embed_dim, 1e-6, vb.pp("norm"))?;
let vb_b = vb.pp("blocks");
let blocks = (0..depth)
.map(|i| Block::new(vb_b.pp(&i.to_string()), embed_dim, num_heads))
.map(|i| Block::new(vb_b.pp(i.to_string()), embed_dim, num_heads))
.collect::<Result<Vec<_>>>()?;
Ok(Self {
patch_embed,
Expand Down
2 changes: 1 addition & 1 deletion candle-transformers/src/models/encodec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ impl<'a> Layer<'a> {
}

fn next(&mut self) -> VarBuilder {
let vb = self.vb.pp(&self.cnt.to_string());
let vb = self.vb.pp(self.cnt.to_string());
self.cnt += 1;
vb
}
Expand Down
9 changes: 1 addition & 8 deletions candle-transformers/src/models/eva2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,14 +255,7 @@ impl EVA2VisionTransformer {
let norm = layer_norm(embed_dim, 1e-6, vb.pp("norm"))?;
let vb_b = vb.pp("blocks");
let blocks = (0..depth)
.map(|i| {
Block::new(
vb_b.pp(&i.to_string()),
embed_dim,
num_heads,
&rot_pos_embed,
)
})
.map(|i| Block::new(vb_b.pp(i.to_string()), embed_dim, num_heads, &rot_pos_embed))
.collect::<Result<Vec<_>>>()?;
Ok(Self {
patch_embed,
Expand Down
Loading

0 comments on commit 0f5cbb0

Please sign in to comment.