Commit 2064fb0: Merge remote-tracking branch 'upstream/main'

EricLBuehler committed Jul 31, 2024
2 parents: 8b357f6 + 24d54d0

Showing 26 changed files with 168 additions and 74 deletions.

Cargo.toml (1 addition & 1 deletion)

@@ -49,7 +49,7 @@ gemm = { version = "0.17.0", features = ["wasm-simd128-enable"] }
 hf-hub = "0.3.0"
 half = { version = "2.3.1", features = ["num-traits", "use-intrinsics", "rand_distr"] }
 hound = "3.5.1"
-image = { version = "0.25.0", default-features = false, features = ["jpeg", "png"] }
+image = { version = "0.25.2", default-features = false, features = ["jpeg", "png"] }
 imageproc = { version = "0.24.0", default-features = false }
 intel-mkl-src = { version = "0.8.1", features = ["mkl-static-lp64-iomp"] }
 libc = { version = "0.2.147" }

candle-core/benches/benchmarks/affine.rs (1 addition & 1 deletion)

@@ -12,7 +12,7 @@ fn run_affine_benchmark(c: &mut Criterion, device: &Device, dtype: DType, name:
     let m = 1024;
     let k = 1024;

-    let tensor = Tensor::zeros((b, m, k), dtype, &device).unwrap();
+    let tensor = Tensor::zeros((b, m, k), dtype, device).unwrap();

     let flops = b * m * k * dtype.size_in_bytes();

candle-core/benches/benchmarks/qmatmul.rs (2 additions & 2 deletions)

@@ -7,7 +7,7 @@ use criterion::{black_box, criterion_group, Criterion, Throughput};
 use std::time::Instant;

 fn run(matmul: &QMatMul, x: &Tensor) {
-    matmul.forward(&x).unwrap();
+    matmul.forward(x).unwrap();
 }

 fn run_bench(c: &mut Criterion, device: &Device, dtype: GgmlDType) {
@@ -50,7 +50,7 @@ fn run_bench(c: &mut Criterion, device: &Device, dtype: GgmlDType) {
 fn criterion_benchmark(c: &mut Criterion) {
     let handler = BenchDeviceHandler::new().unwrap();
     for device in handler.devices {
-        for dtype in vec![
+        for dtype in [
            GgmlDType::F32,
            GgmlDType::F16,
            GgmlDType::Q4_0,

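Note: the `vec![...]` to `[...]` change matches clippy's `useless_vec` lint. A minimal standalone sketch of the pattern (not candle-specific):

fn main() {
    // Since Rust 2021, arrays implement IntoIterator by value, so a plain
    // array literal can be iterated directly; `vec![...]` would allocate
    // the same elements on the heap first.
    for dtype_id in [0u8, 1, 2] {
        println!("benchmarking dtype {dtype_id}");
    }
}
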
candle-core/benches/benchmarks/unary.rs (1 addition & 1 deletion)

@@ -12,7 +12,7 @@ fn run_unary_benchmark(c: &mut Criterion, device: &Device, dtype: DType, name: &
     let m = 1024;
     let k = 1024;

-    let tensor = Tensor::arange(0.0f32, (b * m * k) as f32, &device)
+    let tensor = Tensor::arange(0.0f32, (b * m * k) as f32, device)
         .unwrap()
         .to_dtype(dtype)
         .unwrap()

candle-core/benches/benchmarks/where_cond.rs (3 additions & 3 deletions)

@@ -25,9 +25,9 @@ const SIZE: usize = B * M * K;
 const DATA: [u8; SIZE] = create_cond_arr::<SIZE>();

 fn run_where_cond_benchmark(c: &mut Criterion, device: &Device, dtype: DType, name: &str) {
-    let tensor = Tensor::from_slice(DATA.as_slice(), (B, M, K), &device).unwrap();
-    let on_true = Tensor::ones((B, M, K), dtype, &device).unwrap();
-    let on_false = Tensor::zeros((B, M, K), dtype, &device).unwrap();
+    let tensor = Tensor::from_slice(DATA.as_slice(), (B, M, K), device).unwrap();
+    let on_true = Tensor::ones((B, M, K), dtype, device).unwrap();
+    let on_false = Tensor::zeros((B, M, K), dtype, device).unwrap();

     let elements = B * M * K;
     // E.g. 2 f32 tensors + 1 u8 tensor

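Note: for context, a minimal sketch of the operation this benchmark exercises, assuming candle-core's `Tensor::where_cond` (an element-wise select driven by an integer mask):

use candle_core::{DType, Device, Result, Tensor};

fn where_cond_demo() -> Result<Tensor> {
    let device = Device::Cpu;
    // Non-zero mask entries select from `on_true`, zeros from `on_false`.
    let cond = Tensor::from_slice(&[0u8, 1, 0, 1], (2, 2), &device)?;
    let on_true = Tensor::ones((2, 2), DType::F32, &device)?;
    let on_false = Tensor::zeros((2, 2), DType::F32, &device)?;
    // Expected result: [[0., 1.], [0., 1.]]
    cond.where_cond(&on_true, &on_false)
}
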
candle-core/src/tensor.rs (3 additions & 3 deletions)

@@ -651,9 +651,9 @@ impl Tensor {
     ///
     /// * `args` - A slice of 1D tensors.
     /// * `xy_indexing` - Whether to use xy indexing or ij indexing. If xy is selected, the
-    /// first dimension corresponds to the cardinality of the second input and the second
-    /// dimension corresponds to the cardinality of the first input. If ij is selected, the
-    /// dimensions are in the same order as the cardinality of the inputs.
+    ///   first dimension corresponds to the cardinality of the second input and the second
+    ///   dimension corresponds to the cardinality of the first input. If ij is selected, the
+    ///   dimensions are in the same order as the cardinality of the inputs.
     ///
     /// # Examples

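Note: the re-indented doc comment above describes `Tensor::meshgrid`. A short worked sketch of the xy/ij distinction, assuming the `Tensor::meshgrid(args, xy_indexing)` API this comment documents:

use candle_core::{Device, Result, Tensor};

fn meshgrid_demo() -> Result<()> {
    let device = Device::Cpu;
    let x = Tensor::new(&[1f32, 2., 3.], &device)?; // cardinality 3
    let y = Tensor::new(&[4f32, 5.], &device)?; // cardinality 2
    // xy indexing: the first dim follows the second input, so shape (2, 3).
    let xy = Tensor::meshgrid(&[&x, &y], true)?;
    assert_eq!(xy[0].dims(), &[2, 3]);
    // ij indexing: dims follow the input order, so shape (3, 2).
    let ij = Tensor::meshgrid(&[&x, &y], false)?;
    assert_eq!(ij[0].dims(), &[3, 2]);
    Ok(())
}
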
candle-examples/Cargo.toml (1 addition & 1 deletion)

@@ -35,7 +35,7 @@ serde = { workspace = true }
 serde_json = { workspace = true }
 symphonia = { version = "0.5.3", features = ["all"], optional = true }
 tokenizers = { workspace = true, features = ["onig"] }
-cpal= { version = "0.15.2", optional = true }
+cpal = { version = "0.15.2", optional = true }

 [dev-dependencies]
 anyhow = { workspace = true }

candle-examples/examples/llama/main.rs (24 additions & 6 deletions)

@@ -32,7 +32,9 @@ enum Which {
     V1,
     V2,
     V3,
+    V31,
     V3Instruct,
+    V31Instruct,
     #[value(name = "solar-10.7b")]
     Solar10_7B,
     #[value(name = "tiny-llama-1.1b-chat")]
@@ -133,6 +135,8 @@ fn main() -> Result<()> {
         Which::V2 => "meta-llama/Llama-2-7b-hf".to_string(),
         Which::V3 => "meta-llama/Meta-Llama-3-8B".to_string(),
         Which::V3Instruct => "meta-llama/Meta-Llama-3-8B-Instruct".to_string(),
+        Which::V31 => "meta-llama/Meta-Llama-3.1-8B".to_string(),
+        Which::V31Instruct => "meta-llama/Meta-Llama-3.1-8B-Instruct".to_string(),
         Which::Solar10_7B => "upstage/SOLAR-10.7B-v1.0".to_string(),
         Which::TinyLlama1_1BChat => "TinyLlama/TinyLlama-1.1B-Chat-v1.0".to_string(),
     });
@@ -146,7 +150,13 @@ fn main() -> Result<()> {
     let config = config.into_config(args.use_flash_attn);

     let filenames = match args.which {
-        Which::V1 | Which::V2 | Which::V3 | Which::V3Instruct | Which::Solar10_7B => {
+        Which::V1
+        | Which::V2
+        | Which::V3
+        | Which::V3Instruct
+        | Which::V31
+        | Which::V31Instruct
+        | Which::Solar10_7B => {
             candle_examples::hub_load_safetensors(&api, "model.safetensors.index.json")?
         }
         Which::TinyLlama1_1BChat => vec![api.get("model.safetensors")?],
@@ -157,9 +167,11 @@ fn main() -> Result<()> {
         (Llama::load(vb, &config)?, tokenizer_filename, cache, config)
     };
     let tokenizer = Tokenizer::from_file(tokenizer_filename).map_err(E::msg)?;
-    let eos_token_id = config
-        .eos_token_id
-        .or_else(|| tokenizer.token_to_id(EOS_TOKEN));
+    let eos_token_id = config.eos_token_id.or_else(|| {
+        tokenizer
+            .token_to_id(EOS_TOKEN)
+            .map(model::LlamaEosToks::Single)
+    });
     let prompt = args.prompt.as_ref().map_or(DEFAULT_PROMPT, |p| p.as_str());
     let mut tokens = tokenizer
         .encode(prompt, true)
@@ -217,8 +229,14 @@ fn main() -> Result<()> {
             token_generated += 1;
             tokens.push(next_token);

-            if Some(next_token) == eos_token_id {
-                break;
+            match eos_token_id {
+                Some(model::LlamaEosToks::Single(eos_tok_id)) if next_token == eos_tok_id => {
+                    break;
+                }
+                Some(model::LlamaEosToks::Multiple(ref eos_ids)) if eos_ids.contains(&next_token) => {
+                    break;
+                }
+                _ => (),
             }
             if let Some(t) = tokenizer.next_token(next_token)? {
                 print!("{t}");

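Note: the last hunk is the substantive part of the Llama 3.1 support. The 3.1 configs may declare several stop tokens, so the old comparison against a single `Option<u32>` becomes a match over `model::LlamaEosToks`, an enum with `Single(u32)` and `Multiple(Vec<u32>)` variants. A condensed sketch of the stop check, assuming that enum as used above:

use candle_transformers::models::llama::LlamaEosToks;

// Returns true when `next_token` should end generation.
fn is_eos(next_token: u32, eos: Option<&LlamaEosToks>) -> bool {
    match eos {
        Some(LlamaEosToks::Single(id)) => next_token == *id,
        Some(LlamaEosToks::Multiple(ids)) => ids.contains(&next_token),
        None => false,
    }
}
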
candle-examples/examples/yolo-v3/darknet.rs (1 addition & 1 deletion)

@@ -272,7 +272,7 @@ impl Darknet {
         let mut prev_channels: usize = 3;
         for (index, block) in self.blocks.iter().enumerate() {
             let channels_and_bl = match block.block_type.as_str() {
-                "convolutional" => conv(vb.pp(&index.to_string()), index, prev_channels, block)?,
+                "convolutional" => conv(vb.pp(index.to_string()), index, prev_channels, block)?,
                 "upsample" => upsample(prev_channels)?,
                 "shortcut" => shortcut(index, prev_channels, block)?,
                 "route" => route(index, &blocks, block)?,

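Note: this `&index.to_string()` to `index.to_string()` cleanup recurs in the model files below. It works because `VarBuilder::pp` is generic over `impl ToString` (an assumption consistent with every call site in this diff), so the freshly built `String` can be passed by value; borrowing it only made `pp` convert it back into a `String` again. A minimal sketch:

use candle_nn::VarBuilder;

// Child builder scoped to a numeric weight prefix such as "3".
fn nth_block_vb<'a>(vb: &VarBuilder<'a>, index: usize) -> VarBuilder<'a> {
    // Before: vb.pp(&index.to_string()); after: pass the String by value.
    vb.pp(index.to_string())
}
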
candle-nn/src/activation.rs (3 additions & 3 deletions)

@@ -93,9 +93,9 @@ impl candle::Module for PReLU {
 /// # Arguments
 ///
 /// * `num_channels` - The number of channels. Use `None` to have as single trainable value and
-/// `Some` for a 1D vector with the appropriate number of channels. When applying the `forward`
-/// function, the input tensor shape `s` should either be one dimension with this number of
-/// channels or if `s.len() >= 2` it should have `s[1]` equal to this number.
+///   `Some` for a 1D vector with the appropriate number of channels. When applying the `forward`
+///   function, the input tensor shape `s` should either be one dimension with this number of
+///   channels or if `s.len() >= 2` it should have `s[1]` equal to this number.
 pub fn prelu(num_channels: Option<usize>, vs: crate::VarBuilder) -> Result<PReLU> {
     let init_ws = crate::init::Init::Const(0.25);
     // When using a scalar weight, the PyTorch encoding is to use a 1d vector of length 1.

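Note: a short usage sketch for the constructor documented above, assuming `candle_nn::prelu` together with a fresh `VarMap` to hold the trainable slope:

use candle_core::{DType, Device, Result, Tensor};
use candle_nn::{Module, VarBuilder, VarMap};

fn prelu_demo() -> Result<Tensor> {
    let device = Device::Cpu;
    let varmap = VarMap::new();
    let vb = VarBuilder::from_varmap(&varmap, DType::F32, &device);
    // Some(4): one learnable slope per channel, matched against dim 1 of the
    // input; None would create a single slope shared across all elements.
    let act = candle_nn::prelu(Some(4), vb)?;
    let xs = Tensor::randn(0f32, 1f32, (2, 4, 8), &device)?;
    act.forward(&xs)
}
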
candle-nn/src/var_builder.rs (1 addition & 0 deletions)

@@ -288,6 +288,7 @@ impl SimpleBackend for VarMap {
     }
 }

+#[allow(dead_code)]
 pub struct SafeTensorWithRouting<'a> {
     routing: HashMap<String, usize>,
     safetensors: Vec<SafeTensors<'a>>,

candle-transformers/src/models/beit.rs (1 addition & 1 deletion)

@@ -288,7 +288,7 @@ impl BeitVisionTransformer {
         let norm = layer_norm(embed_dim, 1e-6, vb.pp("norm"))?;
         let vb_b = vb.pp("blocks");
         let blocks = (0..depth)
-            .map(|i| Block::new(vb_b.pp(&i.to_string()), embed_dim, num_heads))
+            .map(|i| Block::new(vb_b.pp(i.to_string()), embed_dim, num_heads))
             .collect::<Result<Vec<_>>>()?;
         Ok(Self {
             patch_embed,

candle-transformers/src/models/clip/text_model.rs (1 addition & 1 deletion)

@@ -249,7 +249,7 @@ impl ClipEncoder {
         let vs = vs.pp("layers");
         let mut layers: Vec<ClipEncoderLayer> = Vec::new();
         for index in 0..c.num_hidden_layers() {
-            let layer = ClipEncoderLayer::new(vs.pp(&index.to_string()), c)?;
+            let layer = ClipEncoderLayer::new(vs.pp(index.to_string()), c)?;
             layers.push(layer)
         }
         Ok(ClipEncoder { layers })

candle-transformers/src/models/dinov2.rs (1 addition & 1 deletion)

@@ -214,7 +214,7 @@ impl DinoVisionTransformer {
         let norm = layer_norm(embed_dim, 1e-5, vb.pp("norm"))?;
         let vb_b = vb.pp("blocks");
         let blocks = (0..depth)
-            .map(|i| Block::new(vb_b.pp(&i.to_string()), embed_dim, num_heads))
+            .map(|i| Block::new(vb_b.pp(i.to_string()), embed_dim, num_heads))
             .collect::<Result<Vec<_>>>()?;
         Ok(Self {
             patch_embed,

candle-transformers/src/models/dinov2reg4.rs (1 addition & 1 deletion)

@@ -212,7 +212,7 @@ impl DinoVisionTransformer {
         let norm = layer_norm(embed_dim, 1e-6, vb.pp("norm"))?;
         let vb_b = vb.pp("blocks");
         let blocks = (0..depth)
-            .map(|i| Block::new(vb_b.pp(&i.to_string()), embed_dim, num_heads))
+            .map(|i| Block::new(vb_b.pp(i.to_string()), embed_dim, num_heads))
             .collect::<Result<Vec<_>>>()?;
         Ok(Self {
             patch_embed,

candle-transformers/src/models/encodec.rs (1 addition & 1 deletion)

@@ -571,7 +571,7 @@ impl<'a> Layer<'a> {
     }

     fn next(&mut self) -> VarBuilder {
-        let vb = self.vb.pp(&self.cnt.to_string());
+        let vb = self.vb.pp(self.cnt.to_string());
         self.cnt += 1;
         vb
     }

candle-transformers/src/models/eva2.rs (1 addition & 8 deletions)

@@ -255,14 +255,7 @@ impl EVA2VisionTransformer {
         let norm = layer_norm(embed_dim, 1e-6, vb.pp("norm"))?;
         let vb_b = vb.pp("blocks");
         let blocks = (0..depth)
-            .map(|i| {
-                Block::new(
-                    vb_b.pp(&i.to_string()),
-                    embed_dim,
-                    num_heads,
-                    &rot_pos_embed,
-                )
-            })
+            .map(|i| Block::new(vb_b.pp(i.to_string()), embed_dim, num_heads, &rot_pos_embed))
             .collect::<Result<Vec<_>>>()?;
         Ok(Self {
             patch_embed,

[Diffs for the remaining 9 changed files were not loaded on this page.]