From 1630bd275359bd3d0e523af462b57840ac9f3ab8 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sat, 16 Nov 2024 09:54:27 -0500 Subject: [PATCH 01/20] dinov2 --- candle-transformers/src/models/dinov2.rs | 25 ++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/candle-transformers/src/models/dinov2.rs b/candle-transformers/src/models/dinov2.rs index df8834d1f7..a0bb89abaa 100644 --- a/candle-transformers/src/models/dinov2.rs +++ b/candle-transformers/src/models/dinov2.rs @@ -1,8 +1,29 @@ //! Implementation of the DINOv2 models from Meta Research. //! -//! See: -//! - DINOv2: ["DINOv2: Learning Robust Visual Features without Supervision"](https://github.com/facebookresearch/dinov2) +//! This module implements the DINOv2 vision transformer model from Meta AI Research. +//! DINOv2 is a self-supervised learning model that can learn visual features +//! without using any labeled data. See: ["DINOv2: Learning Robust Visual Features without Supervision"](https://github.com/facebookresearch/dinov2) //! +//! # Example usage: +//! +//! ```rust,no_run +//! # use candle::Result; +//! # fn main() -> Result<()> { +//! use candle_transformers::dinov2::vit_small; +//! let model = vit_small(vb)?; +//! # Ok(()) +//! # } +//! ``` +//! +//! ## Running an example with color map and CUDA +//! +//! ```bash +//! cargo run --features cuda,depth_anything_v2 \ +//! --package candle-examples \ +//! --example depth_anything_v2 +//! -- --color-map --image candle-examples/examples/yolo-v8/assets/bike.jpg +//! 
``` + use candle::{IndexOp, Result, Tensor, D}; use candle_nn::{layer_norm, LayerNorm, Linear, Module, VarBuilder}; From d594eca8c541c356b8a4069f94402bde83a44e6b Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sat, 16 Nov 2024 10:00:26 -0500 Subject: [PATCH 02/20] add another example --- candle-transformers/src/models/dinov2.rs | 32 +++++++++++++++++++++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/candle-transformers/src/models/dinov2.rs b/candle-transformers/src/models/dinov2.rs index a0bb89abaa..d9f1e9a1d9 100644 --- a/candle-transformers/src/models/dinov2.rs +++ b/candle-transformers/src/models/dinov2.rs @@ -4,7 +4,7 @@ //! DINOv2 is a self-supervised learning model that can learn visual features //! without using any labeled data. See: ["DINOv2: Learning Robust Visual Features without Supervision"](https://github.com/facebookresearch/dinov2) //! -//! # Example usage: +//! # Example usage //! //! ```rust,no_run //! # use candle::Result; @@ -18,11 +18,35 @@ //! ## Running an example with color map and CUDA //! //! ```bash -//! cargo run --features cuda,depth_anything_v2 \ +//! cargo run \ +//! --features cuda,depth_anything_v2 \ //! --package candle-examples \ -//! --example depth_anything_v2 -//! -- --color-map --image candle-examples/examples/yolo-v8/assets/bike.jpg +//! --example depth_anything_v2 \ +//! -- --color-map \ +//! --image candle-examples/examples/yolo-v8/assets/bike.jpg //! ``` +//! +//! ## Running as an ImageNet classifier +//! +//! The model returns the probability for the image to belong to each of the 1000 ImageNet categories. +//! +//!
+//! +//!
+//! +//! ```bash +//! cargo run \ +//! --example dinov2 \ +//! --release \ +//! -- --image candle-examples/examples/yolo-v8/assets/bike.jpg +//! +//! > mountain bike, all-terrain bike, off-roader: 43.67% +//! > bicycle-built-for-two, tandem bicycle, tandem: 33.20% +//! > crash helmet : 13.23% +//! > unicycle, monocycle : 2.44% +//! > maillot : 2.42% +//! ``` +//! use candle::{IndexOp, Result, Tensor, D}; use candle_nn::{layer_norm, LayerNorm, Linear, Module, VarBuilder}; From 7b34dfc60b04289e4e1a9cd758ff2dd80f5f0bf0 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sat, 16 Nov 2024 10:10:48 -0500 Subject: [PATCH 03/20] ad dinov2reg4 --- candle-transformers/src/models/dinov2reg4.rs | 31 ++++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/candle-transformers/src/models/dinov2reg4.rs b/candle-transformers/src/models/dinov2reg4.rs index 0d2320e14c..549f2c3ce5 100644 --- a/candle-transformers/src/models/dinov2reg4.rs +++ b/candle-transformers/src/models/dinov2reg4.rs @@ -1,9 +1,34 @@ //! Implementation of the DINOv2 revision (4 regularization) //! -//! See: -//! - DINOv2: ["DINOv2: Learning Robust Visual Features without Supervision"](https://github.com/facebookresearch/dinov2) +//! The DINOv2-reg4 model is a variant of DINOv2 that adds 4 register tokens to the +//! original architecture. This implementation is specifically trained for plant species +//! classification on the PlantCLEF2024 dataset with 7,806 classes. //! -//! This code implements the regularization tokens version with 4 regularization tokens. +//! - [Paper](https://arxiv.org/abs/2309.16588). Vision Transformers Need Registers +//! - [GH Repo](https://github.com/facebookresearch/dinov2) +//! +//! # Example +//! +//! ```bash +//! # Download class names and a plant picture to identify +//! # see candle-examples/examples/dinov2reg4 for full code. +//! +//! # Perform inference +//! cargo run \ +//! --example dinov2reg4 \ +//! 
--release -- \ +//! --image path/to/plant-picture.jpg +//! +//! > Orchis simia Lam. : 45.55% +//! > Orchis × bergonii Nanteuil: 9.80% +//! > Orchis italica Poir. : 9.66% +//! > Orchis × angusticruris Franch.: 2.76% +//! > Orchis × bivonae Tod. : 2.54% +//! ``` +//! +//!
+//! +//!
//! use candle::{IndexOp, Result, Tensor, D}; use candle_nn::{layer_norm, LayerNorm, Linear, Module, VarBuilder}; From 4e0cee2b8e2bcaa59db3de95b04625861804a279 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sat, 16 Nov 2024 10:17:32 -0500 Subject: [PATCH 04/20] eva2 --- candle-transformers/src/models/dinov2.rs | 2 +- candle-transformers/src/models/eva2.rs | 28 +++++++++++++++++++++--- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/candle-transformers/src/models/dinov2.rs b/candle-transformers/src/models/dinov2.rs index d9f1e9a1d9..c737042547 100644 --- a/candle-transformers/src/models/dinov2.rs +++ b/candle-transformers/src/models/dinov2.rs @@ -31,7 +31,7 @@ //! The model returns the probability for the image to belong to each of the 1000 ImageNet categories. //! //!
-//! +//! //!
//! //! ```bash diff --git a/candle-transformers/src/models/eva2.rs b/candle-transformers/src/models/eva2.rs index ee84cca43c..9e31f58c73 100644 --- a/candle-transformers/src/models/eva2.rs +++ b/candle-transformers/src/models/eva2.rs @@ -1,9 +1,31 @@ //! EVA-2 inference implementation. //! -//! See ["EVA-02: A Visual Representation for Neon Genesis"](https://arxiv.org/abs/2303.11331) +//! EVA-02 is a computer vision model that can be used as an ImageNet classifier. +//! The model returns the probability for an image to belong to each of the 1000 +//! ImageNet categories. +//! +//! - [Paper](https://arxiv.org/abs/2303.11331). EVA-02: A Visual Representation for Neon Genesis +//! - [Code](https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/eva2.py) +//! +//! # Example +//! +//! ```bash +//! cargo run \ +//! --example eva2 \ +//! --release -- \ +//! --image candle-examples/examples/yolo-v8/assets/bike.jpg +//! +//! > mountain bike, all-terrain bike, off-roader: 37.09% +//! > maillot : 8.30% +//! > alp : 2.13% +//! > bicycle-built-for-two, tandem bicycle, tandem: 0.84% +//! > crash helmet : 0.73% +//! ``` +//! +//!
+//! +//!
+//! -//! Based on implementation from [pytorch-image-models](https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/eva2.py) - use candle::{IndexOp, Result, Tensor, D}; use candle_nn::{layer_norm, LayerNorm, Linear, Module, VarBuilder}; From 0328ede151b848349ca4df516a5e2c1e0c1329d0 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sat, 16 Nov 2024 10:21:39 -0500 Subject: [PATCH 05/20] efficientvit --- .../src/models/efficientvit.rs | 37 +++++++++++++++++-- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/candle-transformers/src/models/efficientvit.rs b/candle-transformers/src/models/efficientvit.rs index 9724f702a6..4c231d7679 100644 --- a/candle-transformers/src/models/efficientvit.rs +++ b/candle-transformers/src/models/efficientvit.rs @@ -1,9 +1,40 @@ //! EfficientViT (MSRA) inference implementation based on timm. //! -//! See ["EfficientViT: Memory Efficient Vision Transformer with Cascaded Group Attention"](https://arxiv.org/abs/2305.07027) +//! This module provides an implementation of the EfficientViT model from Microsoft Research Asia +//! for efficient image classification. The model uses cascaded group attention modules +//! to achieve strong performance while maintaining low memory usage. +//! +//! The model was originally described in the paper: +//! ["EfficientViT: Memory Efficient Vision Transformer with Cascaded Group Attention"](https://arxiv.org/abs/2305.07027) +//! +//! This implementation is based on the reference implementation from +//! [pytorch-image-models](https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/efficientvit_msra.py). +//! +//! # Example Usage +//! +//! This candle implementation uses a pre-trained EfficientViT (from Microsoft Research Asia) network for inference. +//! The classification head has been trained on the ImageNet dataset and returns the probabilities for the top-5 classes. +//! +//! +//! ```bash +//! cargo run \ +//! --example efficientvit \ +//! 
--release -- \ +//! --image candle-examples/examples/yolo-v8/assets/bike.jpg --which m1 +//! +//! > loaded image Tensor[dims 3, 224, 224; f32] +//! > model built +//! > mountain bike, all-terrain bike, off-roader: 69.80% +//! > unicycle, monocycle : 13.03% +//! > bicycle-built-for-two, tandem bicycle, tandem: 9.28% +//! > crash helmet : 2.25% +//! > alp : 0.46% +//! ``` +//! +//!
+//! +//!
+//! -//! Based on implementation from [pytorch-image-models](https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/efficientvit_msra.py) - use candle::{Result, Tensor, D}; use candle_nn::{ batch_norm, conv2d, conv2d_no_bias, linear, ops::sigmoid, ops::softmax, Conv2dConfig, Func, From 42fd847609f329ce7ae7348581685a6cce077f95 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sat, 16 Nov 2024 10:27:05 -0500 Subject: [PATCH 06/20] moondream --- candle-transformers/src/models/moondream.rs | 30 +++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/candle-transformers/src/models/moondream.rs b/candle-transformers/src/models/moondream.rs index d351d7c019..a9dc9b7dc2 100644 --- a/candle-transformers/src/models/moondream.rs +++ b/candle-transformers/src/models/moondream.rs @@ -1,13 +1,39 @@ //! MoonDream Model vision-to-text //! +//! +//! Moondream is a computer-vision model that can answer real-world questions about images. +//! It's lightweight with only 1.6B parameters, enabling it to run on mobile phones and edge devices. +//! [MoonDream Original Implementation](https://github.com/vikhyat/moondream) +//! //! The model consists of: //! - Vision encoder using a ViT-style architecture //! - Text decoder based on Microsoft's Phi model //! - Vision projection module to align vision and text embeddings //! -//! References: -//! - [MoonDream Original Implementation](https://github.com/vikhyat/moondream) +//! # Examples +//! +//! +//! +//! ```bash +//! # download an example image +//! wget https://raw.githubusercontent.com/vikhyat/moondream/main/assets/demo-1.jpg +//! +//! # Now you can run Moondream from the `candle-examples` crate: +//! cargo run --example moondream \ +//! --release -- \ +//! --prompt "What is the girl eating?" \ +//! --image "./demo-1.jpg" //! +//! > avx: false, neon: true, simd128: false, f16c: false +//! > temp: 0.00 repeat-penalty: 1.00 repeat-last-n: 64 +//! 
> retrieved the files in 3.395583ms +//! > Running on CPU, to run on GPU(metal), build this example with `--features metal` +//! > loaded the model in 5.485493792s +//! > loaded and encoded the image Tensor[dims 3, 378, 378; f32] in 4.801396417s +//! > starting the inference loop +//! > The girl is eating a hamburger.< +//! > 9 tokens generated (0.68 token/s) +//! ``` use crate::models::mixformer::{Config as PhiConfig, MixFormerSequentialForCausalLM as PhiModel}; use crate::models::with_tracing::{layer_norm, linear_b, LayerNorm, Linear}; From 9eacceb1ab8842442d89158197ff28b1ecf17c2e Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sat, 16 Nov 2024 10:31:40 -0500 Subject: [PATCH 07/20] update t5 --- candle-transformers/src/models/t5.rs | 46 ++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/candle-transformers/src/models/t5.rs b/candle-transformers/src/models/t5.rs index 9da0c1afec..fa260893e4 100644 --- a/candle-transformers/src/models/t5.rs +++ b/candle-transformers/src/models/t5.rs @@ -14,6 +14,52 @@ //! - [T5 Paper](https://arxiv.org/abs/1910.10683) //! - [HuggingFace T5](https://huggingface.co/docs/transformers/model_doc/t5) //! - [GH Model](https://github.com/huggingface/transformers/blob/main/src/transformers/models/t5/modeling_t5.py) +//! +//! # Encoder-decoder example: +//! +//! ```bash +//! cargo run \ +//! --example t5 \ +//! --release -- \ +//! --model-id "t5-small" \ +//! --prompt "translate to German: A beautiful candle." \ +//! --decode +//! +//! > ... +//! > Eine schöne Kerze. +//! > 9 tokens generated (2.42 token/s) +//! ``` +//! +//! Variants such as [flan-t5](https://huggingface.co/google/flan-t5-small), [flan-ul2](https://huggingface.co/google/flan-ul2) (with `--revision "refs/pr/25"`), and [Co-EdIT](https://huggingface.co/grammarly/coedit-large) are also supported. +//! +//! # Translation with MADLAD +//! +//! +//! 
[MADLAD-400](https://arxiv.org/abs/2309.04662) is a series of multilingual machine translation T5 models trained on 250 billion tokens covering over 450 languages using publicly available data. These models are competitive with significantly larger models. +//! +//! ```bash +//! cargo run --example t5 --release -- \ +//! --model-id "jbochi/madlad400-3b-mt" \ +//! --prompt "<2de> How are you, my friend?" \ +//! --decode --temperature 0 +//! ... +//! Wie geht es dir, mein Freund? +//! ``` +//! +//! ## Sentence embedding example +//! +//! ```bash +//! cargo run --example t5 --release -- \ +//! --model-id "t5-small" --prompt "A beautiful candle." +//! ... +//! [[[ 0.0515, -0.0541, -0.0761, ..., -0.0392, 0.1511, -0.0265], +//! [-0.0974, 0.0998, -0.1659, ..., -0.2450, 0.1738, -0.0164], +//! [ 0.0624, -0.1024, 0.0430, ..., -0.1388, 0.0564, -0.2962], +//! [-0.0389, -0.1173, 0.0026, ..., 0.1064, -0.1065, 0.0990], +//! [ 0.1300, 0.0027, -0.0326, ..., 0.0026, -0.0317, 0.0851]]] +//! Tensor[[1, 5, 512], f32] +//! Took 303.766583ms +//! ``` use crate::models::with_tracing::Embedding; use candle::{DType, Device, Module, Result, Tensor, D}; From dee62f109d0ed00155caddba5592a11bc3bcfa25 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sat, 16 Nov 2024 10:33:30 -0500 Subject: [PATCH 08/20] update t5 --- candle-transformers/src/models/t5.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/candle-transformers/src/models/t5.rs b/candle-transformers/src/models/t5.rs index fa260893e4..d3fd2ba686 100644 --- a/candle-transformers/src/models/t5.rs +++ b/candle-transformers/src/models/t5.rs @@ -18,13 +18,10 @@ //! # Encoder-decoder example: //! //! ```bash -//! cargo run \ -//! --example t5 \ -//! --release -- \ +//! cargo run --example t5 --release -- \ //! --model-id "t5-small" \ //! --prompt "translate to German: A beautiful candle." \ //! --decode -//! //! > ... //! > Eine schöne Kerze. //! 
> 9 tokens generated (2.42 token/s) From 1b0ea39ab512c31deee0bdee76fe0a070c84f1d8 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sat, 16 Nov 2024 11:25:16 -0500 Subject: [PATCH 09/20] rwkv --- candle-transformers/src/models/rwkv_v5.rs | 20 ++++++++++++++++++-- candle-transformers/src/models/rwkv_v6.rs | 21 +++++++++++++++++---- 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/candle-transformers/src/models/rwkv_v5.rs b/candle-transformers/src/models/rwkv_v5.rs index 6390f886d2..15e386d292 100644 --- a/candle-transformers/src/models/rwkv_v5.rs +++ b/candle-transformers/src/models/rwkv_v5.rs @@ -1,7 +1,9 @@ //! RWKV v5 model implementation. //! -//! RWKV is an RNN with transformer-level performance that can be implemented -//! as either a transformer or RNN. +//! The [RWKV model](https://wiki.rwkv.com/) is a recurrent neural network model +//! with performance on par with transformer architectures. Several variants are +//! available, candle implements the v5 and v6 versions and can be used with +//! Eagle 7B([blog post](https://blog.rwkv.com/p/eagle-7b-soaring-past-transformers)). //! //! Key characteristics: //! - Time-mix attention mechanism @@ -14,6 +16,20 @@ //! - [RWKV Language Model](https://github.com/BlinkDL/RWKV-LM) //! - [RWKV v5 Release](https://github.com/BlinkDL/ChatRWKV/tree/main) //! +//! # Example +//! +//! ```bash +//! cargo run --example rwkv --release -- \ +//! --prompt "The smallest prime is " +//! +//! > avx: true, neon: false, simd128: false, f16c: true +//! > temp: 0.00 repeat-penalty: 1.10 repeat-last-n: 64 +//! > The smallest prime is ϕ(2) = 2. +//! > The smallest composite is ϕ(3) = 3. +//! > The smallest perfect number is ϕ(5) = 5. +//! > The smallest perfect square is ϕ(4) = 4. +//! > The smallest perfect cube is ϕ(6) = 6. +//! 
``` use super::with_tracing::{layer_norm, linear_no_bias as linear, LayerNorm, Linear}; use candle::{DType, Device, IndexOp, Result, Tensor}; diff --git a/candle-transformers/src/models/rwkv_v6.rs b/candle-transformers/src/models/rwkv_v6.rs index c75aa885e9..5da1c5ce81 100644 --- a/candle-transformers/src/models/rwkv_v6.rs +++ b/candle-transformers/src/models/rwkv_v6.rs @@ -1,7 +1,9 @@ //! RWKV v6 model implementation. //! -//! RWKV is an RNN with transformer-like performance. -//! Version 6 introduces refinements to the architecture. +//! The [RWKV model](https://wiki.rwkv.com/) is a recurrent neural network model +//! with performance on par with transformer architectures. Several variants are +//! available, candle implements the v5 and v6 versions and can be used with +//! Eagle 7B([blog post](https://blog.rwkv.com/p/eagle-7b-soaring-past-transformers)). //! //! Key characteristics: //! - Linear attention mechanism @@ -10,9 +12,20 @@ //! - Feed forward gating //! - State recycling for efficient inference //! -//! References: -//! - [RWKV Model](https://github.com/BlinkDL/RWKV-LM) +//! # Example //! +//! ```bash +//! cargo run --example rwkv --release -- \ +//! --prompt "The smallest prime is " +//! +//! > avx: true, neon: false, simd128: false, f16c: true +//! > temp: 0.00 repeat-penalty: 1.10 repeat-last-n: 64 +//! > The smallest prime is ϕ(2) = 2. +//! > The smallest composite is ϕ(3) = 3. +//! > The smallest perfect number is ϕ(5) = 5. +//! > The smallest perfect square is ϕ(4) = 4. +//! > The smallest perfect cube is ϕ(6) = 6. +//! 
``` use super::with_tracing::{layer_norm, linear_no_bias as linear, LayerNorm, Linear}; use candle::{IndexOp, Result, Tensor}; From c04bfec06e180103fc2344138ff2fbb2830d6a78 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sat, 16 Nov 2024 11:39:32 -0500 Subject: [PATCH 10/20] stable diffusion docs --- .../src/models/stable_diffusion/mod.rs | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/candle-transformers/src/models/stable_diffusion/mod.rs b/candle-transformers/src/models/stable_diffusion/mod.rs index d3e2032b6e..21a5f6bba2 100644 --- a/candle-transformers/src/models/stable_diffusion/mod.rs +++ b/candle-transformers/src/models/stable_diffusion/mod.rs @@ -5,7 +5,38 @@ //! //! - [Original Repository](https://github.com/CompVis/stable-diffusion) //! - [Hugging Face](https://huggingface.co/runwayml/stable-diffusion-v1-5) +//! - The default scheduler for the v1.5, v2.1 and XL 1.0 version is the Denoising Diffusion Implicit Model scheduler (DDIM). The original paper and some code can be found in the [associated repo](https://github.com/ermongroup/ddim). The default scheduler for the XL Turbo version is the Euler Ancestral scheduler. //! +//! +//! # Example +//! +//!
+//! rusty robot holding a candle +//!
+//! +//! > _"A rusty robot holding a fire torch in its hand."_ +//! Generated by Stable Diffusion XL using Rust and [candle](https://github.com/huggingface/candle). +//! +//! ```bash +//! # example running with cuda +//! # see the candle-examples/examples/stable-diffusion for all options +//! cargo run --example stable-diffusion --release --features=cuda,cudnn \ +//! -- --prompt "a cosmonaut on a horse (hd, realistic, high-def)" +//! +//! # with sd-turbo +//! cargo run --example stable-diffusion --release --features=cuda,cudnn \ +//! -- --prompt "a cosmonaut on a horse (hd, realistic, high-def)" \ +//! --sd-version turbo +//! +//! # with flash attention. +//! # feature flag: `--features flash-attn` +//! # cli flag: `--use-flash-attn`. +//! # flash-attention-v2 is only compatible with Ampere, Ada, \ +//! # or Hopper GPUs (e.g., A100/H100, RTX 3090/4090). +//! cargo run --example stable-diffusion --release --features=cuda,cudnn \ +//! -- --prompt "a cosmonaut on a horse (hd, realistic, high-def)" \ +//! --use-flash-attn +//! ``` pub mod attention; pub mod clip; From 88b6353bfde658016b1758b7866634f8b77e19cb Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sat, 16 Nov 2024 11:51:21 -0500 Subject: [PATCH 11/20] add wasm link --- .../src/models/segment_anything/mod.rs | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/candle-transformers/src/models/segment_anything/mod.rs b/candle-transformers/src/models/segment_anything/mod.rs index 3e85fe3594..11798d69f0 100644 --- a/candle-transformers/src/models/segment_anything/mod.rs +++ b/candle-transformers/src/models/segment_anything/mod.rs @@ -1,10 +1,33 @@ //! Segment Anything Model (SAM) //! //! SAM is an architecture for image segmentation, capable of segmenting any object -//! in an image based on prompts like points or boxes. +//! in an image based on prompts like points or boxes. //! 
This model provides a robust and fast image segmentation pipeline that can be tweaked via +//! some prompting (requesting some points to be in the target mask, requesting some +//! points to be part of the background so _not_ in the target mask, specifying some +//! bounding box). //! //! - [GH Link](https://github.com/facebookresearch/segment-anything) //! - [Paper](https://arxiv.org/abs/2304.02643) +//! - [Interactive Wasm Example](https://huggingface.co/spaces/radames/candle-segment-anything-wasm) +//! - The default backbone can be replaced by the smaller and faster TinyViT model +//! based on [MobileSAM](https://github.com/ChaoningZhang/MobileSAM). +//! +//! ## Example +//! +//! ```bash +//! cargo run --example segment-anything --release -- \ +//! --image candle-examples/examples/yolo-v8/assets/bike.jpg \ +//! --use-tiny --point 0.6,0.6 --point 0.6,0.55 +//! ``` +//! +//!
+//! +//! +//! +//!
+//! +//! +//! > Original; Prompt with `--point 0.6,0.55`; Prompt with `--point 0.6,0.6 --point 0.6,0.55` //! pub use crate::models::with_tracing::Linear; use candle::{Result, Tensor}; From b002401523135995f3bf17db5673d7322bfbe0cc Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sat, 16 Nov 2024 11:57:23 -0500 Subject: [PATCH 12/20] add segment_anything --- candle-transformers/src/models/segment_anything/mod.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/candle-transformers/src/models/segment_anything/mod.rs b/candle-transformers/src/models/segment_anything/mod.rs index 11798d69f0..9685128de6 100644 --- a/candle-transformers/src/models/segment_anything/mod.rs +++ b/candle-transformers/src/models/segment_anything/mod.rs @@ -6,12 +6,13 @@ //! points to be part of the background so _not_ in the target mask, specifying some //! bounding box). //! -//! - [GH Link](https://github.com/facebookresearch/segment-anything) -//! - [Paper](https://arxiv.org/abs/2304.02643) -//! - [Interactive Wasm Example](https://huggingface.co/spaces/radames/candle-segment-anything-wasm) -//! - The default backbone can be replaced by the smaller and faster TinyViT model +//! - ⚡ [Interactive Wasm Example](https://huggingface.co/spaces/radames/candle-segment-anything-wasm) +//! - 💻 [GH Link](https://github.com/facebookresearch/segment-anything) +//! - 📝 [Paper](https://arxiv.org/abs/2304.02643) +//! - 💡 The default backbone can be replaced by the smaller and faster TinyViT model //! based on [MobileSAM](https://github.com/ChaoningZhang/MobileSAM). //! +//! //! ## Example //! //! 
```bash From fd358b9dbe1b3fdba1984bb4bd5599a34419e1d3 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sun, 17 Nov 2024 11:22:30 -0500 Subject: [PATCH 13/20] adjsut for clippy --- candle-transformers/src/models/segment_anything/mod.rs | 3 +-- candle-transformers/src/models/stable_diffusion/mod.rs | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/candle-transformers/src/models/segment_anything/mod.rs b/candle-transformers/src/models/segment_anything/mod.rs index 9685128de6..fe0b099008 100644 --- a/candle-transformers/src/models/segment_anything/mod.rs +++ b/candle-transformers/src/models/segment_anything/mod.rs @@ -9,8 +9,7 @@ //! - ⚡ [Interactive Wasm Example](https://huggingface.co/spaces/radames/candle-segment-anything-wasm) //! - 💻 [GH Link](https://github.com/facebookresearch/segment-anything) //! - 📝 [Paper](https://arxiv.org/abs/2304.02643) -//! - 💡 The default backbone can be replaced by the smaller and faster TinyViT model -//! based on [MobileSAM](https://github.com/ChaoningZhang/MobileSAM). +//! - 💡 The default backbone can be replaced by the smaller and faster TinyViT model based on [MobileSAM](https://github.com/ChaoningZhang/MobileSAM). //! //! //! ## Example diff --git a/candle-transformers/src/models/stable_diffusion/mod.rs b/candle-transformers/src/models/stable_diffusion/mod.rs index 21a5f6bba2..458a7de2d4 100644 --- a/candle-transformers/src/models/stable_diffusion/mod.rs +++ b/candle-transformers/src/models/stable_diffusion/mod.rs @@ -14,8 +14,7 @@ //! rusty robot holding a candle //! //! -//! > _"A rusty robot holding a fire torch in its hand."_ -//! Generated by Stable Diffusion XL using Rust and [candle](https://github.com/huggingface/candle). +//! _"A rusty robot holding a fire torch in its hand."_ Generated by Stable Diffusion XL using Rust and [candle](https://github.com/huggingface/candle). //! //! ```bash //! 
# example running with cuda From ba6abe0069d7bef88a5cf09c67b0a71eb0c26129 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sun, 17 Nov 2024 11:54:43 -0500 Subject: [PATCH 14/20] ignore bertdoc --- candle-transformers/src/models/bert.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/candle-transformers/src/models/bert.rs b/candle-transformers/src/models/bert.rs index 808ca41557..bd11b01a90 100644 --- a/candle-transformers/src/models/bert.rs +++ b/candle-transformers/src/models/bert.rs @@ -7,7 +7,7 @@ //! - Upstream [Github repo](https://github.com/google-research/bert). //! - See bert in [candle-examples](https://github.com/huggingface/candle/tree/main/candle-examples/) for runnable code //! -//! ```no_run +//! ```ignore //! // for sentence embeddings //! # use candle_core::Tensor; //! # use candle_nn::{VarBuilder, Module}; From 8a34fffa059586b9bebe56696fe534ee970394f8 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sun, 17 Nov 2024 11:56:18 -0500 Subject: [PATCH 15/20] dinov2 ignore --- candle-transformers/src/models/dinov2.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/candle-transformers/src/models/dinov2.rs b/candle-transformers/src/models/dinov2.rs index c737042547..2317eee239 100644 --- a/candle-transformers/src/models/dinov2.rs +++ b/candle-transformers/src/models/dinov2.rs @@ -6,7 +6,7 @@ //! //! # Example usage //! -//! ```rust,no_run +//! ```ignore //! # use candle::Result; //! # fn main() -> Result<()> { //! 
use candle_transformers::dinov2::vit_small; From 9ddb2b0dd554fdd0015f14c5cc5c57c2a76cf3f3 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sun, 17 Nov 2024 12:03:11 -0500 Subject: [PATCH 16/20] update block to be text --- candle-transformers/src/models/bert.rs | 22 +++++++++++----------- candle-transformers/src/models/dinov2.rs | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/candle-transformers/src/models/bert.rs b/candle-transformers/src/models/bert.rs index bd11b01a90..a7283ea52e 100644 --- a/candle-transformers/src/models/bert.rs +++ b/candle-transformers/src/models/bert.rs @@ -7,26 +7,26 @@ //! - Upstream [Github repo](https://github.com/google-research/bert). //! - See bert in [candle-examples](https://github.com/huggingface/candle/tree/main/candle-examples/) for runnable code //! -//! ```ignore -//! // for sentence embeddings +//! ```rust,text +//! // Different models can be loaded using the model ID //! # use candle_core::Tensor; //! # use candle_nn::{VarBuilder, Module}; //! # fn main() -> candle_core::Result<()> { -//! # let model = todo!(); -//! # let prompt = "Here is a test sentence"; -//! let embeddings = model.forward(prompt)?; -//! // Returns tensor of shape [1, 7, 384] -//! println!("{embeddings}"); +//! # let vb = todo!(); +//! # let config = todo!(); +//! let model = BertModel::load(vb, &config )?; //! # Ok(()) //! # } //! -//! // Different models can be loaded using the model ID +//! // for sentence embeddings //! # use candle_core::Tensor; //! # use candle_nn::{VarBuilder, Module}; //! # fn main() -> candle_core::Result<()> { -//! # let vb = todo!(); -//! # let config = todo!(); -//! let model = BertModel::load(vb, &config )?; +//! # let model = todo!(); +//! let prompt = "Here is a test sentence"; +//! let embeddings = model.forward(prompt)?; +//! // Returns tensor of shape [1, 7, 384] +//! println!("{embeddings}"); //! # Ok(()) //! # } //! 
diff --git a/candle-transformers/src/models/dinov2.rs b/candle-transformers/src/models/dinov2.rs index 2317eee239..d960adb51b 100644 --- a/candle-transformers/src/models/dinov2.rs +++ b/candle-transformers/src/models/dinov2.rs @@ -6,7 +6,7 @@ //! //! # Example usage //! -//! ```ignore +//! ```rust,text //! # use candle::Result; //! # fn main() -> Result<()> { //! use candle_transformers::dinov2::vit_small; From 7649c27e599c8bd26cd5009d410086a15d798a40 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sun, 17 Nov 2024 12:34:52 -0500 Subject: [PATCH 17/20] remove the rust blocks for the moment --- candle-transformers/src/models/bert.rs | 50 ------------------------ candle-transformers/src/models/dinov2.rs | 11 ------ 2 files changed, 61 deletions(-) diff --git a/candle-transformers/src/models/bert.rs b/candle-transformers/src/models/bert.rs index a7283ea52e..da8734160a 100644 --- a/candle-transformers/src/models/bert.rs +++ b/candle-transformers/src/models/bert.rs @@ -7,56 +7,6 @@ //! - Upstream [Github repo](https://github.com/google-research/bert). //! - See bert in [candle-examples](https://github.com/huggingface/candle/tree/main/candle-examples/) for runnable code //! -//! ```rust,text -//! // Different models can be loaded using the model ID -//! # use candle_core::Tensor; -//! # use candle_nn::{VarBuilder, Module}; -//! # fn main() -> candle_core::Result<()> { -//! # let vb = todo!(); -//! # let config = todo!(); -//! let model = BertModel::load(vb, &config )?; -//! # Ok(()) -//! # } -//! -//! // for sentence embeddings -//! # use candle_core::Tensor; -//! # use candle_nn::{VarBuilder, Module}; -//! # fn main() -> candle_core::Result<()> { -//! # let model = todo!(); -//! let prompt = "Here is a test sentence"; -//! let embeddings = model.forward(prompt)?; -//! // Returns tensor of shape [1, 7, 384] -//! println!("{embeddings}"); -//! # Ok(()) -//! # } -//! -//! // Gelu approximation -//! // You can get a speedup by configuring the model -//! 
// to use an approximation of the gelu activation: -//! # use candle_core::Tensor; -//! # use candle_nn::{VarBuilder, Module}; -//! # fn main() -> candle_core::Result<()> { -//! # let mut config = todo!(); -//! config.hidden_act = HiddenAct::GeluApproximate; -//! # Ok(()) -//! # } -//! -//! // Similarities -//! // Bert can compute sentence embeddings which can then be used to calculate -//! // semantic similarities between sentences through cosine similarity scoring. -//! // The sentence embeddings are computed using average pooling across all tokens. -//! # use candle_core::Tensor; -//! # use candle_nn::{VarBuilder, Module}; -//! # fn main() -> candle_core::Result<()> { -//! # let model = todo!(); -//! let sentence1 = "The new movie is awesome"; -//! let sentence2 = "The new movie is so great"; -//! let emb1 = model.forward(sentence1)?; -//! let emb2 = model.forward(sentence2)?; -//! # Ok(()) -//! # } -//! ``` -//! use super::with_tracing::{layer_norm, linear, LayerNorm, Linear}; use candle::{DType, Device, Result, Tensor}; use candle_nn::{embedding, Embedding, Module, VarBuilder}; diff --git a/candle-transformers/src/models/dinov2.rs b/candle-transformers/src/models/dinov2.rs index d960adb51b..4d46941f8b 100644 --- a/candle-transformers/src/models/dinov2.rs +++ b/candle-transformers/src/models/dinov2.rs @@ -4,17 +4,6 @@ //! DINOv2 is a self-supervised learning model that can learn visual features //! without using any labeled data. See: ["DINOv2: Learning Robust Visual Features without Supervision"](https://github.com/facebookresearch/dinov2) //! -//! # Example usage -//! -//! ```rust,text -//! # use candle::Result; -//! # fn main() -> Result<()> { -//! use candle_transformers::dinov2::vit_small; -//! let model = vit_small(vb)?; -//! # Ok(()) -//! # } -//! ``` -//! //! ## Running an example with color map and CUDA //! //! 
```bash From 7f75c1866207a33eb6d11b2c543fe1d41b05a1e1 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sun, 17 Nov 2024 13:32:36 -0500 Subject: [PATCH 18/20] bump python to 3.11 --- .github/workflows/rust-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml index ee480c474c..76137e9487 100644 --- a/.github/workflows/rust-ci.yml +++ b/.github/workflows/rust-ci.yml @@ -14,6 +14,7 @@ jobs: matrix: os: [ubuntu-latest, windows-latest, macOS-latest] rust: [stable] + python-version: ["3.11"] steps: - uses: actions/checkout@v4 - uses: actions-rs/toolchain@v1 From 162fbaa5eaef6a87f8b4f2f3a57c78aa21758fff Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sun, 17 Nov 2024 13:58:04 -0500 Subject: [PATCH 19/20] add a setup-python step --- .github/workflows/rust-ci.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml index 76137e9487..6c86b3dd98 100644 --- a/.github/workflows/rust-ci.yml +++ b/.github/workflows/rust-ci.yml @@ -14,9 +14,11 @@ jobs: matrix: os: [ubuntu-latest, windows-latest, macOS-latest] rust: [stable] - python-version: ["3.11"] steps: - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" - uses: actions-rs/toolchain@v1 with: profile: minimal From 5555d6ad7d9b694c2c960988e86f0c7ad74ee686 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sun, 17 Nov 2024 14:11:20 -0500 Subject: [PATCH 20/20] add py311 to test as well --- .github/workflows/rust-ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml index 6c86b3dd98..db25503079 100644 --- a/.github/workflows/rust-ci.yml +++ b/.github/workflows/rust-ci.yml @@ -38,6 +38,9 @@ jobs: rust: [stable] steps: - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" - uses: actions-rs/toolchain@v1 with: 
profile: minimal