From 1630bd275359bd3d0e523af462b57840ac9f3ab8 Mon Sep 17 00:00:00 2001
From: Zachary Charlop-Powers <zach.charlop.powers@gmail.com>
Date: Sat, 16 Nov 2024 09:54:27 -0500
Subject: [PATCH 01/20] dinov2

---
 candle-transformers/src/models/dinov2.rs | 25 ++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)
diff --git a/candle-transformers/src/models/dinov2.rs b/candle-transformers/src/models/dinov2.rs
index df8834d1f7..a0bb89abaa 100644
--- a/candle-transformers/src/models/dinov2.rs
+++ b/candle-transformers/src/models/dinov2.rs
@@ -1,8 +1,29 @@
 //! Implementation of the DINOv2 models from Meta Research.
 //!
-//! See:
-//! - DINOv2: ["DINOv2: Learning Robust Visual Features without Supervision"](https://github.com/facebookresearch/dinov2)
+//! This module implements the DINOv2 vision transformer model from Meta AI Research.
+//! DINOv2 is a self-supervised learning model that can learn visual features
+//! without using any labeled data. See: ["DINOv2: Learning Robust Visual Features without Supervision"](https://github.com/facebookresearch/dinov2)
 //!
+//! # Example usage:
+//!
+//! ```rust,no_run
+//! # use candle::Result;
+//! # fn main() -> Result<()> {
+//! use candle_transformers::dinov2::vit_small;
+//! let model = vit_small(vb)?;
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! ## Running an example with color map and CUDA
+//!
+//! ```bash
+//! cargo run --features cuda,depth_anything_v2 \
+//!   --package candle-examples \
+//!   --example depth_anything_v2
+//!   -- --color-map --image candle-examples/examples/yolo-v8/assets/bike.jpg
+//! ```
+
 use candle::{IndexOp, Result, Tensor, D};
 use candle_nn::{layer_norm, LayerNorm, Linear, Module, VarBuilder};
 

From d594eca8c541c356b8a4069f94402bde83a44e6b Mon Sep 17 00:00:00 2001
From: Zachary Charlop-Powers <zach.charlop.powers@gmail.com>
Date: Sat, 16 Nov 2024 10:00:26 -0500
Subject: [PATCH 02/20] add another example

---
 candle-transformers/src/models/dinov2.rs | 32 +++++++++++++++++++++---
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/candle-transformers/src/models/dinov2.rs b/candle-transformers/src/models/dinov2.rs
index a0bb89abaa..d9f1e9a1d9 100644
--- a/candle-transformers/src/models/dinov2.rs
+++ b/candle-transformers/src/models/dinov2.rs
@@ -4,7 +4,7 @@
 //! DINOv2 is a self-supervised learning model that can learn visual features
 //! without using any labeled data. See: ["DINOv2: Learning Robust Visual Features without Supervision"](https://github.com/facebookresearch/dinov2)
 //!
-//! # Example usage:
+//! # Example usage
 //!
 //! ```rust,no_run
 //! # use candle::Result;
@@ -18,11 +18,35 @@
 //! ## Running an example with color map and CUDA
 //!
 //! ```bash
-//! cargo run --features cuda,depth_anything_v2 \
+//! cargo run \
+//!   --features cuda,depth_anything_v2 \
 //!   --package candle-examples \
-//!   --example depth_anything_v2
-//!   -- --color-map --image candle-examples/examples/yolo-v8/assets/bike.jpg
+//!   --example depth_anything_v2 \
+//!   -- --color-map \
+//!   --image candle-examples/examples/yolo-v8/assets/bike.jpg
 //! ```
+//!
+//! ## Running as an ImageNet classifier
+//!
+//! The model returns the probability for the image to belong to each of the 1000 ImageNet categories.
+//!
+//! <div align=center>
+//!   <img src="https://github.com/huggingface/candle/raw/main/candle-examples/examples/yolo-v8/assets/bike.jpg" alt="" width=320>
+//! </div>
+//!
+//! ```bash
+//! cargo run \
+//!   --example dinov2 \
+//!   --release \
+//!   -- --image candle-examples/examples/yolo-v8/assets/bike.jpg
+//!
+//! > mountain bike, all-terrain bike, off-roader: 43.67%
+//! > bicycle-built-for-two, tandem bicycle, tandem: 33.20%
+//! > crash helmet            : 13.23%
+//! > unicycle, monocycle     : 2.44%
+//! > maillot                 : 2.42%
+//! ```
+//!
 
 use candle::{IndexOp, Result, Tensor, D};
 use candle_nn::{layer_norm, LayerNorm, Linear, Module, VarBuilder};

From 7b34dfc60b04289e4e1a9cd758ff2dd80f5f0bf0 Mon Sep 17 00:00:00 2001
From: Zachary Charlop-Powers <zach.charlop.powers@gmail.com>
Date: Sat, 16 Nov 2024 10:10:48 -0500
Subject: [PATCH 03/20] add dinov2reg4

---
 candle-transformers/src/models/dinov2reg4.rs | 31 ++++++++++++++++++--
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/candle-transformers/src/models/dinov2reg4.rs b/candle-transformers/src/models/dinov2reg4.rs
index 0d2320e14c..549f2c3ce5 100644
--- a/candle-transformers/src/models/dinov2reg4.rs
+++ b/candle-transformers/src/models/dinov2reg4.rs
@@ -1,9 +1,34 @@
 //! Implementation of the DINOv2 revision (4 regularization)
 //!
-//! See:
-//! - DINOv2: ["DINOv2: Learning Robust Visual Features without Supervision"](https://github.com/facebookresearch/dinov2)
+//! The DINOv2-reg4 model is a variant of DINOv2 that adds 4 register tokens to the
+//! original architecture. This implementation targets a model trained for plant species
+//! classification on the PlantCLEF2024 dataset with 7,806 classes.
 //!
-//! This code implements the regularization tokens version with 4 regularization tokens.
+//! - [Paper](https://arxiv.org/abs/2309.16588). Vision Transformers Need Registers
+//! - [GH Repo](https://github.com/facebookresearch/dinov2)
+//!
+//! # Example
+//!
+//! ```bash
+//! # Download class names and a plant picture to identify
+//! # see candle-examples/examples/dinov2reg4 for full code.
+//!
+//! # Perform inference
+//! cargo run \
+//!   --example dinov2reg4 \
+//!   --release -- \
+//!   --image <orchid-file>
+//!
+//! > Orchis simia Lam.       : 45.55%
+//! > Orchis × bergonii Nanteuil: 9.80%
+//! > Orchis italica Poir.    : 9.66%
+//! > Orchis × angusticruris Franch.: 2.76%
+//! > Orchis × bivonae Tod.   : 2.54%
+//! ```
+//!
+//! <div align=center>
+//!   <img src="https://bs.plantnet.org/image/o/bd2d3830ac3270218ba82fd24e2290becd01317c" alt="" width=320>
+//! </div>
 //!
 use candle::{IndexOp, Result, Tensor, D};
 use candle_nn::{layer_norm, LayerNorm, Linear, Module, VarBuilder};

From 4e0cee2b8e2bcaa59db3de95b04625861804a279 Mon Sep 17 00:00:00 2001
From: Zachary Charlop-Powers <zach.charlop.powers@gmail.com>
Date: Sat, 16 Nov 2024 10:17:32 -0500
Subject: [PATCH 04/20] eva2

---
 candle-transformers/src/models/dinov2.rs |  2 +-
 candle-transformers/src/models/eva2.rs   | 28 +++++++++++++++++++++---
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/candle-transformers/src/models/dinov2.rs b/candle-transformers/src/models/dinov2.rs
index d9f1e9a1d9..c737042547 100644
--- a/candle-transformers/src/models/dinov2.rs
+++ b/candle-transformers/src/models/dinov2.rs
@@ -31,7 +31,7 @@
 //! The model returns the probability for the image to belong to each of the 1000 ImageNet categories.
 //!
 //! <div align=center>
-//!   <img src="https://github.com/huggingface/candle/raw/main/candle-examples/examples/yolo-v8/assets/bike.jpg" alt="" width=320>
+//!   <img src="https://github.com/huggingface/candle/raw/main/candle-examples/examples/yolo-v8/assets/bike.jpg" alt="" width=640>
 //! </div>
 //!
 //! ```bash
diff --git a/candle-transformers/src/models/eva2.rs b/candle-transformers/src/models/eva2.rs
index ee84cca43c..9e31f58c73 100644
--- a/candle-transformers/src/models/eva2.rs
+++ b/candle-transformers/src/models/eva2.rs
@@ -1,9 +1,31 @@
 //! EVA-2 inference implementation.
 //!
-//! See ["EVA-02: A Visual Representation for Neon Genesis"](https://arxiv.org/abs/2303.11331)
+//! EVA-02 is a computer vision model that can be used as an ImageNet classifier.
+//! The model returns the probability for an image to belong to each of the 1000
+//! ImageNet categories.
+//!
+//! - [Paper](https://arxiv.org/abs/2303.11331). EVA-02: A Visual Representation for Neon Genesis
+//! - [Code](https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/eva2.py)
+//!
+//! # Example
+//!
+//! ```bash
+//! cargo run \
+//!   --example eva2 \
+//!   --release -- \
+//!   --image candle-examples/examples/yolo-v8/assets/bike.jpg
+//!
+//! > mountain bike, all-terrain bike, off-roader: 37.09%
+//! > maillot                 : 8.30%
+//! > alp                     : 2.13%
+//! > bicycle-built-for-two, tandem bicycle, tandem: 0.84%
+//! > crash helmet            : 0.73%
+//! ```
+//!
+//! <div align=center>
+//!   <img src="https://github.com/huggingface/candle/raw/main/candle-examples/examples/yolo-v8/assets/bike.jpg" alt="" width=640>
+//! </div>
 //!
-//! Based on implementation from [pytorch-image-models](https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/eva2.py)
-
 use candle::{IndexOp, Result, Tensor, D};
 use candle_nn::{layer_norm, LayerNorm, Linear, Module, VarBuilder};
 

From 0328ede151b848349ca4df516a5e2c1e0c1329d0 Mon Sep 17 00:00:00 2001
From: Zachary Charlop-Powers <zach.charlop.powers@gmail.com>
Date: Sat, 16 Nov 2024 10:21:39 -0500
Subject: [PATCH 05/20] efficientvit

---
 .../src/models/efficientvit.rs                | 37 +++++++++++++++++--
 1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/candle-transformers/src/models/efficientvit.rs b/candle-transformers/src/models/efficientvit.rs
index 9724f702a6..4c231d7679 100644
--- a/candle-transformers/src/models/efficientvit.rs
+++ b/candle-transformers/src/models/efficientvit.rs
@@ -1,9 +1,40 @@
 //! EfficientViT (MSRA) inference implementation based on timm.
 //!
-//! See ["EfficientViT: Memory Efficient Vision Transformer with Cascaded Group Attention"](https://arxiv.org/abs/2305.07027)
+//! This module provides an implementation of the EfficientViT model from Microsoft Research Asia
+//! for efficient image classification. The model uses cascaded group attention modules
+//! to achieve strong performance while maintaining low memory usage.
+//!
+//! The model was originally described in the paper:
+//! ["EfficientViT: Memory Efficient Vision Transformer with Cascaded Group Attention"](https://arxiv.org/abs/2305.07027)
+//!
+//! This implementation is based on the reference implementation from
+//! [pytorch-image-models](https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/efficientvit_msra.py).
+//!
+//! # Example Usage
+//!
+//! This candle implementation uses a pre-trained EfficientViT (from Microsoft Research Asia) network for inference.
+//! The classification head has been trained on the ImageNet dataset and returns the probabilities for the top-5 classes.
+//!
+//!
+//! ```bash
+//! cargo run \
+//!   --example efficientvit \
+//!   --release -- \
+//!   --image candle-examples/examples/yolo-v8/assets/bike.jpg --which m1
+//!
+//! > loaded image Tensor[dims 3, 224, 224; f32]
+//! > model built
+//! > mountain bike, all-terrain bike, off-roader: 69.80%
+//! > unicycle, monocycle     : 13.03%
+//! > bicycle-built-for-two, tandem bicycle, tandem: 9.28%
+//! > crash helmet            : 2.25%
+//! > alp                     : 0.46%
+//! ```
+//!
+//! <div align=center>
+//!   <img src="https://github.com/huggingface/candle/raw/main/candle-examples/examples/yolo-v8/assets/bike.jpg" alt="" width=640>
+//! </div>
 //!
-//! Based on implementation from [pytorch-image-models](https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/efficientvit_msra.py)
-
 use candle::{Result, Tensor, D};
 use candle_nn::{
     batch_norm, conv2d, conv2d_no_bias, linear, ops::sigmoid, ops::softmax, Conv2dConfig, Func,

From 42fd847609f329ce7ae7348581685a6cce077f95 Mon Sep 17 00:00:00 2001
From: Zachary Charlop-Powers <zach.charlop.powers@gmail.com>
Date: Sat, 16 Nov 2024 10:27:05 -0500
Subject: [PATCH 06/20] moondream

---
 candle-transformers/src/models/moondream.rs | 30 +++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/candle-transformers/src/models/moondream.rs b/candle-transformers/src/models/moondream.rs
index d351d7c019..a9dc9b7dc2 100644
--- a/candle-transformers/src/models/moondream.rs
+++ b/candle-transformers/src/models/moondream.rs
@@ -1,13 +1,39 @@
 //! MoonDream Model vision-to-text
 //!
+//!
+//! Moondream is a computer-vision model that can answer real-world questions about images.
+//! It's lightweight with only 1.6B parameters, enabling it to run on mobile phones and edge devices.
+//! [MoonDream Original Implementation](https://github.com/vikhyat/moondream)
+//!
 //! The model consists of:
 //! - Vision encoder using a ViT-style architecture
 //! - Text decoder based on Microsoft's Phi model
 //! - Vision projection module to align vision and text embeddings
 //!
-//! References:
-//! - [MoonDream Original Implementation](https://github.com/vikhyat/moondream)
+//! # Examples
+//!
+//! <img src="https://raw.githubusercontent.com/vikhyat/moondream/main/assets/demo-1.jpg" width="200">
+//!
+//! ```bash
+//! # download an example image
+//! wget https://raw.githubusercontent.com/vikhyat/moondream/main/assets/demo-1.jpg
+//!
+//! # Now you can run Moondream from the `candle-examples` crate:
+//! cargo run --example moondream \
+//!   --release -- \
+//!   --prompt "What is the girl eating?" \
+//!   --image "./demo-1.jpg"
 //!
+//! > avx: false, neon: true, simd128: false, f16c: false
+//! > temp: 0.00 repeat-penalty: 1.00 repeat-last-n: 64
+//! > retrieved the files in 3.395583ms
+//! > Running on CPU, to run on GPU(metal), build this example with `--features metal`
+//! > loaded the model in 5.485493792s
+//! > loaded and encoded the image Tensor[dims 3, 378, 378; f32] in 4.801396417s
+//! > starting the inference loop
+//! > The girl is eating a hamburger.<
+//! > 9 tokens generated (0.68 token/s)
+//! ```
 
 use crate::models::mixformer::{Config as PhiConfig, MixFormerSequentialForCausalLM as PhiModel};
 use crate::models::with_tracing::{layer_norm, linear_b, LayerNorm, Linear};

From 9eacceb1ab8842442d89158197ff28b1ecf17c2e Mon Sep 17 00:00:00 2001
From: Zachary Charlop-Powers <zach.charlop.powers@gmail.com>
Date: Sat, 16 Nov 2024 10:31:40 -0500
Subject: [PATCH 07/20] update t5

---
 candle-transformers/src/models/t5.rs | 46 ++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/candle-transformers/src/models/t5.rs b/candle-transformers/src/models/t5.rs
index 9da0c1afec..fa260893e4 100644
--- a/candle-transformers/src/models/t5.rs
+++ b/candle-transformers/src/models/t5.rs
@@ -14,6 +14,52 @@
 //! - [T5 Paper](https://arxiv.org/abs/1910.10683)
 //! - [HuggingFace T5](https://huggingface.co/docs/transformers/model_doc/t5)
 //! - [GH Model](https://github.com/huggingface/transformers/blob/main/src/transformers/models/t5/modeling_t5.py)
+//!
+//! # Encoder-decoder example:
+//!
+//! ```bash
+//! cargo run \
+//!   --example t5 \
+//!   --release -- \
+//!   --model-id "t5-small" \
+//!   --prompt "translate to German: A beautiful candle." \
+//!   --decode
+//!
+//! > ...
+//! >  Eine schöne Kerze.
+//! > 9 tokens generated (2.42 token/s)
+//! ```
+//!
+//! Variants such as [flan-t5](https://huggingface.co/google/flan-t5-small), [flan-ul2](https://huggingface.co/google/flan-ul2) (with `--revision "refs/pr/25"`), and [Co-EdIT](https://huggingface.co/grammarly/coedit-large) are also supported.
+//!
+//! # Translation with MADLAD
+//!
+//!
+//! [MADLAD-400](https://arxiv.org/abs/2309.04662) is a series of multilingual machine translation T5 models trained on 250 billion tokens covering over 450 languages using publicly available data. These models are competitive with significantly larger models.
+//!
+//! ```bash
+//! cargo run --example t5 --release  -- \
+//!   --model-id "jbochi/madlad400-3b-mt" \
+//!   --prompt "<2de> How are you, my friend?" \
+//!   --decode --temperature 0
+//! ...
+//!  Wie geht es dir, mein Freund?
+//! ```
+//!
+//! # Sentence embedding example
+//!
+//! ```bash
+//! cargo run --example t5 --release -- \
+//!   --model-id "t5-small" --prompt "A beautiful candle."
+//! ...
+//! [[[ 0.0515, -0.0541, -0.0761, ..., -0.0392,  0.1511, -0.0265],
+//!   [-0.0974,  0.0998, -0.1659, ..., -0.2450,  0.1738, -0.0164],
+//!   [ 0.0624, -0.1024,  0.0430, ..., -0.1388,  0.0564, -0.2962],
+//!   [-0.0389, -0.1173,  0.0026, ...,  0.1064, -0.1065,  0.0990],
+//!   [ 0.1300,  0.0027, -0.0326, ...,  0.0026, -0.0317,  0.0851]]]
+//! Tensor[[1, 5, 512], f32]
+//! Took 303.766583ms
+//! ```
 
 use crate::models::with_tracing::Embedding;
 use candle::{DType, Device, Module, Result, Tensor, D};

From dee62f109d0ed00155caddba5592a11bc3bcfa25 Mon Sep 17 00:00:00 2001
From: Zachary Charlop-Powers <zach.charlop.powers@gmail.com>
Date: Sat, 16 Nov 2024 10:33:30 -0500
Subject: [PATCH 08/20] update t5

---
 candle-transformers/src/models/t5.rs | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/candle-transformers/src/models/t5.rs b/candle-transformers/src/models/t5.rs
index fa260893e4..d3fd2ba686 100644
--- a/candle-transformers/src/models/t5.rs
+++ b/candle-transformers/src/models/t5.rs
@@ -18,13 +18,10 @@
 //! # Encoder-decoder example:
 //!
 //! ```bash
-//! cargo run \
-//!   --example t5 \
-//!   --release -- \
+//! cargo run --example t5 --release -- \
 //!   --model-id "t5-small" \
 //!   --prompt "translate to German: A beautiful candle." \
 //!   --decode
-//!
 //! > ...
 //! >  Eine schöne Kerze.
 //! > 9 tokens generated (2.42 token/s)

From 1b0ea39ab512c31deee0bdee76fe0a070c84f1d8 Mon Sep 17 00:00:00 2001
From: Zachary Charlop-Powers <zach.charlop.powers@gmail.com>
Date: Sat, 16 Nov 2024 11:25:16 -0500
Subject: [PATCH 09/20] rwkv

---
 candle-transformers/src/models/rwkv_v5.rs | 20 ++++++++++++++++++--
 candle-transformers/src/models/rwkv_v6.rs | 21 +++++++++++++++++----
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/candle-transformers/src/models/rwkv_v5.rs b/candle-transformers/src/models/rwkv_v5.rs
index 6390f886d2..15e386d292 100644
--- a/candle-transformers/src/models/rwkv_v5.rs
+++ b/candle-transformers/src/models/rwkv_v5.rs
@@ -1,7 +1,9 @@
 //! RWKV v5 model implementation.
 //!
-//! RWKV is an RNN with transformer-level performance that can be implemented
-//! as either a transformer or RNN.
+//! The [RWKV model](https://wiki.rwkv.com/) is a recurrent neural network model
+//! with performance on par with transformer architectures. Several variants are
+//! available; candle implements the v5 and v6 versions and can be used with
+//! Eagle 7B ([blog post](https://blog.rwkv.com/p/eagle-7b-soaring-past-transformers)).
 //!
 //! Key characteristics:
 //! - Time-mix attention mechanism
@@ -14,6 +16,20 @@
 //! - [RWKV Language Model](https://github.com/BlinkDL/RWKV-LM)
 //! - [RWKV v5 Release](https://github.com/BlinkDL/ChatRWKV/tree/main)
 //!
+//! # Example
+//!
+//! ```bash
+//! cargo run --example rwkv --release -- \
+//!   --prompt "The smallest prime is "
+//!
+//! > avx: true, neon: false, simd128: false, f16c: true
+//! > temp: 0.00 repeat-penalty: 1.10 repeat-last-n: 64
+//! > The smallest prime is ϕ(2) = 2.
+//! > The smallest composite is ϕ(3) = 3.
+//! > The smallest perfect number is ϕ(5) = 5.
+//! > The smallest perfect square is ϕ(4) = 4.
+//! > The smallest perfect cube is ϕ(6) = 6.
+//! ```
 
 use super::with_tracing::{layer_norm, linear_no_bias as linear, LayerNorm, Linear};
 use candle::{DType, Device, IndexOp, Result, Tensor};
diff --git a/candle-transformers/src/models/rwkv_v6.rs b/candle-transformers/src/models/rwkv_v6.rs
index c75aa885e9..5da1c5ce81 100644
--- a/candle-transformers/src/models/rwkv_v6.rs
+++ b/candle-transformers/src/models/rwkv_v6.rs
@@ -1,7 +1,9 @@
 //! RWKV v6 model implementation.
 //!
-//! RWKV is an RNN with transformer-like performance.
-//! Version 6 introduces refinements to the architecture.
+//! The [RWKV model](https://wiki.rwkv.com/) is a recurrent neural network model
+//! with performance on par with transformer architectures. Several variants are
+//! available; candle implements the v5 and v6 versions and can be used with
+//! Eagle 7B ([blog post](https://blog.rwkv.com/p/eagle-7b-soaring-past-transformers)).
 //!
 //! Key characteristics:
 //! - Linear attention mechanism
@@ -10,9 +12,20 @@
 //! - Feed forward gating
 //! - State recycling for efficient inference
 //!
-//! References:
-//! - [RWKV Model](https://github.com/BlinkDL/RWKV-LM)
+//! # Example
 //!
+//! ```bash
+//! cargo run --example rwkv --release -- \
+//!   --prompt "The smallest prime is "
+//!
+//! > avx: true, neon: false, simd128: false, f16c: true
+//! > temp: 0.00 repeat-penalty: 1.10 repeat-last-n: 64
+//! > The smallest prime is ϕ(2) = 2.
+//! > The smallest composite is ϕ(3) = 3.
+//! > The smallest perfect number is ϕ(5) = 5.
+//! > The smallest perfect square is ϕ(4) = 4.
+//! > The smallest perfect cube is ϕ(6) = 6.
+//! ```
 
 use super::with_tracing::{layer_norm, linear_no_bias as linear, LayerNorm, Linear};
 use candle::{IndexOp, Result, Tensor};

From c04bfec06e180103fc2344138ff2fbb2830d6a78 Mon Sep 17 00:00:00 2001
From: Zachary Charlop-Powers <zach.charlop.powers@gmail.com>
Date: Sat, 16 Nov 2024 11:39:32 -0500
Subject: [PATCH 10/20] stable diffusion docs

---
 .../src/models/stable_diffusion/mod.rs        | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/candle-transformers/src/models/stable_diffusion/mod.rs b/candle-transformers/src/models/stable_diffusion/mod.rs
index d3e2032b6e..21a5f6bba2 100644
--- a/candle-transformers/src/models/stable_diffusion/mod.rs
+++ b/candle-transformers/src/models/stable_diffusion/mod.rs
@@ -5,7 +5,38 @@
 //!
 //! - [Original Repository](https://github.com/CompVis/stable-diffusion)
 //! - [Hugging Face](https://huggingface.co/runwayml/stable-diffusion-v1-5)
+//! - The default scheduler for the v1.5, v2.1 and XL 1.0 versions is the Denoising Diffusion Implicit Model scheduler (DDIM). The original paper and some code can be found in the [associated repo](https://github.com/ermongroup/ddim). The default scheduler for the XL Turbo version is the Euler Ancestral scheduler.
 //!
+//!
+//! # Example
+//!
+//! <div align=center>
+//!   <img src="https://github.com/huggingface/candle/raw/main/candle-examples/examples/stable-diffusion/assets/stable-diffusion-xl.jpg" alt="rusty robot holding a candle" width=320>
+//! </div>
+//!
+//! > _"A rusty robot holding a fire torch in its hand."_
+//! Generated by Stable Diffusion XL using Rust and [candle](https://github.com/huggingface/candle).
+//!
+//! ```bash
+//! # example running with cuda
+//! # see the candle-examples/examples/stable-diffusion for all options
+//! cargo run --example stable-diffusion --release --features=cuda,cudnn \
+//!     -- --prompt "a cosmonaut on a horse (hd, realistic, high-def)"
+//!
+//! # with sd-turbo
+//! cargo run --example stable-diffusion --release --features=cuda,cudnn \
+//!     -- --prompt "a cosmonaut on a horse (hd, realistic, high-def)" \
+//!     --sd-version turbo
+//!
+//! # with flash attention.
+//! # feature flag: `--features flash-attn`
+//! # cli flag: `--use-flash-attn`.
+//! # flash-attention-v2 is only compatible with Ampere, Ada,
+//! # or Hopper GPUs (e.g., A100/H100, RTX 3090/4090).
+//! cargo run --example stable-diffusion --release --features=cuda,cudnn \
+//!     -- --prompt "a cosmonaut on a horse (hd, realistic, high-def)" \
+//!     --use-flash-attn
+//! ```
 
 pub mod attention;
 pub mod clip;

From 88b6353bfde658016b1758b7866634f8b77e19cb Mon Sep 17 00:00:00 2001
From: Zachary Charlop-Powers <zach.charlop.powers@gmail.com>
Date: Sat, 16 Nov 2024 11:51:21 -0500
Subject: [PATCH 11/20] add wasm link

---
 .../src/models/segment_anything/mod.rs        | 25 ++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/candle-transformers/src/models/segment_anything/mod.rs b/candle-transformers/src/models/segment_anything/mod.rs
index 3e85fe3594..11798d69f0 100644
--- a/candle-transformers/src/models/segment_anything/mod.rs
+++ b/candle-transformers/src/models/segment_anything/mod.rs
@@ -1,10 +1,33 @@
 //! Segment Anything Model (SAM)
 //!
 //! SAM is an architecture for image segmentation, capable of segmenting any object
-//! in an image based on prompts like points or boxes.
+//! in an image based on prompts like points or boxes. This model provides a robust and fast image segmentation pipeline that can be tweaked via
+//! some prompting (requesting some points to be in the target mask, requesting some
+//! points to be part of the background so _not_ in the target mask, specifying some
+//! bounding box).
 //!
 //! - [GH Link](https://github.com/facebookresearch/segment-anything)
 //! - [Paper](https://arxiv.org/abs/2304.02643)
+//! - [Interactive Wasm Example](https://huggingface.co/spaces/radames/candle-segment-anything-wasm)
+//! - The default backbone can be replaced by the smaller and faster TinyViT model
+//! based on [MobileSAM](https://github.com/ChaoningZhang/MobileSAM).
+//!
+//! ## Example
+//!
+//! ```bash
+//! cargo run --example segment-anything --release -- \
+//!     --image candle-examples/examples/yolo-v8/assets/bike.jpg \
+//!     --use-tiny --point 0.6,0.6 --point 0.6,0.55
+//! ```
+//!
+//! <div align=center style="display: flex; justify-content: center; gap: 10px;">
+//!   <img src="https://github.com/huggingface/candle/raw/main/candle-examples/examples/yolo-v8/assets/bike.jpg" alt="" width="30%">
+//!   <img src="https://github.com/huggingface/candle/raw/main/candle-examples/examples/segment-anything/assets/single_pt_prompt.jpg" alt="" width="30%">
+//!   <img src="https://github.com/huggingface/candle/raw/main/candle-examples/examples/segment-anything/assets/two_pt_prompt.jpg" alt="" width="30%">
+//! </div>
+//!
+//!
+//! > Original; Prompt with `--point 0.6,0.55`; Prompt with `--point 0.6,0.6 --point 0.6,0.55`
 //!
 pub use crate::models::with_tracing::Linear;
 use candle::{Result, Tensor};

From b002401523135995f3bf17db5673d7322bfbe0cc Mon Sep 17 00:00:00 2001
From: Zachary Charlop-Powers <zach.charlop.powers@gmail.com>
Date: Sat, 16 Nov 2024 11:57:23 -0500
Subject: [PATCH 12/20] add segment_anything

---
 candle-transformers/src/models/segment_anything/mod.rs | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/candle-transformers/src/models/segment_anything/mod.rs b/candle-transformers/src/models/segment_anything/mod.rs
index 11798d69f0..9685128de6 100644
--- a/candle-transformers/src/models/segment_anything/mod.rs
+++ b/candle-transformers/src/models/segment_anything/mod.rs
@@ -6,12 +6,13 @@
 //! points to be part of the background so _not_ in the target mask, specifying some
 //! bounding box).
 //!
-//! - [GH Link](https://github.com/facebookresearch/segment-anything)
-//! - [Paper](https://arxiv.org/abs/2304.02643)
-//! - [Interactive Wasm Example](https://huggingface.co/spaces/radames/candle-segment-anything-wasm)
-//! - The default backbone can be replaced by the smaller and faster TinyViT model
+//! - ⚡ [Interactive Wasm Example](https://huggingface.co/spaces/radames/candle-segment-anything-wasm)
+//! - 💻 [GH Link](https://github.com/facebookresearch/segment-anything)
+//! - 📝 [Paper](https://arxiv.org/abs/2304.02643)
+//! - 💡 The default backbone can be replaced by the smaller and faster TinyViT model
 //! based on [MobileSAM](https://github.com/ChaoningZhang/MobileSAM).
 //!
+//!
 //! ## Example
 //!
 //! ```bash

From fd358b9dbe1b3fdba1984bb4bd5599a34419e1d3 Mon Sep 17 00:00:00 2001
From: Zachary Charlop-Powers <zach.charlop.powers@gmail.com>
Date: Sun, 17 Nov 2024 11:22:30 -0500
Subject: [PATCH 13/20] adjust for clippy

---
 candle-transformers/src/models/segment_anything/mod.rs | 3 +--
 candle-transformers/src/models/stable_diffusion/mod.rs | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/candle-transformers/src/models/segment_anything/mod.rs b/candle-transformers/src/models/segment_anything/mod.rs
index 9685128de6..fe0b099008 100644
--- a/candle-transformers/src/models/segment_anything/mod.rs
+++ b/candle-transformers/src/models/segment_anything/mod.rs
@@ -9,8 +9,7 @@
 //! - ⚡ [Interactive Wasm Example](https://huggingface.co/spaces/radames/candle-segment-anything-wasm)
 //! - 💻 [GH Link](https://github.com/facebookresearch/segment-anything)
 //! - 📝 [Paper](https://arxiv.org/abs/2304.02643)
-//! - 💡 The default backbone can be replaced by the smaller and faster TinyViT model
-//! based on [MobileSAM](https://github.com/ChaoningZhang/MobileSAM).
+//! - 💡 The default backbone can be replaced by the smaller and faster TinyViT model based on [MobileSAM](https://github.com/ChaoningZhang/MobileSAM).
 //!
 //!
 //! ## Example
diff --git a/candle-transformers/src/models/stable_diffusion/mod.rs b/candle-transformers/src/models/stable_diffusion/mod.rs
index 21a5f6bba2..458a7de2d4 100644
--- a/candle-transformers/src/models/stable_diffusion/mod.rs
+++ b/candle-transformers/src/models/stable_diffusion/mod.rs
@@ -14,8 +14,7 @@
 //!   <img src="https://github.com/huggingface/candle/raw/main/candle-examples/examples/stable-diffusion/assets/stable-diffusion-xl.jpg" alt="rusty robot holding a candle" width=320>
 //! </div>
 //!
-//! > _"A rusty robot holding a fire torch in its hand."_
-//! Generated by Stable Diffusion XL using Rust and [candle](https://github.com/huggingface/candle).
+//! _"A rusty robot holding a fire torch in its hand."_ Generated by Stable Diffusion XL using Rust and [candle](https://github.com/huggingface/candle).
 //!
 //! ```bash
 //! # example running with cuda

From ba6abe0069d7bef88a5cf09c67b0a71eb0c26129 Mon Sep 17 00:00:00 2001
From: Zachary Charlop-Powers <zach.charlop.powers@gmail.com>
Date: Sun, 17 Nov 2024 11:54:43 -0500
Subject: [PATCH 14/20] ignore bertdoc

---
 candle-transformers/src/models/bert.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/candle-transformers/src/models/bert.rs b/candle-transformers/src/models/bert.rs
index 808ca41557..bd11b01a90 100644
--- a/candle-transformers/src/models/bert.rs
+++ b/candle-transformers/src/models/bert.rs
@@ -7,7 +7,7 @@
 //! - Upstream [Github repo](https://github.com/google-research/bert).
 //! - See bert in [candle-examples](https://github.com/huggingface/candle/tree/main/candle-examples/) for runnable code
 //!
-//! ```no_run
+//! ```ignore
 //! // for sentence embeddings
 //! # use candle_core::Tensor;
 //! # use candle_nn::{VarBuilder, Module};

From 8a34fffa059586b9bebe56696fe534ee970394f8 Mon Sep 17 00:00:00 2001
From: Zachary Charlop-Powers <zach.charlop.powers@gmail.com>
Date: Sun, 17 Nov 2024 11:56:18 -0500
Subject: [PATCH 15/20] dinov2 ignore

---
 candle-transformers/src/models/dinov2.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/candle-transformers/src/models/dinov2.rs b/candle-transformers/src/models/dinov2.rs
index c737042547..2317eee239 100644
--- a/candle-transformers/src/models/dinov2.rs
+++ b/candle-transformers/src/models/dinov2.rs
@@ -6,7 +6,7 @@
 //!
 //! # Example usage
 //!
-//! ```rust,no_run
+//! ```ignore
 //! # use candle::Result;
 //! # fn main() -> Result<()> {
 //! use candle_transformers::dinov2::vit_small;

From 9ddb2b0dd554fdd0015f14c5cc5c57c2a76cf3f3 Mon Sep 17 00:00:00 2001
From: Zachary Charlop-Powers <zach.charlop.powers@gmail.com>
Date: Sun, 17 Nov 2024 12:03:11 -0500
Subject: [PATCH 16/20] update block to be text

---
 candle-transformers/src/models/bert.rs   | 22 +++++++++++-----------
 candle-transformers/src/models/dinov2.rs |  2 +-
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/candle-transformers/src/models/bert.rs b/candle-transformers/src/models/bert.rs
index bd11b01a90..a7283ea52e 100644
--- a/candle-transformers/src/models/bert.rs
+++ b/candle-transformers/src/models/bert.rs
@@ -7,26 +7,26 @@
 //! - Upstream [Github repo](https://github.com/google-research/bert).
 //! - See bert in [candle-examples](https://github.com/huggingface/candle/tree/main/candle-examples/) for runnable code
 //!
-//! ```ignore
-//! // for sentence embeddings
+//! ```rust,text
+//! // Different models can be loaded using the model ID
 //! # use candle_core::Tensor;
 //! # use candle_nn::{VarBuilder, Module};
 //! # fn main() -> candle_core::Result<()> {
-//! # let model = todo!();
-//! # let prompt = "Here is a test sentence";
-//! let embeddings = model.forward(prompt)?;
-//! // Returns tensor of shape [1, 7, 384]
-//! println!("{embeddings}");
+//! # let vb = todo!();
+//! # let config = todo!();
+//! let model = BertModel::load(vb, &config )?;
 //! # Ok(())
 //! # }
 //!
-//! // Different models can be loaded using the model ID
+//! // for sentence embeddings
 //! # use candle_core::Tensor;
 //! # use candle_nn::{VarBuilder, Module};
 //! # fn main() -> candle_core::Result<()> {
-//! # let vb = todo!();
-//! # let config = todo!();
-//! let model = BertModel::load(vb, &config )?;
+//! # let model = todo!();
+//! let prompt = "Here is a test sentence";
+//! let embeddings = model.forward(prompt)?;
+//! // Returns tensor of shape [1, 7, 384]
+//! println!("{embeddings}");
 //! # Ok(())
 //! # }
 //!
diff --git a/candle-transformers/src/models/dinov2.rs b/candle-transformers/src/models/dinov2.rs
index 2317eee239..d960adb51b 100644
--- a/candle-transformers/src/models/dinov2.rs
+++ b/candle-transformers/src/models/dinov2.rs
@@ -6,7 +6,7 @@
 //!
 //! # Example usage
 //!
-//! ```ignore
+//! ```rust,text
 //! # use candle::Result;
 //! # fn main() -> Result<()> {
 //! use candle_transformers::dinov2::vit_small;

From 7649c27e599c8bd26cd5009d410086a15d798a40 Mon Sep 17 00:00:00 2001
From: Zachary Charlop-Powers <zach.charlop.powers@gmail.com>
Date: Sun, 17 Nov 2024 12:34:52 -0500
Subject: [PATCH 17/20] remove the rust blocks for the moment

---
 candle-transformers/src/models/bert.rs   | 50 ------------------------
 candle-transformers/src/models/dinov2.rs | 11 ------
 2 files changed, 61 deletions(-)

diff --git a/candle-transformers/src/models/bert.rs b/candle-transformers/src/models/bert.rs
index a7283ea52e..da8734160a 100644
--- a/candle-transformers/src/models/bert.rs
+++ b/candle-transformers/src/models/bert.rs
@@ -7,56 +7,6 @@
 //! - Upstream [Github repo](https://github.com/google-research/bert).
 //! - See bert in [candle-examples](https://github.com/huggingface/candle/tree/main/candle-examples/) for runnable code
 //!
-//! ```rust,text
-//! // Different models can be loaded using the model ID
-//! # use candle_core::Tensor;
-//! # use candle_nn::{VarBuilder, Module};
-//! # fn main() -> candle_core::Result<()> {
-//! # let vb = todo!();
-//! # let config = todo!();
-//! let model = BertModel::load(vb, &config )?;
-//! # Ok(())
-//! # }
-//!
-//! // for sentence embeddings
-//! # use candle_core::Tensor;
-//! # use candle_nn::{VarBuilder, Module};
-//! # fn main() -> candle_core::Result<()> {
-//! # let model = todo!();
-//! let prompt = "Here is a test sentence";
-//! let embeddings = model.forward(prompt)?;
-//! // Returns tensor of shape [1, 7, 384]
-//! println!("{embeddings}");
-//! # Ok(())
-//! # }
-//!
-//! // Gelu approximation
-//! // You can get a speedup by configuring the model
-//! // to use an approximation of the gelu activation:
-//! # use candle_core::Tensor;
-//! # use candle_nn::{VarBuilder, Module};
-//! # fn main() -> candle_core::Result<()> {
-//! # let mut config = todo!();
-//! config.hidden_act = HiddenAct::GeluApproximate;
-//! # Ok(())
-//! # }
-//!
-//! // Similarities
-//! // Bert can compute sentence embeddings which can then be used to calculate
-//! // semantic similarities between sentences through cosine similarity scoring.
-//! // The sentence embeddings are computed using average pooling across all tokens.
-//! # use candle_core::Tensor;
-//! # use candle_nn::{VarBuilder, Module};
-//! # fn main() -> candle_core::Result<()> {
-//! # let model = todo!();
-//! let sentence1 = "The new movie is awesome";
-//! let sentence2 = "The new movie is so great";
-//! let emb1 = model.forward(sentence1)?;
-//! let emb2 = model.forward(sentence2)?;
-//! # Ok(())
-//! # }
-//! ```
-//!
 use super::with_tracing::{layer_norm, linear, LayerNorm, Linear};
 use candle::{DType, Device, Result, Tensor};
 use candle_nn::{embedding, Embedding, Module, VarBuilder};
diff --git a/candle-transformers/src/models/dinov2.rs b/candle-transformers/src/models/dinov2.rs
index d960adb51b..4d46941f8b 100644
--- a/candle-transformers/src/models/dinov2.rs
+++ b/candle-transformers/src/models/dinov2.rs
@@ -4,17 +4,6 @@
 //! DINOv2 is a self-supervised learning model that can learn visual features
 //! without using any labeled data. See: ["DINOv2: Learning Robust Visual Features without Supervision"](https://github.com/facebookresearch/dinov2)
 //!
-//! # Example usage
-//!
-//! ```rust,text
-//! # use candle::Result;
-//! # fn main() -> Result<()> {
-//! use candle_transformers::dinov2::vit_small;
-//! let model = vit_small(vb)?;
-//! # Ok(())
-//! # }
-//! ```
-//!
 //! ## Running an example with color map and CUDA
 //!
 //! ```bash

From 7f75c1866207a33eb6d11b2c543fe1d41b05a1e1 Mon Sep 17 00:00:00 2001
From: Zachary Charlop-Powers <zach.charlop.powers@gmail.com>
Date: Sun, 17 Nov 2024 13:32:36 -0500
Subject: [PATCH 18/20] bump python to 3.11

---
 .github/workflows/rust-ci.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml
index ee480c474c..76137e9487 100644
--- a/.github/workflows/rust-ci.yml
+++ b/.github/workflows/rust-ci.yml
@@ -14,6 +14,7 @@ jobs:
       matrix:
         os: [ubuntu-latest, windows-latest, macOS-latest]
         rust: [stable]
+        python-version: ["3.11"]
     steps:
       - uses: actions/checkout@v4
       - uses: actions-rs/toolchain@v1

From 162fbaa5eaef6a87f8b4f2f3a57c78aa21758fff Mon Sep 17 00:00:00 2001
From: Zachary Charlop-Powers <zach.charlop.powers@gmail.com>
Date: Sun, 17 Nov 2024 13:58:04 -0500
Subject: [PATCH 19/20] add a setup-python step

---
 .github/workflows/rust-ci.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml
index 76137e9487..6c86b3dd98 100644
--- a/.github/workflows/rust-ci.yml
+++ b/.github/workflows/rust-ci.yml
@@ -14,9 +14,11 @@ jobs:
       matrix:
         os: [ubuntu-latest, windows-latest, macOS-latest]
         rust: [stable]
-        python-version: ["3.11"]
     steps:
       - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
       - uses: actions-rs/toolchain@v1
         with:
           profile: minimal

From 5555d6ad7d9b694c2c960988e86f0c7ad74ee686 Mon Sep 17 00:00:00 2001
From: Zachary Charlop-Powers <zach.charlop.powers@gmail.com>
Date: Sun, 17 Nov 2024 14:11:20 -0500
Subject: [PATCH 20/20] add py311 to test as well

---
 .github/workflows/rust-ci.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml
index 6c86b3dd98..db25503079 100644
--- a/.github/workflows/rust-ci.yml
+++ b/.github/workflows/rust-ci.yml
@@ -38,6 +38,9 @@ jobs:
         rust: [stable]
     steps:
       - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
       - uses: actions-rs/toolchain@v1
         with:
           profile: minimal