Skip to content

Commit

Permalink
Merge pull request #2 from spiceai/jack/spiceai
Browse files Browse the repository at this point in the history
Update from `EricLBuehler/main`
  • Loading branch information
Jeadie authored Aug 1, 2024
2 parents 41aaac4 + 2064fb0 commit 30a1278
Show file tree
Hide file tree
Showing 122 changed files with 6,457 additions and 1,147 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ target/
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock

# editor config
.helix
.vscode

# These are backup files generated by rustfmt
**/*.rs.bk

Expand Down
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
//"rust-analyzer.cargo.features": ["cuda"],
"rust-analyzer.cargo.features": ["cuda"],
}
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ candle-onnx = { path = "./candle-onnx", version = "0.6.0" }
candle-transformers = { path = "./candle-transformers", version = "0.6.0" }
clap = { version = "4.2.4", features = ["derive"] }
criterion = { version = "0.5.1", default-features=false }
cudarc = { version = "0.11.5", features = ["std", "cublas", "cublaslt", "curand", "driver", "nvrtc", "f16", "cuda-version-from-build-system", "dynamic-linking"], default-features=false }
cudarc = { version = "=0.11.6", features = ["std", "cublas", "cublaslt", "curand", "driver", "nvrtc", "f16", "cuda-version-from-build-system", "dynamic-linking"], default-features=false }
fancy-regex = "0.13.0"
gemm = { version = "0.17.0", features = ["wasm-simd128-enable"] }
hf-hub = "0.3.0"
half = { version = "2.3.1", features = ["num-traits", "use-intrinsics", "rand_distr"] }
hound = "3.5.1"
image = { version = "0.25.0", default-features = false, features = ["jpeg", "png"] }
image = { version = "0.25.2", default-features = false, features = ["jpeg", "png"] }
imageproc = { version = "0.24.0", default-features = false }
intel-mkl-src = { version = "0.8.1", features = ["mkl-static-lp64-iomp"] }
libc = { version = "0.2.147" }
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ If you have an addition to this list, please submit a pull request.
- MetaVoice-1B, text-to-speech model.
- Computer Vision Models.
- DINOv2, ConvMixer, EfficientNet, ResNet, ViT, VGG, RepVGG, ConvNeXT,
ConvNeXTv2, MobileOne, EfficientVit (MSRA).
ConvNeXTv2, MobileOne, EfficientVit (MSRA), MobileNetv4.
- yolo-v3, yolo-v8.
- Segment-Anything Model (SAM).
- SegFormer.
Expand Down
4 changes: 4 additions & 0 deletions candle-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,7 @@ metal = ["dep:metal", "dep:candle-metal-kernels"]
[[bench]]
name = "bench_main"
harness = false

[[example]]
name = "metal_basics"
required-features = ["metal"]
2 changes: 1 addition & 1 deletion candle-core/benches/benchmarks/affine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ fn run_affine_benchmark(c: &mut Criterion, device: &Device, dtype: DType, name:
let m = 1024;
let k = 1024;

let tensor = Tensor::zeros((b, m, k), dtype, &device).unwrap();
let tensor = Tensor::zeros((b, m, k), dtype, device).unwrap();

let flops = b * m * k * dtype.size_in_bytes();

Expand Down
4 changes: 2 additions & 2 deletions candle-core/benches/benchmarks/qmatmul.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use criterion::{black_box, criterion_group, Criterion, Throughput};
use std::time::Instant;

fn run(matmul: &QMatMul, x: &Tensor) {
matmul.forward(&x).unwrap();
matmul.forward(x).unwrap();
}

fn run_bench(c: &mut Criterion, device: &Device, dtype: GgmlDType) {
Expand Down Expand Up @@ -50,7 +50,7 @@ fn run_bench(c: &mut Criterion, device: &Device, dtype: GgmlDType) {
fn criterion_benchmark(c: &mut Criterion) {
let handler = BenchDeviceHandler::new().unwrap();
for device in handler.devices {
for dtype in vec![
for dtype in [
GgmlDType::F32,
GgmlDType::F16,
GgmlDType::Q4_0,
Expand Down
2 changes: 1 addition & 1 deletion candle-core/benches/benchmarks/unary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ fn run_unary_benchmark(c: &mut Criterion, device: &Device, dtype: DType, name: &
let m = 1024;
let k = 1024;

let tensor = Tensor::arange(0.0f32, (b * m * k) as f32, &device)
let tensor = Tensor::arange(0.0f32, (b * m * k) as f32, device)
.unwrap()
.to_dtype(dtype)
.unwrap()
Expand Down
6 changes: 3 additions & 3 deletions candle-core/benches/benchmarks/where_cond.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ const SIZE: usize = B * M * K;
const DATA: [u8; SIZE] = create_cond_arr::<SIZE>();

fn run_where_cond_benchmark(c: &mut Criterion, device: &Device, dtype: DType, name: &str) {
let tensor = Tensor::from_slice(DATA.as_slice(), (B, M, K), &device).unwrap();
let on_true = Tensor::ones((B, M, K), dtype, &device).unwrap();
let on_false = Tensor::zeros((B, M, K), dtype, &device).unwrap();
let tensor = Tensor::from_slice(DATA.as_slice(), (B, M, K), device).unwrap();
let on_true = Tensor::ones((B, M, K), dtype, device).unwrap();
let on_false = Tensor::zeros((B, M, K), dtype, device).unwrap();

let elements = B * M * K;
// E.g. 2 f32 tensors + 1 u8 tensor
Expand Down
28 changes: 28 additions & 0 deletions candle-core/examples/metal_basics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#[cfg(feature = "accelerate")]
extern crate accelerate_src;

#[cfg(feature = "mkl")]
extern crate intel_mkl_src;

use anyhow::Result;
use candle_core::{Device, Tensor};

fn main() -> Result<()> {
// This requires the code to be run with MTL_CAPTURE_ENABLED=1
let device = Device::new_metal(0)?;
let metal_device = match &device {
Device::Metal(m) => m,
_ => anyhow::bail!("unexpected device"),
};
metal_device.capture("/tmp/candle.gputrace")?;
// This first synchronize ensures that a new command buffer gets created after setting up the
// capture scope.
device.synchronize()?;
let x = Tensor::randn(0f32, 1.0, (128, 128), &device)?;
let x1 = x.add(&x)?;
println!("{x1:?}");
// This second synchronize ensures that the command buffer gets commited before the end of the
// capture scope.
device.synchronize()?;
Ok(())
}
7 changes: 4 additions & 3 deletions candle-core/src/backprop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -320,13 +320,13 @@ impl Tensor {
dilation,
output_padding: _output_padding,
} => {
let grad_arg = grad.conv2d(kernel, *padding, *dilation, *stride, 1)?;
let grad_arg = grad.conv2d(kernel, *padding, *stride, *dilation, 1)?;
let sum_grad = grads.or_insert(arg)?;
*sum_grad = sum_grad.add(&grad_arg)?;

let grad_kernel = grad
.transpose(0, 1)?
.conv2d(&arg.transpose(0, 1)?, *padding, *stride, *dilation, 1)?
.conv2d(&arg.transpose(0, 1)?, *padding, *dilation, *stride, 1)?
.transpose(0, 1)?;
let sum_grad = grads.or_insert(kernel)?;
let (_, _, k0, k1) = kernel.dims4()?;
Expand Down Expand Up @@ -634,7 +634,8 @@ impl Tensor {
let zeros = arg.zeros_like()?;
let positive_mask = arg.gt(&zeros)?.to_dtype(arg.dtype())?;
let negative_mask = arg.le(&zeros)?.to_dtype(arg.dtype())?;
let negative_exp_mask = ((negative_mask * arg.exp())? * *alpha)?;
// node == alpha * (e^x - 1) for x <= 0, reuse it
let negative_exp_mask = (negative_mask * (*node + *alpha))?;
let combined_mask = (positive_mask + negative_exp_mask)?;
*sum_grad = sum_grad.add(&(grad * combined_mask)?)?
}
Expand Down
4 changes: 4 additions & 0 deletions candle-core/src/cuda_backend/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ impl std::ops::Deref for CudaDevice {
}

impl CudaDevice {
/// Returns a shared reference to the `CudaBlas` value stored in this device's `blas` field.
pub fn cublas_handle(&self) -> &cudarc::cublas::CudaBlas {
    // Deref coercion at the return site turns the borrowed field into `&CudaBlas`.
    &self.blas
}

/// Returns a new `Arc` handle to the cudarc driver device held by this `CudaDevice`.
pub fn cuda_device(&self) -> Arc<cudarc::driver::CudaDevice> {
    // Cloning the `Arc` only produces another handle to the same driver device.
    Arc::clone(&self.device)
}
Expand Down
8 changes: 7 additions & 1 deletion candle-core/src/metal_backend/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,13 @@ impl MetalDevice {
let descriptor = metal::CaptureDescriptor::new();
descriptor.set_destination(metal::MTLCaptureDestination::GpuTraceDocument);
descriptor.set_capture_device(self);
descriptor.set_output_url(path);
// The [set_output_url] call requires an absolute path so we convert it if needed.
if path.as_ref().is_absolute() {
descriptor.set_output_url(path);
} else {
let path = std::env::current_dir()?.join(path);
descriptor.set_output_url(path);
}

capture
.start_capture(&descriptor)
Expand Down
6 changes: 3 additions & 3 deletions candle-core/src/tensor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -651,9 +651,9 @@ impl Tensor {
///
/// * `args` - A slice of 1D tensors.
/// * `xy_indexing` - Whether to use xy indexing or ij indexing. If xy is selected, the
/// first dimension corresponds to the cardinality of the second input and the second
/// dimension corresponds to the cardinality of the first input. If ij is selected, the
/// dimensions are in the same order as the cardinality of the inputs.
/// first dimension corresponds to the cardinality of the second input and the second
/// dimension corresponds to the cardinality of the first input. If ij is selected, the
/// dimensions are in the same order as the cardinality of the inputs.
///
/// # Examples
///
Expand Down
97 changes: 97 additions & 0 deletions candle-core/tests/conv_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,103 @@ fn conv2d_grad(dev: &Device) -> Result<()> {
]
]
);

// Repeat the test with the following parameters; t & w are unmodified.
let padding = 1;
let outpadding = 1;
let dilation = 1;
let stride = 2;

let res = t.conv_transpose2d(&w, padding, outpadding, stride, dilation)?;
let loss = res.sqr()?.sum_all()?;
assert_eq!(test_utils::to_vec0_round(&loss, 0)?, 3627.0); // torch gives 3626.8560

let grads = loss.backward()?;

let grad_t = grads.get(&t).unwrap();
let grad_w = grads.get(&w).unwrap();
assert_eq!(grad_t.dims(), [1, 4, 7, 5]);
assert_eq!(grad_w.dims(), [4, 2, 3, 5]);

#[rustfmt::skip]
assert_eq!(
test_utils::to_vec3_round(&grad_t.i(0)?, 1)?,
[
[
[ 13.2, -40.7, -9.7, -47.3, -82.7],
[ -98.2, 9.7, 57.7, -6.2, 180.7],
[ 100.2, 24.1, 3.7, -100.5, -48.1],
[ -0.3, 13.5, -2.9, 80.0, -49.8],
[ 47.2, -25.6, -74.4, 61.2, -18.4],
[ 4.6, -69.5, 27.9, 66.5, -88.1],
// 4th column on next row; torch is 4.2
[ -12.0, 79.2, -40.0, 4.1, -97.1],
],
[
[ -42.2, -36.5, -51.1, 7.5, 32.3],
[ 74.1, -44.6, -68.8, 19.5, 7.7],
[ 137.1, 54.2, 153.8, -58.0, 45.5],
[ 24.4, -56.8, 9.7, -41.0, -14.5],
[ -3.7, 72.6, 8.3, 134.8, 40.5],
[ 43.2, -56.9, -47.5, -89.4, -95.4],
[ 68.2, 108.1, -80.0, 57.0, -121.1]
],
[
[ 31.1, -11.4, -34.8, 33.1, -44.2],
[ 29.4, -31.6, -40.2, 13.7, 13.1],
[ -0.8, -83.8, -7.8, -17.3, 78.2],
[ 12.0, -118.7, 137.5, -76.7, 50.8],
[ -28.7, -114.2, -3.7, -96.3, -13.8],
[ -31.8, 28.5, -14.3, 4.6, 13.4],
[ 28.0, -0.2, -38.9, -29.7, -59.0]
],
[
[ -16.8, 38.5, 15.5, 26.6, 48.9],
[ 14.5, 49.6, -24.8, 65.6, 61.7],
[ 22.1, -64.7, -4.3, -51.0, 36.3],
[ 31.0, -88.9, 47.1, -123.5, -3.8],
[ -14.8, -39.8, 128.2, -110.3, 42.6],
// 1st column on next row; torch is -7.2
[ -7.1, 95.3, -21.3, -58.7, -13.9],
[ 26.9, 21.3, 16.1, 70.3, 32.1]
]
]
);

#[rustfmt::skip]
assert_eq!(
test_utils::to_vec1_round(&grad_w.flatten_all()?, 1)?,
[
// 2nd value; torch gets -3.2, 3rd value; torch gets 221.8
-2.460e+01, -3.100e+00, 2.219e+02, 7.400e+00, 5.620e+01,
7.420e+01, 7.830e+01, 8.900e+00, 1.050e+01, 2.810e+01,
5.100e+00, -1.046e+02, -1.572e+02, 8.710e+01, -9.840e+01,
-4.230e+01, -1.898e+02, 1.860e+01, -3.570e+01, 9.810e+01,
4.680e+01, 1.182e+02, 4.020e+01, -1.900e+00, 1.508e+02,
1.094e+02, 1.018e+02, -4.620e+01, 1.591e+02, -2.320e+01,
// 5th value; torch gets 7.1
-8.450e+01, -4.600e+00, 6.330e+01, 1.123e+02, -7.000e+00,
1.101e+02, -6.620e+01, 2.090e+01, -5.120e+01, 8.990e+01,
9.050e+01, -6.990e+01, 6.800e+01, -9.250e+01, 1.380e+02,
4.720e+01, 4.710e+01, 6.210e+01, 8.870e+01, 2.098e+02,
3.870e+01, -1.390e+01, 6.270e+01, 1.484e+02, -9.920e+01,
-4.200e+01, -1.505e+02, -1.480e+01, -2.620e+01, 8.220e+01,
-3.350e+01, -2.260e+01, -1.198e+02, -5.080e+01, 1.259e+02,
5.600e+01, 9.270e+01, 1.209e+02, 6.590e+01, -8.330e+01,
7.000e+00, -2.600e+01, -1.133e+02, 3.870e+01, 4.020e+01,
-6.300e+00, -8.710e+01, -5.150e+01, -8.510e+01, 2.000e-01,
3.640e+01, -6.100e+00, 6.590e+01, -2.700e+00, 6.550e+01,
// 4th value; torch gets 3.8
5.300e+00, -6.760e+01, -4.270e+01, -3.900e+00, 2.880e+01,
5.260e+01, 6.170e+01, -1.203e+02, -1.610e+01, 7.740e+01,
-1.008e+02, -1.070e+01, -9.900e+00, 3.300e+00, -2.620e+01,
-4.440e+01, 2.580e+01, -6.920e+01, -4.220e+01, 1.108e+02,
1.240e+01, -3.440e+01, -2.800e+00, 7.880e+01, -6.690e+01,
1.480e+01, 2.310e+01, -4.260e+01, -1.500e+00, -4.760e+01,
5.350e+01, -2.260e+01, 8.000e-01, -3.840e+01, -2.500e+00
]
);

Ok(())
}

Expand Down
2 changes: 1 addition & 1 deletion candle-examples/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ serde = { workspace = true }
serde_json = { workspace = true }
symphonia = { version = "0.5.3", features = ["all"], optional = true }
tokenizers = { workspace = true, features = ["onig"] }
cpal= { version = "0.15.2", optional = true }
cpal = { version = "0.15.2", optional = true }

[dev-dependencies]
anyhow = { workspace = true }
Expand Down
20 changes: 20 additions & 0 deletions candle-examples/examples/beit/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# candle-beit

[Beit](https://arxiv.org/abs/2106.08254) is a computer vision model.
In this example, it is used as an ImageNet classifier: the model returns the
probability for the image to belong to each of the 1000 ImageNet categories.

## Running an example

```bash
cargo run --example beit --release -- --image candle-examples/examples/yolo-v8/assets/bike.jpg

> mountain bike, all-terrain bike, off-roader: 56.16%
> bicycle-built-for-two, tandem bicycle, tandem: 3.08%
> maillot : 2.23%
> alp : 0.88%
> crash helmet : 0.85%

```

![Leading group, Giro d'Italia 2021](../yolo-v8/assets/bike.jpg)
Loading

0 comments on commit 30a1278

Please sign in to comment.