diff --git a/Cargo.lock b/Cargo.lock
index 0896a3d35c..bc66a5f125 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -415,7 +415,6 @@ dependencies = [
  "arboard",
  "burn",
  "burn-common",
- "burn-wgpu",
  "clap 4.5.23",
  "colored",
  "cubecl",
diff --git a/backend-comparison/Cargo.toml b/backend-comparison/Cargo.toml
index 39ddd0c6f5..ee5f0bd8a2 100644
--- a/backend-comparison/Cargo.toml
+++ b/backend-comparison/Cargo.toml
@@ -34,7 +34,7 @@ wgpu-spirv-fusion = ["wgpu-spirv", "burn/fusion"]
 arboard = { workspace = true }
 burn = { path = "../crates/burn", default-features = false }
 burn-common = { path = "../crates/burn-common", version = "0.16.0" }
-burn-wgpu = { path = "../crates/burn-wgpu", default-features = false, version = "0.16.0", optional = true }
+
 clap = { workspace = true }
 colored = { workspace = true }
 cubecl = { workspace = true, features = ["wgpu"], default-features = true }
@@ -96,6 +96,11 @@ name = "conv3d"
 harness = false
 name = "matmul"
 
+[[bench]]
+harness = false
+name = "matmul-fused"
+path = "benches/matmul_fused.rs"
+
 [[bench]]
 harness = false
 name = "data"
diff --git a/backend-comparison/benches/matmul.rs b/backend-comparison/benches/matmul.rs
index 6970408a58..d950c71926 100644
--- a/backend-comparison/benches/matmul.rs
+++ b/backend-comparison/benches/matmul.rs
@@ -1,5 +1,5 @@
 use backend_comparison::persistence::save;
-use burn::tensor::{activation::relu, backend::Backend, Shape, Tensor};
+use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};
 use burn_common::benchmark::{run_benchmark, Benchmark};
 use derive_new::new;
 
@@ -11,7 +11,7 @@ struct MatmulBenchmark<B: Backend, const D: usize> {
 }
 
 impl<B: Backend, const D: usize> Benchmark for MatmulBenchmark<B, D> {
-    type Args = (Tensor<B, D>, Tensor<B, D>, Tensor<B, 1>);
+    type Args = (Tensor<B, D>, Tensor<B, D>);
 
     fn name(&self) -> String {
         "matmul".into()
@@ -21,17 +21,15 @@ impl<B: Backend, const D: usize> Benchmark for MatmulBenchmark<B, D> {
         vec![self.shape_lhs.dims.clone(), self.shape_rhs.dims.clone()]
     }
 
-    fn execute(&self, (lhs, rhs, bias): Self::Args) {
-        let bias = bias.unsqueeze();
-        relu(lhs.matmul(rhs) + bias);
+    fn execute(&self, (lhs, rhs): Self::Args) {
+        lhs.matmul(rhs);
     }
 
     fn prepare(&self) -> Self::Args {
-        let lhs = Tensor::zeros(self.shape_lhs.clone(), &self.device);
-        let rhs = Tensor::zeros(self.shape_rhs.clone(), &self.device);
-        let bias = Tensor::zeros([self.shape_rhs.dims[2]], &self.device);
+        let lhs = Tensor::random(self.shape_lhs.clone(), Distribution::Default, &self.device);
+        let rhs = Tensor::random(self.shape_rhs.clone(), Distribution::Default, &self.device);
 
-        (lhs, rhs, bias)
+        (lhs, rhs)
     }
 
     fn sync(&self) {
diff --git a/backend-comparison/benches/matmul_fused.rs b/backend-comparison/benches/matmul_fused.rs
new file mode 100644
index 0000000000..375be97b4e
--- /dev/null
+++ b/backend-comparison/benches/matmul_fused.rs
@@ -0,0 +1,74 @@
+use backend_comparison::persistence::save;
+use burn::tensor::{activation::relu, backend::Backend, Distribution, Shape, Tensor};
+use burn_common::benchmark::{run_benchmark, Benchmark};
+use derive_new::new;
+
+#[derive(new)]
+struct MatmulBenchmark<B: Backend, const D: usize> {
+    shape_lhs: Shape,
+    shape_rhs: Shape,
+    device: B::Device,
+}
+
+impl<B: Backend, const D: usize> Benchmark for MatmulBenchmark<B, D> {
+    type Args = (Tensor<B, D>, Tensor<B, D>, Tensor<B, 1>);
+
+    fn name(&self) -> String {
+        "matmul_bias_relu".into()
+    }
+
+    fn shapes(&self) -> Vec<Vec<usize>> {
+        vec![self.shape_lhs.dims.clone(), self.shape_rhs.dims.clone()]
+    }
+
+    fn execute(&self, (lhs, rhs, bias): Self::Args) {
+        let bias = bias.unsqueeze();
+        relu(lhs.matmul(rhs) + bias);
+    }
+
+    fn prepare(&self) -> Self::Args {
+        let lhs = Tensor::random(self.shape_lhs.clone(), Distribution::Default, &self.device);
+        let rhs = Tensor::random(self.shape_rhs.clone(), Distribution::Default, &self.device);
+        let bias = Tensor::random(
+            [self.shape_rhs.dims[2]],
+            Distribution::Default,
+            &self.device,
+        );
+
+        (lhs, rhs, bias)
+    }
+
+    fn sync(&self) {
+        B::sync(&self.device)
+    }
+}
+
+#[allow(dead_code)]
+fn bench<B: Backend>(
+    device: &B::Device,
+    feature_name: &str,
+    url: Option<&str>,
+    token: Option<&str>,
+) {
+    let benchmarks = [
+        (2, 4096, 4096, 4096),
+        (32, 2048, 2048, 2048),
+        (256, 1024, 1024, 1024),
+        (1024, 256, 256, 256),
+    ]
+    .into_iter()
+    .map(|(b, m, n, k)| {
+        let shape_lhs = [b, m, k].into();
+        let shape_rhs = [b, k, n].into();
+
+        MatmulBenchmark::<B, 3>::new(shape_lhs, shape_rhs, device.clone())
+    })
+    .map(run_benchmark)
+    .collect();
+
+    save::<B>(benchmarks, device, feature_name, url, token).unwrap();
+}
+
+fn main() {
+    backend_comparison::bench_on_backend!();
+}
diff --git a/backend-comparison/src/burnbenchapp/base.rs b/backend-comparison/src/burnbenchapp/base.rs
index 0424e00c4e..83c5060a6b 100644
--- a/backend-comparison/src/burnbenchapp/base.rs
+++ b/backend-comparison/src/burnbenchapp/base.rs
@@ -103,6 +103,8 @@ enum BenchmarkValues {
     Data,
     #[strum(to_string = "matmul")]
     Matmul,
+    #[strum(to_string = "matmul-fused")]
+    MatmulFused,
     #[strum(to_string = "unary")]
     Unary,
     #[strum(to_string = "max-pool2d")]