diff --git a/CONTRIBUTE.md b/CONTRIBUTE.md
index 06d5621fa..2f4598c33 100644
--- a/CONTRIBUTE.md
+++ b/CONTRIBUTE.md
@@ -128,3 +128,15 @@ fn main() {
 /// ...
 }
 ```
+
+## Use the lapack trait bound
+
+When you want to implement an algorithm which requires the [Lapack](https://docs.rs/ndarray-linalg/0.13.1/ndarray_linalg/types/trait.Lapack.html) bound, you could simply add the trait bound to the standard `linfa::Float` bound, e.g. `F: Float + Scalar + Lapack`. If you do that, you currently run into conflicting function definitions of [num_traits::Float](https://docs.rs/num-traits/0.2.14/num_traits/float/trait.Float.html) and [cauchy::Scalar](https://docs.rs/cauchy/0.4.0/cauchy/trait.Scalar.html), the first being defined for real values and the second for complex values.
+
+To avoid this you can use the `linfa::dataset::{WithLapack, WithoutLapack}` traits, which add the lapack trait bound only for a limited scope and then remove it again, so that the conflicts are avoided. For example:
+```rust
+let decomp = covariance.with_lapack().cholesky(UPLO::Lower)?;
+let sol = decomp
+    .solve_triangular(UPLO::Lower, Diag::NonUnit, &Array::eye(n_features))?
+    .without_lapack();
+```
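+
+As an illustration, the same pattern inside a generic function could look like the following sketch (`invert_cholesky` is only an illustrative name, not part of the linfa API, and the imports are indicative):
+```rust
+use linfa::dataset::{WithLapack, WithoutLapack};
+use linfa::Float;
+use ndarray::{Array2, ArrayView2};
+use ndarray_linalg::{cholesky::*, triangular::*};
+
+// `F` only carries the plain `linfa::Float` bound, lapack is added ad-hoc
+fn invert_cholesky<F: Float>(cov: ArrayView2<F>) -> Option<Array2<F>> {
+    let n = cov.nrows();
+    // promote to a lapack-capable scalar type for the decomposition ...
+    let decomp = cov.with_lapack().cholesky(UPLO::Lower).ok()?;
+    let inv = decomp
+        .solve_triangular(UPLO::Lower, Diag::NonUnit, &Array2::eye(n))
+        .ok()?
+        // ... and strip the lapack bound again afterwards
+        .without_lapack();
+    Some(inv)
+}
+```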
diff --git a/Cargo.toml b/Cargo.toml
index de8686d3a..6788607bf 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa"
-version = "0.3.1"
+version = "0.4.0"
 authors = [
     "Luca Palmieri ",
     "Lorenz Schmidt ",
diff --git a/algorithms/linfa-bayes/Cargo.toml b/algorithms/linfa-bayes/Cargo.toml
index 4d904db43..d1001ae4b 100644
--- a/algorithms/linfa-bayes/Cargo.toml
+++ b/algorithms/linfa-bayes/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-bayes"
-version = "0.3.1"
+version = "0.4.0"
 authors = ["VasanthakumarV "]
 description = "Collection of Naive Bayes Algorithms"
 edition = "2018"
@@ -15,8 +15,8 @@
 ndarray = { version = "0.14" , features = ["blas", "approx"]}
 ndarray-stats = "0.4"
 thiserror = "1"
-linfa = { version = "0.3.1", path = "../.." }
+linfa = { version = "0.4.0", path = "../.." }
 
 [dev-dependencies]
 approx = "0.4"
-linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["winequality"] }
+linfa-datasets = { version = "0.4.0", path = "../../datasets", features = ["winequality"] }
diff --git a/algorithms/linfa-clustering/Cargo.toml b/algorithms/linfa-clustering/Cargo.toml
index bd8d38005..23299340f 100644
--- a/algorithms/linfa-clustering/Cargo.toml
+++ b/algorithms/linfa-clustering/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-clustering"
-version = "0.3.1"
+version = "0.4.0"
 edition = "2018"
 authors = [
     "Luca Palmieri ",
@@ -36,7 +36,8 @@
 num-traits = "0.2"
 rand_isaac = "0.3"
 thiserror = "1"
 partitions = "0.2.4"
-linfa = { version = "0.3.1", path = "../..", features = ["ndarray-linalg"] }
+
+linfa = { version = "0.4.0", path = "../..", features = ["ndarray-linalg"] }
 [dev-dependencies]
 ndarray-npy = { version = "0.7", default-features = false }
diff --git a/algorithms/linfa-clustering/src/gaussian_mixture/algorithm.rs b/algorithms/linfa-clustering/src/gaussian_mixture/algorithm.rs
index 2388d3006..4cadb3bc7 100644
--- a/algorithms/linfa-clustering/src/gaussian_mixture/algorithm.rs
+++ b/algorithms/linfa-clustering/src/gaussian_mixture/algorithm.rs
@@ -257,8 +257,6 @@ impl GaussianMixtureModel {
         let n_features = covariances.shape()[1];
         let mut precisions_chol = Array::zeros((n_clusters, n_features, n_features));
         for (k, covariance) in covariances.outer_iter().enumerate() {
-            dbg!(&covariance.shape());
-            dbg!(&covariance.with_lapack().shape());
             let decomp = covariance.with_lapack().cholesky(UPLO::Lower)?;
             let sol = decomp
                 .solve_triangular(UPLO::Lower, Diag::NonUnit, &Array::eye(n_features))?
diff --git a/algorithms/linfa-elasticnet/Cargo.toml b/algorithms/linfa-elasticnet/Cargo.toml
index 912e82d26..c86ffc84b 100644
--- a/algorithms/linfa-elasticnet/Cargo.toml
+++ b/algorithms/linfa-elasticnet/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-elasticnet"
-version = "0.3.1"
+version = "0.4.0"
 authors = [
     "Paul Körbitz / Google ",
     "Lorenz Schmidt "
@@ -35,9 +35,9 @@
 num-traits = "0.2"
 approx = "0.4"
 thiserror = "1"
-linfa = { version = "0.3.1", path = "../.." }
+linfa = { version = "0.4.0", path = "../.." }
 
 [dev-dependencies]
-linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["diabetes"] }
+linfa-datasets = { version = "0.4.0", path = "../../datasets", features = ["diabetes"] }
 ndarray-rand = "0.13"
 rand_isaac = "0.3"
diff --git a/algorithms/linfa-hierarchical/Cargo.toml b/algorithms/linfa-hierarchical/Cargo.toml
index da8443321..35c05b307 100644
--- a/algorithms/linfa-hierarchical/Cargo.toml
+++ b/algorithms/linfa-hierarchical/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-hierarchical"
-version = "0.3.1"
+version = "0.4.0"
 authors = ["Lorenz Schmidt "]
 edition = "2018"
 
@@ -17,10 +17,10 @@
 ndarray = { version = "0.14", default-features = false }
 kodama = "0.2"
 
-linfa = { version = "0.3.1", path = "../.." }
-linfa-kernel = { version = "0.3.1", path = "../linfa-kernel" }
+linfa = { version = "0.4.0", path = "../.." }
+linfa-kernel = { version = "0.4.0", path = "../linfa-kernel" }
 
 [dev-dependencies]
 rand = "0.8"
 ndarray-rand = "0.13"
-linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["iris"] }
+linfa-datasets = { version = "0.4.0", path = "../../datasets", features = ["iris"] }
diff --git a/algorithms/linfa-ica/Cargo.toml b/algorithms/linfa-ica/Cargo.toml
index 4eb0f7290..6a77ce37b 100644
--- a/algorithms/linfa-ica/Cargo.toml
+++ b/algorithms/linfa-ica/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-ica"
-version = "0.3.1"
+version = "0.4.0"
 authors = ["VasanthakumarV "]
 description = "A collection of Independent Component Analysis (ICA) algorithms"
 edition = "2018"
@@ -32,7 +32,7 @@
 num-traits = "0.2"
 rand_isaac = "0.3"
 thiserror = "1"
-linfa = { version = "0.3.1", path = "../..", features = ["ndarray-linalg"] }
+linfa = { version = "0.4.0", path = "../..", features = ["ndarray-linalg"] }
 
 [dev-dependencies]
 ndarray-npy = { version = "0.7", default-features = false }
diff --git a/algorithms/linfa-kernel/Cargo.toml b/algorithms/linfa-kernel/Cargo.toml
index da6dbdec8..7e25c3719 100644
--- a/algorithms/linfa-kernel/Cargo.toml
+++ b/algorithms/linfa-kernel/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-kernel"
-version = "0.3.1"
+version = "0.4.0"
 authors = ["Lorenz Schmidt "]
 description = "Kernel methods for non-linear algorithms"
 edition = "2018"
@@ -30,4 +30,4 @@
 sprs = { version="0.9.4", default-features = false }
 hnsw = "0.6"
 space = "0.10"
-linfa = { version = "0.3.1", path = "../.." }
+linfa = { version = "0.4.0", path = "../.." }
diff --git a/algorithms/linfa-linear/Cargo.toml b/algorithms/linfa-linear/Cargo.toml
index a5fefe8c8..1aecf7357 100644
--- a/algorithms/linfa-linear/Cargo.toml
+++ b/algorithms/linfa-linear/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-linear"
-version = "0.3.1"
+version = "0.4.0"
 authors = [
     "Paul Körbitz / Google ",
     "VasanthakumarV "
@@ -25,8 +25,8 @@
 argmin = { version = "0.4", features = ["ndarrayl"] }
 serde = { version = "1.0", default-features = false, features = ["derive"] }
 thiserror = "1"
-linfa = { version = "0.3.1", path = "../.." }
+linfa = { version = "0.4.0", path = "../.." }
 
 [dev-dependencies]
-linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["diabetes"] }
+linfa-datasets = { version = "0.4.0", path = "../../datasets", features = ["diabetes"] }
 approx = "0.4"
diff --git a/algorithms/linfa-logistic/Cargo.toml b/algorithms/linfa-logistic/Cargo.toml
index 495533637..2064e8eb0 100644
--- a/algorithms/linfa-logistic/Cargo.toml
+++ b/algorithms/linfa-logistic/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-logistic"
-version = "0.3.1"
+version = "0.4.0"
 authors = ["Paul Körbitz / Google "]
 
 description = "A Machine Learning framework for Rust"
@@ -21,8 +21,8 @@
 argmin = { version = "0.4", features = ["ndarrayl"] }
 serde = "1.0"
 thiserror = "1"
-linfa = { version = "0.3.1", path = "../.." }
+linfa = { version = "0.4.0", path = "../.." }
 
 [dev-dependencies]
 approx = "0.4"
-linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["winequality"] }
+linfa-datasets = { version = "0.4.0", path = "../../datasets", features = ["winequality"] }
diff --git a/algorithms/linfa-pls/Cargo.toml b/algorithms/linfa-pls/Cargo.toml
index 1221010c1..e0a96b99a 100644
--- a/algorithms/linfa-pls/Cargo.toml
+++ b/algorithms/linfa-pls/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-pls"
-version = "0.3.1"
+version = "0.4.0"
 edition = "2018"
 authors = ["relf "]
 description = "Partial Least Squares family methods"
@@ -32,9 +32,9 @@
 rand_isaac = "0.3"
 num-traits = "0.2"
 paste = "1.0"
 thiserror = "1"
-linfa = { version = "0.3.1", path = "../..", features = ["ndarray-linalg"] }
+linfa = { version = "0.4.0", path = "../..", features = ["ndarray-linalg"] }
 [dev-dependencies]
-linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["linnerud"] }
+linfa-datasets = { version = "0.4.0", path = "../../datasets", features = ["linnerud"] }
 rand_isaac = "0.3"
 approx = "0.4"
diff --git a/algorithms/linfa-pls/README.md b/algorithms/linfa-pls/README.md
new file mode 100644
index 000000000..19b042ed5
--- /dev/null
+++ b/algorithms/linfa-pls/README.md
@@ -0,0 +1,27 @@
+# Partial Least Squares
+
+`linfa-pls` provides a pure Rust implementation of the partial least squares algorithm family.
+
+## The Big Picture
+
+`linfa-pls` is a crate in the [`linfa`](https://crates.io/crates/linfa) ecosystem, an effort to create a toolkit for classical Machine Learning implemented in pure Rust, akin to Python's `scikit-learn`.
+
+## Current state
+
+`linfa-pls` currently provides an implementation of the following methods:
+
+ - Partial Least Squares
+
+## Examples
+
+There is a usage example in the `examples/` directory. The example uses a BLAS backend; to run it with the `intel-mkl` library, do:
+
+```bash
+$ cargo run --example pls_regression --features linfa/intel-mkl-system
+```
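+
+As a rough sketch (the dataset construction is elided here and the builder methods are indicative), fitting a PLS regression could look like:
+
+```rust
+use linfa::traits::{Fit, Predict};
+use linfa_pls::PlsRegression;
+
+// fit a two-component PLS regression on `dataset`
+let pls = PlsRegression::params(2).fit(&dataset)?;
+
+// predict the targets for the observations
+let predictions = pls.predict(&dataset);
+```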
+
+## License
+Dual-licensed to be compatible with the Rust project.
+
+Licensed under the Apache License, Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0> or the MIT license <http://opensource.org/licenses/MIT>, at your option. This file may not be copied, modified, or distributed except according to those terms.
diff --git a/algorithms/linfa-preprocessing/Cargo.toml b/algorithms/linfa-preprocessing/Cargo.toml
index 894b29958..8cfea99db 100644
--- a/algorithms/linfa-preprocessing/Cargo.toml
+++ b/algorithms/linfa-preprocessing/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-preprocessing"
-version = "0.3.1"
+version = "0.4.0"
 authors = ["Sauro98 "]
 
 description = "A Machine Learning framework for Rust"
@@ -17,7 +17,7 @@
 
 [dependencies]
 
-linfa = { version = "0.3.1", path = "../..", features = ["ndarray-linalg"] }
+linfa = { version = "0.4.0", path = "../..", features = ["ndarray-linalg"] }
 ndarray = { version = "0.14", default-features = false, features = ["approx", "blas"] }
 ndarray-linalg = { version = "0.13" }
 ndarray-stats = "0.4"
@@ -30,8 +30,8 @@
 encoding = "0.2"
 sprs = { version="0.9.4", default-features = false }
 [dev-dependencies]
-linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["diabetes", "winequality"] }
-linfa-bayes = { version = "0.3.1", path = "../linfa-bayes" }
+linfa-datasets = { version = "0.4.0", path = "../../datasets", features = ["diabetes", "winequality"] }
+linfa-bayes = { version = "0.4.0", path = "../linfa-bayes" }
 iai = "0.1"
 curl = "0.4.35"
 flate2 = "1.0.20"
diff --git a/algorithms/linfa-reduction/Cargo.toml b/algorithms/linfa-reduction/Cargo.toml
index 16e8f0a0d..bdf10ea27 100644
--- a/algorithms/linfa-reduction/Cargo.toml
+++ b/algorithms/linfa-reduction/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-reduction"
-version = "0.3.1"
+version = "0.4.0"
 authors = ["Lorenz Schmidt "]
 description = "A collection of dimensionality reduction techniques"
 edition = "2018"
@@ -31,11 +31,11 @@
 ndarray-rand = "0.13"
 num-traits = "0.2"
 thiserror = "1"
-linfa = { version = "0.3.1", path = "../..", features = ["ndarray-linalg"] }
-linfa-kernel = { version = "0.3.1", path = "../linfa-kernel" }
+linfa = { version = "0.4.0", path = "../..", features = ["ndarray-linalg"] }
+linfa-kernel = { version = "0.4.0", path = "../linfa-kernel" }
 
 [dev-dependencies]
 rand = { version = "0.8", features = ["small_rng"] }
 ndarray-npy = { version = "0.7", default-features = false }
-linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["iris"] }
+linfa-datasets = { version = "0.4.0", path = "../../datasets", features = ["iris"] }
 approx = { version = "0.4", default-features = false, features = ["std"] }
diff --git a/algorithms/linfa-svm/Cargo.toml b/algorithms/linfa-svm/Cargo.toml
index 2c41cdd4b..68790bc7e 100644
--- a/algorithms/linfa-svm/Cargo.toml
+++ b/algorithms/linfa-svm/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-svm"
-version = "0.3.1"
+version = "0.4.0"
 edition = "2018"
 authors = ["Lorenz Schmidt "]
 description = "Support Vector Machines"
@@ -29,9 +29,9 @@
 ndarray-rand = "0.13"
 num-traits = "0.2"
 thiserror = "1"
-linfa = { version = "0.3.1", path = "../.." }
-linfa-kernel = { version = "0.3.1", path = "../linfa-kernel" }
+linfa = { version = "0.4.0", path = "../.." }
+linfa-kernel = { version = "0.4.0", path = "../linfa-kernel" }
 
 [dev-dependencies]
-linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["winequality", "diabetes"] }
+linfa-datasets = { version = "0.4.0", path = "../../datasets", features = ["winequality", "diabetes"] }
 rand_isaac = "0.3"
diff --git a/algorithms/linfa-trees/Cargo.toml b/algorithms/linfa-trees/Cargo.toml
index 4655987c2..4396f175a 100644
--- a/algorithms/linfa-trees/Cargo.toml
+++ b/algorithms/linfa-trees/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-trees"
-version = "0.3.1"
+version = "0.4.0"
 edition = "2018"
 authors = ["Moss Ebeling "]
 description = "A collection of tree-based algorithms"
@@ -27,14 +27,14 @@
 features = ["std", "derive"]
 
 ndarray = { version = "0.14" , features = ["rayon", "approx"]}
 ndarray-rand = "0.13"
-linfa = { version = "0.3.1", path = "../.." }
+linfa = { version = "0.4.0", path = "../.." }
 
 [dev-dependencies]
 rand = { version = "0.8", features = ["small_rng"] }
 criterion = "0.3"
 approx = "0.4"
-linfa-datasets = { version = "0.3.1", path = "../../datasets/", features = ["iris"] }
+linfa-datasets = { version = "0.4.0", path = "../../datasets/", features = ["iris"] }
 
 [[bench]]
 name = "decision_tree"
diff --git a/algorithms/linfa-tsne/Cargo.toml b/algorithms/linfa-tsne/Cargo.toml
index 09a0c2377..9fa227ab9 100644
--- a/algorithms/linfa-tsne/Cargo.toml
+++ b/algorithms/linfa-tsne/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-tsne"
-version = "0.3.1"
+version = "0.4.0"
 authors = ["Lorenz Schmidt "]
 edition = "2018"
 
@@ -10,7 +10,7 @@
 license = "MIT/Apache-2.0"
 repository = "https://github.com/rust-ml/linfa"
 readme = "README.md"
 
-keywords = ["tsne", "data visualization", "clustering", "machine-learning", "linfa"]
+keywords = ["tsne", "visualization", "clustering", "machine-learning", "linfa"]
 categories = ["algorithms", "mathematics", "science"]
 
@@ -19,11 +19,12 @@
 [dependencies]
 ndarray = { version = "0.14", default-features = false }
 ndarray-rand = "0.13"
 bhtsne = "0.4.0"
-linfa = { version = "0.3.1", path = "../.." }
+linfa = { version = "0.4.0", path = "../.." }
 
 [dev-dependencies]
 rand = "0.8"
 approx = "0.4"
+mnist = { version = "0.4", features = ["download"] }
-linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["iris"] }
-linfa-reduction = { version = "0.3.1", path = "../linfa-reduction" }
+linfa-datasets = { version = "0.4.0", path = "../../datasets", features = ["iris"] }
+linfa-reduction = { version = "0.4.0", path = "../linfa-reduction" }
diff --git a/algorithms/linfa-tsne/examples/mnist.rs b/algorithms/linfa-tsne/examples/mnist.rs
new file mode 100644
index 000000000..ca8228a2f
--- /dev/null
+++ b/algorithms/linfa-tsne/examples/mnist.rs
@@ -0,0 +1,60 @@
+use linfa::traits::{Fit, Transformer};
+use linfa::Dataset;
+use linfa_reduction::Pca;
+use linfa_tsne::{Result, TSne};
+use mnist::{Mnist, MnistBuilder};
+use ndarray::Array;
+use std::{io::Write, process::Command};
+
+fn main() -> Result<()> {
+    // use 50k samples from the MNIST dataset
+    let (trn_size, rows, cols) = (50_000usize, 28, 28);
+
+    // download and extract it into a dataset
+    let Mnist {
+        trn_img, trn_lbl, ..
+    } = MnistBuilder::new()
+        .label_format_digit()
+        .training_set_length(trn_size as u32)
+        .download_and_extract()
+        .finalize();
+
+    // create a dataset from it
+    let ds = Dataset::new(
+        Array::from_shape_vec((trn_size, rows * cols), trn_img)?.mapv(|x| (x as f64) / 255.),
+        Array::from_shape_vec((trn_size, 1), trn_lbl)?,
+    );
+
+    // reduce to 50 dimensions without whitening
+    let ds = Pca::params(50)
+        .whiten(false)
+        .fit(&ds)
+        .unwrap()
+        .transform(ds);
+
+    // calculate a two-dimensional embedding with Barnes-Hut t-SNE
+    let ds = TSne::embedding_size(2)
+        .perplexity(50.0)
+        .approx_threshold(0.5)
+        .max_iter(1000)
+        .transform(ds)?;
+
+    // write out the embedding
+    let mut f = std::fs::File::create("examples/mnist.dat").unwrap();
+
+    for (x, y) in ds.sample_iter() {
+        f.write(format!("{} {} {}\n", x[0], x[1], y[0]).as_bytes())
+            .unwrap();
+    }
+
+    // and plot with gnuplot
+    Command::new("gnuplot")
+        .arg("-p")
+        .arg("examples/mnist_plot.plt")
+        .spawn()
+        .expect(
+            "Failed to launch gnuplot. Please ensure that gnuplot is installed and on the $PATH.",
+        );
+
+    Ok(())
+}
diff --git a/algorithms/linfa-tsne/examples/iris_plot.plt b/algorithms/linfa-tsne/examples/mnist_plot.plt
similarity index 100%
rename from algorithms/linfa-tsne/examples/iris_plot.plt
rename to algorithms/linfa-tsne/examples/mnist_plot.plt
diff --git a/datasets/Cargo.toml b/datasets/Cargo.toml
index 9406109f5..d9517a9cd 100644
--- a/datasets/Cargo.toml
+++ b/datasets/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-datasets"
-version = "0.3.1"
+version = "0.4.0"
 authors = ["Lorenz Schmidt "]
 description = "Collection of small datasets for Linfa"
 edition = "2018"
@@ -8,7 +8,7 @@
 license = "MIT/Apache-2.0"
 repository = "https://github.com/rust-ml/linfa"
 [dependencies]
-linfa = { version = "0.3.1", path = ".." }
+linfa = { version = "0.4.0", path = ".." }
 ndarray = { version = "0.14", default-features = false }
 ndarray-csv = "=0.5.0"
 csv = "1.1"
diff --git a/docs/website/content/news/release040/index.md b/docs/website/content/news/release040/index.md
new file mode 100644
index 000000000..971aed749
--- /dev/null
+++ b/docs/website/content/news/release040/index.md
@@ -0,0 +1,177 @@
++++
+title = "Release 0.4.0"
+date = "2021-04-28"
++++
+
+Linfa's 0.4.0 release introduces four new algorithms, improves documentation of the ICA and K-means implementations, adds more benchmarks to K-means, and updates to version 0.14 of ndarray.
+
+## New algorithms
+
+The [Partial Least Squares Regression](https://en.wikipedia.org/wiki/Partial_least_squares_regression) model family is added in this release. It projects the observed as well as the predicted variables into a latent space and maximizes the correlation between them. For problems with a large number of targets or collinear predictors it performs better than standard regression. For more information look into the documentation of `linfa-pls`.
+
+A wrapper for Barnes-Hut t-SNE is also added in this release. The t-SNE algorithm is often used for data visualization and projects data from a high-dimensional space to a similar representation in two or three dimensions. It does so by minimizing the Kullback-Leibler divergence between the high-dimensional source distribution and the low-dimensional target distribution. The Barnes-Hut approximation improves the runtime drastically while retaining the quality of the embedding. Kudos to [github/frjnn](https://github.com/frjnn/) for providing an implementation!
+
+A new preprocessing crate makes working with textual data and data normalization easy. It implements _count-vectorizer_ and _TF-IDF_ normalization for text pre-processing. Normalizations for signals include linear scaling, norm scaling and whitening with PCA/ZCA/Cholesky. An example with a Naive Bayes model achieves 84% F1 score for predicting the categories `alt.atheism`, `talk.religion.misc`, `comp.graphics` and `sci.space` on a news dataset.
+
+[Platt scaling](https://en.wikipedia.org/wiki/Platt_scaling) calibrates a real-valued classification model to probabilities over two classes. This is used for SV classification when probabilities are required. Further, a multi-class model, combining multiple binary models (e.g. calibrated SVM models) into a single multi-class model, is also added. These composing models are moved to the `linfa/src/composing/` subfolder.
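+
+As a sketch, fitting a binary SVM classifier that predicts calibrated probabilities could look as follows (`train` and `valid` are placeholder datasets, and the builder calls are indicative):
+
+```rust
+// choosing `Pr` as the target type yields a model whose
+// predictions are Platt-calibrated probabilities
+let model = Svm::<_, Pr>::params()
+    .gaussian_kernel(30.0)
+    .fit(&train)?;
+
+// probability of the positive class for each sample
+let probabilities = model.predict(&valid);
+```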
+
+## Improvements
+
+Numerous improvements are added to the K-means implementation, thanks to @YuhanLiin. The implementation is optimized for offline training, an incremental training model is added, and KMeans++/KMeans|| initialization gives good initial cluster means for medium and large datasets.
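+
+A minimal sketch of fitting K-means with this release could look like the following (dataset construction is elided and the builder methods are indicative):
+
+```rust
+// fit three centroids to the observations in `dataset`
+let model = KMeans::params(3)
+    .max_n_iterations(200)
+    .tolerance(1e-5)
+    .fit(&dataset)?;
+
+// assign each observation to the closest centroid
+let assignments = model.predict(dataset);
+```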
+ +```rust +// create a dataset from magnitudes and targets +let ds = Dataset::new( + Array::from_shape_vec((trn_size, rows * cols), images)?.mapv(|x| (x as f64) / 255.), + Array::from_shape_vec((trn_size, 1), labels)? +); +``` + +In a preliminary step this brightness information is transformed from a 784 dimensional vector representation to a 50 dimensional embedding with maximized variance. The Principal Component Analysis uses LOBPCG for an efficient implementation. No whitening is performed as this hurts the results. + +```rust +let ds = Pca::params(50).whiten(false).fit(&ds).transform(ds); +``` + +Then t-SNE is used to project those 50 dimensions in a non-linear way to retain as much of the structural information as possible. We will use a Barnes-Hut approximation with `theta=0.5`. This performs a space partitioning and combines regions very far away from the corresponding point to reduce the required runtime. The value theta can go from zero to one with one the original non-approximate t-SNE algorithm. We will also cap the runtime to a thousand iterations: + +```rust +let ds = TSne::embedding_size(2) + .perplexity(50.0) + .approx_threshold(0.5) + .max_iter(1000) + .transform(ds)?; +``` + +The resulting embedding can then be written out to a file and plotted with `gnuplot`: + +```rust +let mut f = File::create("examples/mnist.dat").unwrap(); + +for (x, y) in ds.sample_iter() { + f.write(format!("{} {} {}\n", x[0], x[1], y[0]).as_bytes()) + .unwrap(); +} +``` + +You can find the full example at [algorithms/linfa-tsne/examples/mnist.rs](https://github.com/rust-ml/linfa/blob/master/algorithms/linfa-tsne/examples/mnist.rs) and run it with +``` +$ cargo run --example mnist --features linfa/intel-mkl-system --release +``` + +## Preprocessing text data with TF-IDF and `linfa-preprocessing` + +Let's move to a different example. This release sees the publication of the first `linfa-preprocessing` version which already includes many algorithms suitable for text processing. We will try to predict the topic of a newspaper article with Gaussian Naive Bayes algorithm. Prior to training such a model, we need to somehow extract continuous embeddings from the text. With a number of sample files `training_filenames` we can use `linfa-preprocessing` to construct a vocabulary by calling: + +```rust +let vectorizer = TfIdfVectorizer::default() + .fit_files(&training_filenames, ISO_8859_1, Strict)?; + +println!( + "We obtain a vocabulary with {} entries", + vectorizer.nentries() +); + +// construction of targets and dataset omitted here +let training_dataset = //... +``` + +This vocabulary can then be used to extract an embedding for a text file. The Naive Bayes algorithm does not work with sparse matrices, so we have to make the embedding matrix dense. + +```rust +let training_records = vectorizer + .transform_files(&training_filenames, ISO_8859_1, Strict) + .to_dense(); +``` + +The Gaussian Naive Bayes is trained with the default parameters and the dataset passed for training: (the construction of the targets is omitted here) +```rust +let model = GaussianNbParams::params().fit(&training_dataset)?; +let training_prediction = model.predict(&training_dataset); + +let cm = training_prediction + .confusion_matrix(&training_dataset)?; + +// this gives an F1 score of 0.9994 +println!("The fitted model has a training f1 score of {}", cm.f1_score()); +``` + +To evaluate the model we have a second set of `test_filenames` which are again transformed to its dense embedding representation. 
+
+You often have to compare several model parametrizations with k-folding. For this a new function `cross_validate` is added which takes the number of folds, the model parameters and a closure for the evaluation metric. It automatically performs the k-folding and averages the metric over the folds. To compare different L1 ratios of an elasticnet model, you can use it in the following way:
+```rust
+// L1 ratios to compare
+let ratios = vec![0.1, 0.2, 0.5, 0.7, 1.0];
+
+// create a model for each parameter
+let models = ratios
+    .iter()
+    .map(|ratio| ElasticNet::params().penalty(0.3).l1_ratio(*ratio))
+    .collect::<Vec<_>>();
+
+// get the mean r2 validation score across 5 folds for each model
+let r2_values =
+    dataset.cross_validate(5, &models, |prediction, truth| prediction.r2(&truth))?;
+
+// show the mean r2 score for each parameter choice
+for (ratio, r2) in ratios.iter().zip(r2_values.iter()) {
+    println!("L1 ratio: {}, r2 score: {}", ratio, r2);
+}
+```
+
+### Other changes
+
+ * fix for border points in the DBSCAN implementation
+ * improved documentation of the ICA subcrate
+ * prevent overflowing code example in website
+
+## Barnes-Hut t-SNE
+
+This example shows the use of `linfa-tsne` with the MNIST digits dataset. We are going to load the MNIST dataset, reduce the dimensionality with PCA to an embedding of 50 dimensions, and finally apply Barnes-Hut t-SNE for a two-dimensional embedding. This embedding can be plotted to give the following image:
+
+![t-SNE embedding of the MNIST digits dataset](tsne.png)
+
+I won't go into the details of how to load the MNIST dataset, but we are using the excellent [crates.io/mnist](https://crates.io/crates/mnist) crate here to help us download the images and represent them in a proper vector representation.
+
+```rust
+// use 50k samples from the MNIST dataset
+let (trn_size, rows, cols) = (50_000, 28, 28);
+
+// download and extract it into a dataset
+let Mnist { images, labels, .. } = MnistBuilder::new()
+    .label_format_digit()
+    .training_set_length(trn_size as u32)
+    .download_and_extract()
+    .finalize();
+```
+
+The image brightness information `images` and the corresponding `labels` are then used to construct a dataset.
+
+```rust
+// create a dataset from magnitudes and targets
+let ds = Dataset::new(
+    Array::from_shape_vec((trn_size, rows * cols), images)?.mapv(|x| (x as f64) / 255.),
+    Array::from_shape_vec((trn_size, 1), labels)?
+);
+```
+
+In a preliminary step this brightness information is transformed from a 784-dimensional vector representation to a 50-dimensional embedding with maximized variance. The Principal Component Analysis uses LOBPCG for an efficient implementation. No whitening is performed, as this hurts the results.
+
+```rust
+let ds = Pca::params(50).whiten(false).fit(&ds).unwrap().transform(ds);
+```
+
+Then t-SNE is used to project those 50 dimensions in a non-linear way to retain as much of the structural information as possible. We will use a Barnes-Hut approximation with `theta = 0.5`. This performs a space partitioning and combines regions very far away from the corresponding point to reduce the required runtime. The value of theta can go from zero to one, with zero corresponding to the original, non-approximate t-SNE algorithm. We will also cap the runtime to a thousand iterations:
+
+```rust
+let ds = TSne::embedding_size(2)
+    .perplexity(50.0)
+    .approx_threshold(0.5)
+    .max_iter(1000)
+    .transform(ds)?;
+```
+
+The resulting embedding can then be written out to a file and plotted with `gnuplot`:
+
+```rust
+let mut f = File::create("examples/mnist.dat").unwrap();
+
+for (x, y) in ds.sample_iter() {
+    f.write(format!("{} {} {}\n", x[0], x[1], y[0]).as_bytes())
+        .unwrap();
+}
+```
+
+You can find the full example at [algorithms/linfa-tsne/examples/mnist.rs](https://github.com/rust-ml/linfa/blob/master/algorithms/linfa-tsne/examples/mnist.rs) and run it with
+```
+$ cargo run --example mnist --features linfa/intel-mkl-system --release
+```
+
+## Preprocessing text data with TF-IDF and `linfa-preprocessing`
+
+Let's move to a different example. This release sees the publication of the first `linfa-preprocessing` version, which already includes many algorithms suitable for text processing. We will try to predict the topic of a newspaper article with the Gaussian Naive Bayes algorithm. Prior to training such a model, we need to extract continuous embeddings from the text. With a number of sample files `training_filenames` we can use `linfa-preprocessing` to construct a vocabulary by calling:
+
+```rust
+let vectorizer = TfIdfVectorizer::default()
+    .fit_files(&training_filenames, ISO_8859_1, Strict)?;
+
+println!(
+    "We obtain a vocabulary with {} entries",
+    vectorizer.nentries()
+);
+
+// construction of targets and dataset omitted here
+let training_dataset = //...
+```
+
+This vocabulary can then be used to extract an embedding for a text file. The Naive Bayes algorithm does not work with sparse matrices, so we have to make the embedding matrix dense.
+
+```rust
+let training_records = vectorizer
+    .transform_files(&training_filenames, ISO_8859_1, Strict)
+    .to_dense();
+```
+
+The Gaussian Naive Bayes model is trained with the default parameters and the dataset passed for training (the construction of the targets is omitted here):
+```rust
+let model = GaussianNbParams::params().fit(&training_dataset)?;
+let training_prediction = model.predict(&training_dataset);
+
+let cm = training_prediction
+    .confusion_matrix(&training_dataset)?;
+
+// this gives an F1 score of 0.9994
+println!("The fitted model has a training f1 score of {}", cm.f1_score());
+```
+
+To evaluate the model we have a second set of `test_filenames` which are again transformed to their dense embedding representation. The Gaussian Naive Bayes model is then used to predict the targets, and the confusion matrix and F1 score measure its performance.
+
+```rust
+let test_records = vectorizer
+    .transform_files(&test_filenames, ISO_8859_1, Strict)
+    .to_dense();
+
+// get targets and construct the testing dataset
+// ...
+
+// predict the testing targets
+let test_prediction: Array1<usize> = model.predict(&test_dataset);
+
+// create a confusion matrix and print it
+let cm = test_prediction.confusion_matrix(&test_dataset)?;
+println!("{:?}", cm);
+
+// the evaluation gives an F1 score of 0.8402
+println!("The model has a test f1 score of {}", cm.f1_score());
+```
+
+You can find the full example at [algorithms/linfa-preprocessing/examples/tfidf_vectorization.rs](https://github.com/rust-ml/linfa/blob/master/algorithms/linfa-preprocessing/examples/tfidf_vectorization.rs) and run it with
+```
+$ cargo run --example tfidf_vectorization --release
+```
diff --git a/docs/website/content/news/release040/tsne.png b/docs/website/content/news/release040/tsne.png
new file mode 100644
index 000000000..ae8ddcbec
Binary files /dev/null and b/docs/website/content/news/release040/tsne.png differ
diff --git a/docs/website/content/snippets/multi-class.md b/docs/website/content/snippets/multi-class.md
new file mode 100644
index 000000000..3e38e2e51
--- /dev/null
+++ b/docs/website/content/snippets/multi-class.md
@@ -0,0 +1,20 @@
++++
+title = "Multi Class"
++++
+```rust
+let params = Svm::<_, Pr>::params()
+    .gaussian_kernel(30.0);
+
+// assume we have a binary decision model (here SVM)
+// predicting probability. We can merge them into a
+// multi-class model by collecting several of them
+// into a `MultiClassModel`
+let model = train
+    .one_vs_all()?
+    .into_iter()
+    .map(|(l, x)| (l, params.fit(&x).unwrap()))
+    .collect::<MultiClassModel<_, _>>();
+
+// predict multi-class label
+let pred = model.predict(&valid);
+```
diff --git a/src/lib.rs b/src/lib.rs
index b678bdb55..08bde0bdc 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -32,9 +32,9 @@
 //! | [hierarchical](https://docs.rs/linfa-hierarchical/) | Agglomerative hierarchical clustering | Tested | Unsupervised learning | Cluster and build hierarchy of clusters |
 //! | [bayes](https://docs.rs/linfa-bayes/) | Naive Bayes | Tested | Supervised learning | Contains Gaussian Naive Bayes |
 //! | [ica](https://docs.rs/linfa-ica/) | Independent component analysis | Tested | Unsupervised learning | Contains FastICA implementation |
-//! | [pls](algorithms/linfa-pls/) | Partial Least Squares | Tested | Supervised learning | Contains PLS estimators for dimensionality reduction and regression |
-//! | [tsne](algorithms/linfa-tsne/) | Dimensionality reduction| Tested | Unsupervised learning | Contains exact solution and Barnes-Hut approximation t-SNE |
-//! | [preprocessing](algorithms/linfa-preprocessing/) |Normalization & Vectorization| Tested | Pre-processing | Contains data normalization/whitening and count vectorization/tf-idf|
+//! | [pls](https://docs.rs/linfa-pls/) | Partial Least Squares | Tested | Supervised learning | Contains PLS estimators for dimensionality reduction and regression |
+//! | [tsne](https://docs.rs/linfa-tsne/) | Dimensionality reduction| Tested | Unsupervised learning | Contains exact solution and Barnes-Hut approximation t-SNE |
+//! | [preprocessing](https://docs.rs/linfa-preprocessing/) |Normalization & Vectorization| Tested | Pre-processing | Contains data normalization/whitening and count vectorization/tf-idf|
 //!
 //! We believe that only a significant community effort can nurture, build, and sustain a machine learning ecosystem in Rust - there is no other way forward.
 //!